mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-30 03:40:37 +00:00
feat: optimize region migration concurrency with fine-grained table lock (#6023)
* feat(procedure): add dynamic key locking mechanism
* feat: optimize region migration concurrency with fine-grained table lock
* chore: apply suggestions from CR
This commit is contained in:
@@ -18,11 +18,13 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_procedure::local::{acquire_dynamic_key_lock, DynamicKeyLockGuard};
|
||||
use common_procedure::rwlock::KeyRwLock;
|
||||
use common_procedure::store::poison_store::PoisonStore;
|
||||
use common_procedure::test_util::InMemoryPoisonStore;
|
||||
use common_procedure::{
|
||||
Context, ContextProvider, Output, PoisonKey, Procedure, ProcedureId, ProcedureState,
|
||||
ProcedureWithId, Result, Status,
|
||||
ProcedureWithId, Result, Status, StringKey,
|
||||
};
|
||||
|
||||
/// A Mock [ContextProvider].
|
||||
@@ -30,6 +32,7 @@ use common_procedure::{
|
||||
pub struct MockContextProvider {
|
||||
states: HashMap<ProcedureId, ProcedureState>,
|
||||
poison_manager: InMemoryPoisonStore,
|
||||
dynamic_key_lock: Arc<KeyRwLock<String>>,
|
||||
}
|
||||
|
||||
impl MockContextProvider {
|
||||
@@ -38,6 +41,7 @@ impl MockContextProvider {
|
||||
MockContextProvider {
|
||||
states,
|
||||
poison_manager: InMemoryPoisonStore::default(),
|
||||
dynamic_key_lock: Arc::new(KeyRwLock::new()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,6 +62,10 @@ impl ContextProvider for MockContextProvider {
|
||||
.try_put_poison(key.to_string(), procedure_id.to_string())
|
||||
.await
|
||||
}
|
||||
|
||||
async fn acquire_lock(&self, key: &StringKey) -> DynamicKeyLockGuard {
|
||||
acquire_dynamic_key_lock(&self.dynamic_key_lock, key).await
|
||||
}
|
||||
}
|
||||
|
||||
/// Executes a procedure until it returns [Status::Done].
|
||||
|
||||
@@ -20,6 +20,7 @@ pub mod error;
|
||||
pub mod local;
|
||||
pub mod options;
|
||||
mod procedure;
|
||||
pub mod rwlock;
|
||||
pub mod store;
|
||||
pub mod watcher;
|
||||
|
||||
@@ -28,8 +29,8 @@ pub mod test_util;
|
||||
|
||||
pub use crate::error::{Error, Result};
|
||||
pub use crate::procedure::{
|
||||
BoxedProcedure, BoxedProcedureLoader, Context, ContextProvider, LockKey, Output, ParseIdError,
|
||||
PoisonKey, PoisonKeys, Procedure, ProcedureId, ProcedureInfo, ProcedureManager,
|
||||
ProcedureManagerRef, ProcedureState, ProcedureWithId, Status, StringKey,
|
||||
BoxedProcedure, BoxedProcedureLoader, Context, ContextProvider, ContextProviderRef, LockKey,
|
||||
Output, ParseIdError, PoisonKey, PoisonKeys, Procedure, ProcedureId, ProcedureInfo,
|
||||
ProcedureManager, ProcedureManagerRef, ProcedureState, ProcedureWithId, Status, StringKey,
|
||||
};
|
||||
pub use crate::watcher::Watcher;
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
// limitations under the License.
|
||||
|
||||
mod runner;
|
||||
mod rwlock;
|
||||
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
@@ -30,7 +29,6 @@ use snafu::{ensure, OptionExt, ResultExt};
|
||||
use tokio::sync::watch::{self, Receiver, Sender};
|
||||
use tokio::sync::{Mutex as TokioMutex, Notify};
|
||||
|
||||
use self::rwlock::KeyRwLock;
|
||||
use crate::error::{
|
||||
self, DuplicateProcedureSnafu, Error, LoaderConflictSnafu, ManagerNotStartSnafu,
|
||||
PoisonKeyNotDefinedSnafu, ProcedureNotFoundSnafu, Result, StartRemoveOutdatedMetaTaskSnafu,
|
||||
@@ -38,11 +36,12 @@ use crate::error::{
|
||||
};
|
||||
use crate::local::runner::Runner;
|
||||
use crate::procedure::{BoxedProcedureLoader, InitProcedureState, PoisonKeys, ProcedureInfo};
|
||||
use crate::rwlock::{KeyRwLock, OwnedKeyRwLockGuard};
|
||||
use crate::store::poison_store::PoisonStoreRef;
|
||||
use crate::store::{ProcedureMessage, ProcedureMessages, ProcedureStore, StateStoreRef};
|
||||
use crate::{
|
||||
BoxedProcedure, ContextProvider, LockKey, PoisonKey, ProcedureId, ProcedureManager,
|
||||
ProcedureState, ProcedureWithId, Watcher,
|
||||
ProcedureState, ProcedureWithId, StringKey, Watcher,
|
||||
};
|
||||
|
||||
/// The expired time of a procedure's metadata.
|
||||
@@ -157,12 +156,80 @@ struct LoadedProcedure {
|
||||
step: u32,
|
||||
}
|
||||
|
||||
/// The dynamic lock for procedure execution.
|
||||
///
|
||||
/// Unlike the procedure-level locks, these locks are acquired dynamically by the procedure
|
||||
/// during execution. They are only held when the procedure specifically needs these keys
|
||||
/// and are released as soon as the procedure no longer needs them.
|
||||
/// This allows for more fine-grained concurrency control during procedure execution.
|
||||
pub(crate) type DynamicKeyLock = Arc<KeyRwLock<String>>;
|
||||
|
||||
/// Acquires a dynamic key lock for the given key.
|
||||
///
|
||||
/// This function takes a reference to the dynamic key lock and a pointer to the key.
|
||||
/// It then matches the key type and acquires the appropriate lock.
|
||||
pub async fn acquire_dynamic_key_lock(
|
||||
lock: &DynamicKeyLock,
|
||||
key: &StringKey,
|
||||
) -> DynamicKeyLockGuard {
|
||||
match key {
|
||||
StringKey::Share(key) => {
|
||||
let guard = lock.read(key.to_string()).await;
|
||||
DynamicKeyLockGuard {
|
||||
guard: Some(OwnedKeyRwLockGuard::from(guard)),
|
||||
key: key.to_string(),
|
||||
lock: lock.clone(),
|
||||
}
|
||||
}
|
||||
StringKey::Exclusive(key) => {
|
||||
let guard = lock.write(key.to_string()).await;
|
||||
DynamicKeyLockGuard {
|
||||
guard: Some(OwnedKeyRwLockGuard::from(guard)),
|
||||
key: key.to_string(),
|
||||
lock: lock.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/// A guard for the dynamic key lock.
|
||||
///
|
||||
/// This guard is used to release the lock when the procedure no longer needs it.
|
||||
/// It also ensures that the lock is cleaned up when the guard is dropped.
|
||||
pub struct DynamicKeyLockGuard {
|
||||
guard: Option<OwnedKeyRwLockGuard>,
|
||||
key: String,
|
||||
lock: DynamicKeyLock,
|
||||
}
|
||||
|
||||
impl Drop for DynamicKeyLockGuard {
|
||||
fn drop(&mut self) {
|
||||
if let Some(guard) = self.guard.take() {
|
||||
drop(guard);
|
||||
}
|
||||
self.lock.clean_keys(&[self.key.to_string()]);
|
||||
}
|
||||
}
|
||||
|
||||
/// Shared context of the manager.
|
||||
pub(crate) struct ManagerContext {
|
||||
/// Procedure loaders. The key is the type name of the procedure which the loader returns.
|
||||
loaders: Mutex<HashMap<String, BoxedProcedureLoader>>,
|
||||
/// The key lock for the procedure.
|
||||
///
|
||||
/// The lock keys are defined in `Procedure::lock_key()`.
|
||||
/// These locks are acquired before the procedure starts and released after the procedure finishes.
|
||||
/// They ensure exclusive access to resources throughout the entire procedure lifecycle.
|
||||
key_lock: KeyRwLock<String>,
|
||||
/// The dynamic lock for procedure execution.
|
||||
///
|
||||
/// Unlike the procedure-level locks, these locks are acquired dynamically by the procedure
|
||||
/// during execution. They are only held when the procedure specifically needs these keys
|
||||
/// and are released as soon as the procedure no longer needs them.
|
||||
/// This allows for more fine-grained concurrency control during procedure execution.
|
||||
dynamic_key_lock: DynamicKeyLock,
|
||||
/// Procedures in the manager.
|
||||
procedures: RwLock<HashMap<ProcedureId, ProcedureMetaRef>>,
|
||||
/// Running procedures.
|
||||
running_procedures: Mutex<HashSet<ProcedureId>>,
|
||||
/// Ids and finished time of finished procedures.
|
||||
finished_procedures: Mutex<VecDeque<(ProcedureId, Instant)>>,
|
||||
@@ -199,6 +266,10 @@ impl ContextProvider for ManagerContext {
|
||||
let procedure_id = procedure_id.to_string();
|
||||
self.poison_manager.try_put_poison(key, procedure_id).await
|
||||
}
|
||||
|
||||
async fn acquire_lock(&self, key: &StringKey) -> DynamicKeyLockGuard {
|
||||
acquire_dynamic_key_lock(&self.dynamic_key_lock, key).await
|
||||
}
|
||||
}
|
||||
|
||||
impl ManagerContext {
|
||||
@@ -206,6 +277,7 @@ impl ManagerContext {
|
||||
fn new(poison_manager: PoisonStoreRef) -> ManagerContext {
|
||||
ManagerContext {
|
||||
key_lock: KeyRwLock::new(),
|
||||
dynamic_key_lock: Arc::new(KeyRwLock::new()),
|
||||
loaders: Mutex::new(HashMap::new()),
|
||||
procedures: RwLock::new(HashMap::new()),
|
||||
running_procedures: Mutex::new(HashSet::new()),
|
||||
|
||||
@@ -23,9 +23,9 @@ use snafu::ResultExt;
|
||||
use tokio::time;
|
||||
|
||||
use crate::error::{self, ProcedurePanicSnafu, Result, RollbackTimesExceededSnafu};
|
||||
use crate::local::rwlock::OwnedKeyRwLockGuard;
|
||||
use crate::local::{ManagerContext, ProcedureMeta, ProcedureMetaRef};
|
||||
use crate::procedure::{Output, StringKey};
|
||||
use crate::rwlock::OwnedKeyRwLockGuard;
|
||||
use crate::store::{ProcedureMessage, ProcedureStore};
|
||||
use crate::{
|
||||
BoxedProcedure, Context, Error, Procedure, ProcedureId, ProcedureState, ProcedureWithId, Status,
|
||||
@@ -581,6 +581,7 @@ impl Runner {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
@@ -588,13 +589,14 @@ mod tests {
|
||||
use common_error::mock::MockError;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
use futures::future::join_all;
|
||||
use futures_util::future::BoxFuture;
|
||||
use futures_util::FutureExt;
|
||||
use object_store::{EntryMode, ObjectStore};
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use super::*;
|
||||
use crate::local::test_util;
|
||||
use crate::local::{test_util, DynamicKeyLockGuard};
|
||||
use crate::procedure::PoisonKeys;
|
||||
use crate::store::proc_path;
|
||||
use crate::test_util::InMemoryPoisonStore;
|
||||
@@ -666,6 +668,10 @@ mod tests {
|
||||
) -> Result<()> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
// Test stub: panics via `unimplemented!()` if a procedure tries to take a dynamic lock
// through this provider.
async fn acquire_lock(&self, _key: &StringKey) -> DynamicKeyLockGuard {
    unimplemented!()
}
|
||||
}
|
||||
|
||||
Context {
|
||||
@@ -1674,4 +1680,66 @@ mod tests {
|
||||
// If the procedure is poisoned, the poison key shouldn't be deleted.
|
||||
assert_eq!(procedure_id, ROOT_ID);
|
||||
}
|
||||
|
||||
fn test_procedure_with_dynamic_lock(
|
||||
shared_atomic_value: Arc<AtomicU64>,
|
||||
id: u64,
|
||||
) -> (BoxedProcedure, Arc<ProcedureMeta>) {
|
||||
let exec_fn = move |ctx: Context| {
|
||||
let moved_shared_atomic_value = shared_atomic_value.clone();
|
||||
let moved_ctx = ctx.clone();
|
||||
async move {
|
||||
debug!("Acquiring write lock, id: {}", id);
|
||||
let key = StringKey::Exclusive("test_lock".to_string());
|
||||
let guard = moved_ctx.provider.acquire_lock(&key).await;
|
||||
debug!("Acquired write lock, id: {}", id);
|
||||
let millis = rand::rng().random_range(10..=50);
|
||||
tokio::time::sleep(Duration::from_millis(millis)).await;
|
||||
let value = moved_shared_atomic_value.load(Ordering::Relaxed);
|
||||
moved_shared_atomic_value.store(value + 1, Ordering::Relaxed);
|
||||
debug!("Dropping write lock, id: {}", id);
|
||||
drop(guard);
|
||||
|
||||
Ok(Status::done())
|
||||
}
|
||||
.boxed()
|
||||
};
|
||||
|
||||
let adapter = ProcedureAdapter {
|
||||
data: "dynamic_lock".to_string(),
|
||||
lock_key: LockKey::new_exclusive([]),
|
||||
poison_keys: PoisonKeys::new([]),
|
||||
exec_fn,
|
||||
rollback_fn: None,
|
||||
};
|
||||
let meta = adapter.new_meta(ROOT_ID);
|
||||
|
||||
(Box::new(adapter), meta)
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_execute_with_dynamic_lock() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let shared_atomic_value = Arc::new(AtomicU64::new(0));
|
||||
let (procedure1, meta1) = test_procedure_with_dynamic_lock(shared_atomic_value.clone(), 1);
|
||||
let (procedure2, meta2) = test_procedure_with_dynamic_lock(shared_atomic_value.clone(), 2);
|
||||
|
||||
let dir = create_temp_dir("dynamic_lock");
|
||||
let object_store = test_util::new_object_store(&dir);
|
||||
let procedure_store = Arc::new(ProcedureStore::from_object_store(object_store.clone()));
|
||||
let mut runner1 = new_runner(meta1.clone(), procedure1, procedure_store.clone());
|
||||
let mut runner2 = new_runner(meta2.clone(), procedure2, procedure_store.clone());
|
||||
let ctx1 = context_with_provider(
|
||||
meta1.id,
|
||||
runner1.manager_ctx.clone() as Arc<dyn ContextProvider>,
|
||||
);
|
||||
let ctx2 = context_with_provider(
|
||||
meta2.id,
|
||||
// use same manager ctx as runner1
|
||||
runner1.manager_ctx.clone() as Arc<dyn ContextProvider>,
|
||||
);
|
||||
let tasks = [runner1.execute_once(&ctx1), runner2.execute_once(&ctx2)];
|
||||
join_all(tasks).await;
|
||||
assert_eq!(shared_atomic_value.load(Ordering::Relaxed), 2);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ use snafu::{ResultExt, Snafu};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::{self, Error, Result};
|
||||
use crate::local::DynamicKeyLockGuard;
|
||||
use crate::watcher::Watcher;
|
||||
|
||||
pub type Output = Arc<dyn Any + Send + Sync>;
|
||||
@@ -144,6 +145,9 @@ pub trait ContextProvider: Send + Sync {
|
||||
/// This method is used to mark a resource as being operated on by a procedure.
|
||||
/// If the poison key already exists with a different value, the operation will fail.
|
||||
async fn try_put_poison(&self, key: &PoisonKey, procedure_id: ProcedureId) -> Result<()>;
|
||||
|
||||
/// Acquires a key lock for the procedure.
|
||||
async fn acquire_lock(&self, key: &StringKey) -> DynamicKeyLockGuard;
|
||||
}
|
||||
|
||||
/// Reference-counted pointer to [ContextProvider].
|
||||
|
||||
@@ -18,8 +18,18 @@ use std::sync::{Arc, Mutex};
|
||||
|
||||
use tokio::sync::{OwnedRwLockReadGuard, OwnedRwLockWriteGuard, RwLock};
|
||||
|
||||
/// A guard that owns a read or write lock on a key.
|
||||
///
|
||||
/// This enum wraps either a read or write lock guard obtained from a `KeyRwLock`.
|
||||
/// The guard is automatically released when it is dropped.
|
||||
pub enum OwnedKeyRwLockGuard {
|
||||
/// Represents a shared read lock on a key.
|
||||
/// Multiple read locks can be held simultaneously for the same key.
|
||||
Read { _guard: OwnedRwLockReadGuard<()> },
|
||||
|
||||
/// Represents an exclusive write lock on a key.
|
||||
/// Only one write lock can be held at a time for a given key,
|
||||
/// and no read locks can be held simultaneously with a write lock.
|
||||
Write { _guard: OwnedRwLockWriteGuard<()> },
|
||||
}
|
||||
|
||||
@@ -36,7 +46,7 @@ impl From<OwnedRwLockWriteGuard<()>> for OwnedKeyRwLockGuard {
|
||||
}
|
||||
|
||||
/// Locks based on a key, allowing other keys to lock independently.
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Default)]
|
||||
pub struct KeyRwLock<K> {
|
||||
/// The inner map of locks for specific keys.
|
||||
inner: Mutex<HashMap<K, Arc<RwLock<()>>>>,
|
||||
Reference in New Issue
Block a user