mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-26 09:50:40 +00:00
refactor(datanode): move Instance heartbeat task to Datanode struct (#1832)
* refactor(datanode): move Instance heartbeat to Datanode struct
* chore: apply suggestions from CR
* fix: start heartbeat task after instance starts
This commit is contained in:
@@ -14,10 +14,10 @@
|
||||
|
||||
//! Datanode configurations
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_error::prelude::BoxedError;
|
||||
use common_telemetry::info;
|
||||
use common_telemetry::logging::LoggingOptions;
|
||||
use meta_client::MetaClientOptions;
|
||||
@@ -25,13 +25,15 @@ use secrecy::SecretString;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use servers::http::HttpOptions;
|
||||
use servers::Mode;
|
||||
use snafu::ResultExt;
|
||||
use storage::config::{
|
||||
EngineConfig as StorageEngineConfig, DEFAULT_AUTO_FLUSH_INTERVAL, DEFAULT_MAX_FLUSH_TASKS,
|
||||
DEFAULT_PICKER_SCHEDULE_INTERVAL, DEFAULT_REGION_WRITE_BUFFER_SIZE,
|
||||
};
|
||||
use storage::scheduler::SchedulerConfig;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::error::{Result, ShutdownInstanceSnafu};
|
||||
use crate::heartbeat::HeartbeatTask;
|
||||
use crate::instance::{Instance, InstanceRef};
|
||||
use crate::server::Services;
|
||||
|
||||
@@ -340,6 +342,7 @@ pub struct DatanodeOptions {
|
||||
pub rpc_addr: String,
|
||||
pub rpc_hostname: Option<String>,
|
||||
pub rpc_runtime_size: usize,
|
||||
pub heartbeat_interval_millis: u64,
|
||||
pub http_opts: HttpOptions,
|
||||
pub meta_client_options: Option<MetaClientOptions>,
|
||||
pub wal: WalConfig,
|
||||
@@ -363,6 +366,7 @@ impl Default for DatanodeOptions {
|
||||
storage: StorageConfig::default(),
|
||||
procedure: ProcedureConfig::default(),
|
||||
logging: LoggingOptions::default(),
|
||||
heartbeat_interval_millis: 5000,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -378,11 +382,12 @@ pub struct Datanode {
|
||||
opts: DatanodeOptions,
|
||||
services: Option<Services>,
|
||||
instance: InstanceRef,
|
||||
heartbeat_task: Option<HeartbeatTask>,
|
||||
}
|
||||
|
||||
impl Datanode {
|
||||
pub async fn new(opts: DatanodeOptions) -> Result<Datanode> {
|
||||
let instance = Arc::new(Instance::with_opts(&opts).await?);
|
||||
let (instance, heartbeat_task) = Instance::with_opts(&opts).await?;
|
||||
let services = match opts.mode {
|
||||
Mode::Distributed => Some(Services::try_new(instance.clone(), &opts).await?),
|
||||
Mode::Standalone => None,
|
||||
@@ -391,6 +396,7 @@ impl Datanode {
|
||||
opts,
|
||||
services,
|
||||
instance,
|
||||
heartbeat_task,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -402,7 +408,11 @@ impl Datanode {
|
||||
|
||||
/// Start only the internal component of datanode.
|
||||
pub async fn start_instance(&mut self) -> Result<()> {
|
||||
self.instance.start().await
|
||||
let _ = self.instance.start().await;
|
||||
if let Some(task) = &self.heartbeat_task {
|
||||
task.start().await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Start services of datanode. This method call will block until services are shutdown.
|
||||
@@ -419,7 +429,15 @@ impl Datanode {
|
||||
}
|
||||
|
||||
pub async fn shutdown_instance(&self) -> Result<()> {
|
||||
self.instance.shutdown().await
|
||||
if let Some(heartbeat_task) = &self.heartbeat_task {
|
||||
heartbeat_task
|
||||
.close()
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ShutdownInstanceSnafu)?;
|
||||
}
|
||||
let _ = self.instance.shutdown().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn shutdown_services(&self) -> Result<()> {
|
||||
|
||||
@@ -48,7 +48,7 @@ use storage::config::EngineConfig as StorageEngineConfig;
|
||||
use storage::scheduler::{LocalScheduler, SchedulerConfig};
|
||||
use storage::EngineImpl;
|
||||
use store_api::logstore::LogStore;
|
||||
use table::engine::manager::MemoryTableEngineManager;
|
||||
use table::engine::manager::{MemoryTableEngineManager, TableEngineManagerRef};
|
||||
use table::engine::{TableEngine, TableEngineProcedureRef};
|
||||
use table::requests::FlushTableRequest;
|
||||
use table::table::numbers::NumbersTable;
|
||||
@@ -78,14 +78,13 @@ pub struct Instance {
|
||||
pub(crate) sql_handler: SqlHandler,
|
||||
pub(crate) catalog_manager: CatalogManagerRef,
|
||||
pub(crate) table_id_provider: Option<TableIdProviderRef>,
|
||||
pub(crate) heartbeat_task: Option<HeartbeatTask>,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
}
|
||||
|
||||
pub type InstanceRef = Arc<Instance>;
|
||||
|
||||
impl Instance {
|
||||
pub async fn with_opts(opts: &DatanodeOptions) -> Result<Self> {
|
||||
pub async fn with_opts(opts: &DatanodeOptions) -> Result<(InstanceRef, Option<HeartbeatTask>)> {
|
||||
let meta_client = match opts.mode {
|
||||
Mode::Standalone => None,
|
||||
Mode::Distributed => {
|
||||
@@ -105,11 +104,57 @@ impl Instance {
|
||||
Self::new(opts, meta_client, compaction_scheduler).await
|
||||
}
|
||||
|
||||
fn build_heartbeat_task(
|
||||
opts: &DatanodeOptions,
|
||||
meta_client: Option<Arc<MetaClient>>,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
engine_manager: TableEngineManagerRef,
|
||||
region_alive_keepers: Option<Arc<RegionAliveKeepers>>,
|
||||
) -> Result<Option<HeartbeatTask>> {
|
||||
Ok(match opts.mode {
|
||||
Mode::Standalone => None,
|
||||
Mode::Distributed => {
|
||||
let node_id = opts.node_id.context(MissingNodeIdSnafu)?;
|
||||
let meta_client = meta_client.context(IncorrectInternalStateSnafu {
|
||||
state: "meta client is not provided when building heartbeat task",
|
||||
})?;
|
||||
let region_alive_keepers =
|
||||
region_alive_keepers.context(IncorrectInternalStateSnafu {
|
||||
state: "region_alive_keepers is not provided when building heartbeat task",
|
||||
})?;
|
||||
let handlers_executor = HandlerGroupExecutor::new(vec![
|
||||
Arc::new(ParseMailboxMessageHandler::default()),
|
||||
Arc::new(OpenRegionHandler::new(
|
||||
catalog_manager.clone(),
|
||||
engine_manager.clone(),
|
||||
region_alive_keepers.clone(),
|
||||
)),
|
||||
Arc::new(CloseRegionHandler::new(
|
||||
catalog_manager.clone(),
|
||||
engine_manager,
|
||||
region_alive_keepers.clone(),
|
||||
)),
|
||||
region_alive_keepers.clone(),
|
||||
]);
|
||||
|
||||
Some(HeartbeatTask::new(
|
||||
node_id,
|
||||
opts,
|
||||
meta_client,
|
||||
catalog_manager,
|
||||
Arc::new(handlers_executor),
|
||||
opts.heartbeat_interval_millis,
|
||||
region_alive_keepers,
|
||||
))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) async fn new(
|
||||
opts: &DatanodeOptions,
|
||||
meta_client: Option<Arc<MetaClient>>,
|
||||
compaction_scheduler: CompactionSchedulerRef<RaftEngineLogStore>,
|
||||
) -> Result<Self> {
|
||||
) -> Result<(InstanceRef, Option<HeartbeatTask>)> {
|
||||
let object_store = store::new_object_store(&opts.storage.store).await?;
|
||||
let log_store = Arc::new(create_log_store(&opts.storage.store, &opts.wal).await?);
|
||||
|
||||
@@ -151,7 +196,7 @@ impl Instance {
|
||||
);
|
||||
|
||||
// create remote catalog manager
|
||||
let (catalog_manager, table_id_provider, heartbeat_task) = match opts.mode {
|
||||
let (catalog_manager, table_id_provider, region_alive_keepers) = match opts.mode {
|
||||
Mode::Standalone => {
|
||||
if opts.enable_memory_catalog {
|
||||
let catalog = Arc::new(catalog::local::MemoryCatalogManager::default());
|
||||
@@ -189,17 +234,15 @@ impl Instance {
|
||||
}
|
||||
|
||||
Mode::Distributed => {
|
||||
let meta_client = meta_client.context(IncorrectInternalStateSnafu {
|
||||
let meta_client = meta_client.clone().context(IncorrectInternalStateSnafu {
|
||||
state: "meta client is not provided when creating distributed Datanode",
|
||||
})?;
|
||||
|
||||
let kv_backend = Arc::new(CachedMetaKvBackend::new(meta_client.clone()));
|
||||
|
||||
let heartbeat_interval_millis = 5000;
|
||||
let kv_backend = Arc::new(CachedMetaKvBackend::new(meta_client));
|
||||
|
||||
let region_alive_keepers = Arc::new(RegionAliveKeepers::new(
|
||||
engine_manager.clone(),
|
||||
heartbeat_interval_millis,
|
||||
opts.heartbeat_interval_millis,
|
||||
));
|
||||
|
||||
let catalog_manager = Arc::new(RemoteCatalogManager::new(
|
||||
@@ -209,32 +252,11 @@ impl Instance {
|
||||
region_alive_keepers.clone(),
|
||||
));
|
||||
|
||||
let handlers_executor = HandlerGroupExecutor::new(vec![
|
||||
Arc::new(ParseMailboxMessageHandler::default()),
|
||||
Arc::new(OpenRegionHandler::new(
|
||||
catalog_manager.clone(),
|
||||
engine_manager.clone(),
|
||||
region_alive_keepers.clone(),
|
||||
)),
|
||||
Arc::new(CloseRegionHandler::new(
|
||||
catalog_manager.clone(),
|
||||
engine_manager.clone(),
|
||||
region_alive_keepers.clone(),
|
||||
)),
|
||||
region_alive_keepers.clone(),
|
||||
]);
|
||||
|
||||
let heartbeat_task = Some(HeartbeatTask::new(
|
||||
opts.node_id.context(MissingNodeIdSnafu)?,
|
||||
opts,
|
||||
meta_client,
|
||||
catalog_manager.clone(),
|
||||
Arc::new(handlers_executor),
|
||||
heartbeat_interval_millis,
|
||||
region_alive_keepers,
|
||||
));
|
||||
|
||||
(catalog_manager as CatalogManagerRef, None, heartbeat_task)
|
||||
(
|
||||
catalog_manager as CatalogManagerRef,
|
||||
None,
|
||||
Some(region_alive_keepers),
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
@@ -258,18 +280,27 @@ impl Instance {
|
||||
&*procedure_manager,
|
||||
);
|
||||
|
||||
Ok(Self {
|
||||
let instance = Arc::new(Self {
|
||||
query_engine: query_engine.clone(),
|
||||
sql_handler: SqlHandler::new(
|
||||
engine_manager,
|
||||
engine_manager.clone(),
|
||||
catalog_manager.clone(),
|
||||
procedure_manager.clone(),
|
||||
),
|
||||
catalog_manager,
|
||||
heartbeat_task,
|
||||
catalog_manager: catalog_manager.clone(),
|
||||
table_id_provider,
|
||||
procedure_manager,
|
||||
})
|
||||
});
|
||||
|
||||
let heartbeat_task = Instance::build_heartbeat_task(
|
||||
opts,
|
||||
meta_client,
|
||||
catalog_manager,
|
||||
engine_manager,
|
||||
region_alive_keepers,
|
||||
)?;
|
||||
|
||||
Ok((instance, heartbeat_task))
|
||||
}
|
||||
|
||||
pub async fn start(&self) -> Result<()> {
|
||||
@@ -277,9 +308,6 @@ impl Instance {
|
||||
.start()
|
||||
.await
|
||||
.context(NewCatalogSnafu)?;
|
||||
if let Some(task) = &self.heartbeat_task {
|
||||
task.start().await?;
|
||||
}
|
||||
|
||||
// Recover procedures after the catalog manager is started, so we can
|
||||
// ensure we can access all tables from the catalog manager.
|
||||
@@ -298,13 +326,6 @@ impl Instance {
|
||||
.stop()
|
||||
.await
|
||||
.context(StopProcedureManagerSnafu)?;
|
||||
if let Some(heartbeat_task) = &self.heartbeat_task {
|
||||
heartbeat_task
|
||||
.close()
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ShutdownInstanceSnafu)?;
|
||||
}
|
||||
|
||||
self.flush_tables().await?;
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
|
||||
pub mod datanode;
|
||||
pub mod error;
|
||||
mod heartbeat;
|
||||
pub mod heartbeat;
|
||||
pub mod instance;
|
||||
pub mod metrics;
|
||||
mod mock;
|
||||
|
||||
@@ -21,15 +21,21 @@ use storage::compaction::noop::NoopCompactionScheduler;
|
||||
|
||||
use crate::datanode::DatanodeOptions;
|
||||
use crate::error::Result;
|
||||
use crate::instance::Instance;
|
||||
use crate::heartbeat::HeartbeatTask;
|
||||
use crate::instance::{Instance, InstanceRef};
|
||||
|
||||
impl Instance {
|
||||
pub async fn with_mock_meta_client(opts: &DatanodeOptions) -> Result<Self> {
|
||||
pub async fn with_mock_meta_client(
|
||||
opts: &DatanodeOptions,
|
||||
) -> Result<(InstanceRef, Option<HeartbeatTask>)> {
|
||||
let mock_info = meta_srv::mocks::mock_with_memstore().await;
|
||||
Self::with_mock_meta_server(opts, mock_info).await
|
||||
}
|
||||
|
||||
pub async fn with_mock_meta_server(opts: &DatanodeOptions, meta_srv: MockInfo) -> Result<Self> {
|
||||
pub async fn with_mock_meta_server(
|
||||
opts: &DatanodeOptions,
|
||||
meta_srv: MockInfo,
|
||||
) -> Result<(InstanceRef, Option<HeartbeatTask>)> {
|
||||
let meta_client = Arc::new(mock_meta_client(meta_srv, opts.node_id.unwrap_or(42)).await);
|
||||
let compaction_scheduler = Arc::new(NoopCompactionScheduler::default());
|
||||
Instance::new(opts, Some(meta_client), compaction_scheduler).await
|
||||
|
||||
@@ -29,10 +29,12 @@ use crate::datanode::{
|
||||
DatanodeOptions, FileConfig, ObjectStoreConfig, ProcedureConfig, StorageConfig, WalConfig,
|
||||
};
|
||||
use crate::error::{CreateTableSnafu, Result};
|
||||
use crate::instance::Instance;
|
||||
use crate::heartbeat::HeartbeatTask;
|
||||
use crate::instance::{Instance, InstanceRef};
|
||||
|
||||
pub(crate) struct MockInstance {
|
||||
instance: Instance,
|
||||
instance: InstanceRef,
|
||||
_heartbeat: Option<HeartbeatTask>,
|
||||
_guard: TestGuard,
|
||||
}
|
||||
|
||||
@@ -40,10 +42,17 @@ impl MockInstance {
|
||||
pub(crate) async fn new(name: &str) -> Self {
|
||||
let (opts, _guard) = create_tmp_dir_and_datanode_opts(name);
|
||||
|
||||
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
|
||||
let (instance, heartbeat) = Instance::with_mock_meta_client(&opts).await.unwrap();
|
||||
instance.start().await.unwrap();
|
||||
if let Some(task) = heartbeat.as_ref() {
|
||||
task.start().await.unwrap();
|
||||
}
|
||||
|
||||
MockInstance { instance, _guard }
|
||||
MockInstance {
|
||||
instance,
|
||||
_guard,
|
||||
_heartbeat: heartbeat,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn inner(&self) -> &Instance {
|
||||
|
||||
Reference in New Issue
Block a user