// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use std::{fs, path};

use api::v1::meta::Role;
use catalog::local::MemoryCatalogManager;
use catalog::CatalogManagerRef;
use common_base::Plugins;
use common_catalog::consts::DEFAULT_CATALOG_NAME;
use common_config::WalConfig;
use common_error::ext::BoxedError;
use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
use common_meta::heartbeat::handler::HandlerGroupExecutor;
use common_procedure::local::{LocalManager, ManagerConfig};
use common_procedure::store::state_store::ObjectStateStore;
use common_procedure::ProcedureManagerRef;
use common_telemetry::logging::{debug, info};
use file_table_engine::engine::immutable::ImmutableFileTableEngine;
use log_store::raft_engine::log_store::RaftEngineLogStore;
use meta_client::client::{MetaClient, MetaClientBuilder};
use meta_client::MetaClientOptions;
use mito::config::EngineConfig as TableEngineConfig;
use mito::engine::MitoEngine;
use object_store::{util, ObjectStore};
use query::query_engine::{QueryEngineFactory, QueryEngineRef};
use servers::Mode;
use session::context::QueryContextBuilder;
use snafu::prelude::*;
use storage::compaction::{CompactionHandler, CompactionSchedulerRef};
use storage::config::EngineConfig as StorageEngineConfig;
use storage::scheduler::{LocalScheduler, SchedulerConfig};
use storage::EngineImpl;
use store_api::logstore::LogStore;
use store_api::path_utils::{CLUSTER_DIR, WAL_DIR};
use table::engine::manager::MemoryTableEngineManager;
use table::engine::{TableEngine, TableEngineProcedureRef};
use table::requests::FlushTableRequest;
use table::table::TableIdProviderRef;

use crate::datanode::{DatanodeOptions, ProcedureConfig};
use crate::error::{
    self, CatalogSnafu, IncorrectInternalStateSnafu, MetaClientInitSnafu, MissingMetasrvOptsSnafu,
    MissingNodeIdSnafu, NewCatalogSnafu, OpenLogStoreSnafu, RecoverProcedureSnafu, Result,
    ShutdownInstanceSnafu, StartProcedureManagerSnafu, StopProcedureManagerSnafu,
};
use crate::greptimedb_telemetry::get_greptimedb_telemetry_task;
use crate::heartbeat::HeartbeatTask;
use crate::row_inserter::RowInserter;
use crate::sql::{SqlHandler, SqlRequest};
use crate::store;

mod grpc;
pub mod sql;

pub(crate) type DefaultEngine = MitoEngine<EngineImpl<RaftEngineLogStore>>;

/// An abstraction over the datanode's read/write services.
pub struct Instance {
    pub(crate) query_engine: QueryEngineRef,
    pub(crate) sql_handler: SqlHandler,
    pub(crate) catalog_manager: CatalogManagerRef,
    pub(crate) table_id_provider: Option<TableIdProviderRef>,
    row_inserter: RowInserter,
    procedure_manager: ProcedureManagerRef,
    greptimedb_telemetry_task: Arc<GreptimeDBTelemetryTask>,
}

pub type InstanceRef = Arc<Instance>;

impl Instance {
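    /// Creates an [Instance], plus the heartbeat task in distributed mode,
    /// from the given [DatanodeOptions]. In `Mode::Distributed` a metasrv
    /// client is built first; `Mode::Standalone` runs without one.
    ///
    /// A minimal usage sketch, assuming `DatanodeOptions` and `Plugins` can
    /// be default-constructed (illustrative only, hence `ignore`):
    ///
    /// ```ignore
    /// let opts = DatanodeOptions::default();
    /// let (instance, heartbeat_task) =
    ///     Instance::with_opts(&opts, Arc::new(Plugins::default())).await?;
    /// instance.start().await?;
    /// ```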
    pub async fn with_opts(
        opts: &DatanodeOptions,
        plugins: Arc<Plugins>,
    ) -> Result<(InstanceRef, Option<HeartbeatTask>)> {
        let meta_client = match opts.mode {
            Mode::Standalone => None,
            Mode::Distributed => {
                let meta_client = new_metasrv_client(
                    opts.node_id.context(MissingNodeIdSnafu)?,
                    opts.meta_client_options
                        .as_ref()
                        .context(MissingMetasrvOptsSnafu)?,
                )
                .await?;
                Some(Arc::new(meta_client))
            }
        };

        let compaction_scheduler = create_compaction_scheduler(opts);

        Self::new(opts, meta_client, compaction_scheduler, plugins).await
    }
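
    /// Builds the optional [HeartbeatTask]: always `None` in standalone mode.
    /// The distributed branch is still a stub (`todo!`) and this method is
    /// slated for removal.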
    fn build_heartbeat_task(
        opts: &DatanodeOptions,
        meta_client: Option<Arc<MetaClient>>,
    ) -> Result<Option<HeartbeatTask>> {
        Ok(match opts.mode {
            Mode::Standalone => None,
            Mode::Distributed => {
                let _node_id = opts.node_id.context(MissingNodeIdSnafu)?;
                let _meta_client = meta_client.context(IncorrectInternalStateSnafu {
                    state: "meta client is not provided when building heartbeat task",
                })?;
                let _handlers_executor =
                    HandlerGroupExecutor::new(vec![Arc::new(ParseMailboxMessageHandler)]);

                todo!("remove this method")
            }
        })
    }
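
    /// Wires up the datanode internals: object store, WAL-backed log store,
    /// mito and immutable-file table engines, the catalog manager, query
    /// engine, procedure manager, and the telemetry task.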
    pub(crate) async fn new(
        opts: &DatanodeOptions,
        meta_client: Option<Arc<MetaClient>>,
        compaction_scheduler: CompactionSchedulerRef<RaftEngineLogStore>,
        plugins: Arc<Plugins>,
    ) -> Result<(InstanceRef, Option<HeartbeatTask>)> {
        let data_home = util::normalize_dir(&opts.storage.data_home);
        info!("The working home directory is: {}", data_home);
        let object_store = store::new_object_store(opts).await?;
        let log_store = Arc::new(create_log_store(&data_home, opts.wal.clone()).await?);

        let mito_engine = Arc::new(DefaultEngine::new(
            TableEngineConfig {
                compress_manifest: opts.storage.manifest.compress,
            },
            EngineImpl::new(
                StorageEngineConfig::from(opts),
                log_store.clone(),
                object_store.clone(),
                compaction_scheduler,
            )
            .unwrap(),
            object_store.clone(),
        ));

        let immutable_file_engine = Arc::new(ImmutableFileTableEngine::new(
            file_table_engine::config::EngineConfig::default(),
            object_store.clone(),
        ));

        let engine_procedures = HashMap::from([
            (
                mito_engine.name().to_string(),
                mito_engine.clone() as TableEngineProcedureRef,
            ),
            (
                immutable_file_engine.name().to_string(),
                immutable_file_engine.clone() as TableEngineProcedureRef,
            ),
        ]);
        let engine_manager = Arc::new(
            MemoryTableEngineManager::with(vec![
                mito_engine.clone(),
                immutable_file_engine.clone(),
            ])
            .with_engine_procedures(engine_procedures),
        );

        // Create the catalog manager: a full local catalog manager in
        // standalone mode, an in-memory one in distributed mode.
        let (catalog_manager, table_id_provider) = match opts.mode {
            Mode::Standalone => {
                let catalog = Arc::new(
                    catalog::local::LocalCatalogManager::try_new(engine_manager.clone())
                        .await
                        .context(CatalogSnafu)?,
                );

                (
                    catalog.clone() as CatalogManagerRef,
                    Some(catalog as TableIdProviderRef),
                )
            }

            Mode::Distributed => (
                MemoryCatalogManager::with_default_setup() as CatalogManagerRef,
                None,
            ),
        };

        let factory =
            QueryEngineFactory::new_with_plugins(catalog_manager.clone(), None, false, plugins);
        let query_engine = factory.query_engine();
        let procedure_manager = create_procedure_manager(
            opts.node_id.unwrap_or(0),
            &ProcedureConfig::default(),
            object_store,
        )
        .await?;
        let sql_handler = SqlHandler::new(
            engine_manager.clone(),
            catalog_manager.clone(),
            procedure_manager.clone(),
        );
        // Register all procedures.
        // Register procedures of the mito engine.
        mito_engine.register_procedure_loaders(&*procedure_manager);
        // Register procedures of the file table engine.
        immutable_file_engine.register_procedure_loaders(&*procedure_manager);
        // Register procedures in table-procedure crate.
        table_procedure::register_procedure_loaders(
            catalog_manager.clone(),
            mito_engine.clone(),
            mito_engine.clone(),
            &*procedure_manager,
        );
        let row_inserter = RowInserter::new(catalog_manager.clone());
        let greptimedb_telemetry_task = get_greptimedb_telemetry_task(
            Some(opts.storage.data_home.clone()),
            &opts.mode,
            opts.enable_telemetry,
        )
        .await;

        let instance = Arc::new(Self {
            query_engine: query_engine.clone(),
            sql_handler,
            catalog_manager: catalog_manager.clone(),
            table_id_provider,
            row_inserter,
            procedure_manager,
            greptimedb_telemetry_task,
        });

        let heartbeat_task = Instance::build_heartbeat_task(opts, meta_client)?;

        Ok((instance, heartbeat_task))
    }
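
    /// Starts the catalog manager, then recovers and starts the procedure
    /// manager, and finally spawns the telemetry task; a telemetry failure is
    /// logged but not propagated.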
    pub async fn start(&self) -> Result<()> {
        self.catalog_manager
            .start()
            .await
            .context(NewCatalogSnafu)?;

        // Recover procedures after the catalog manager is started, so we can
        // ensure we can access all tables from the catalog manager.
        self.procedure_manager
            .recover()
            .await
            .context(RecoverProcedureSnafu)?;
        self.procedure_manager
            .start()
            .context(StartProcedureManagerSnafu)?;
        let _ = self
            .greptimedb_telemetry_task
            .start(common_runtime::bg_runtime())
            .map_err(|e| {
                debug!("Failed to start greptimedb telemetry task: {}", e);
            });

        Ok(())
    }
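
    /// Stops the procedure manager, flushes all tables, and closes the SQL
    /// handler.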
    pub async fn shutdown(&self) -> Result<()> {
        self.procedure_manager
            .stop()
            .await
            .context(StopProcedureManagerSnafu)?;

        self.flush_tables().await?;

        self.sql_handler
            .close()
            .await
            .map_err(BoxedError::new)
            .context(ShutdownInstanceSnafu)
    }
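
    /// Flushes every schema under the default catalog, waiting for each flush
    /// to finish before returning.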
    pub async fn flush_tables(&self) -> Result<()> {
        info!("going to flush all schemas under {DEFAULT_CATALOG_NAME}");
        let schema_list = self
            .catalog_manager
            .schema_names(DEFAULT_CATALOG_NAME)
            .await
            .map_err(BoxedError::new)
            .context(ShutdownInstanceSnafu)?;
        let flush_requests = schema_list
            .into_iter()
            .map(|schema_name| {
                SqlRequest::FlushTable(FlushTableRequest {
                    catalog_name: DEFAULT_CATALOG_NAME.to_string(),
                    schema_name,
                    table_name: None,
                    region_number: None,
                    wait: Some(true),
                })
            })
            .collect::<Vec<_>>();
        let flush_result =
            futures::future::try_join_all(flush_requests.into_iter().map(|request| {
                self.sql_handler
                    .execute(request, QueryContextBuilder::default().build())
            }))
            .await
            .map_err(BoxedError::new)
            .context(ShutdownInstanceSnafu);
        info!("Flushed all tables result: {}", flush_result.is_ok());
        let _ = flush_result?;

        Ok(())
    }

    pub fn sql_handler(&self) -> &SqlHandler {
        &self.sql_handler
    }

    pub fn catalog_manager(&self) -> &CatalogManagerRef {
        &self.catalog_manager
    }

    pub fn query_engine(&self) -> QueryEngineRef {
        self.query_engine.clone()
    }
}
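
/// Creates a compaction scheduler that runs compaction tasks on a
/// [LocalScheduler].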
fn create_compaction_scheduler<S: LogStore>(opts: &DatanodeOptions) -> CompactionSchedulerRef<S> {
    let config = SchedulerConfig::from(opts);
    let handler = CompactionHandler::default();
    let scheduler = LocalScheduler::new(config, handler);
    Arc::new(scheduler)
}

/// Creates a metasrv client instance and spawns the heartbeat loop.
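/// The returned client has the heartbeat, router, and store services enabled,
/// and must be able to reach a metasrv leader at startup.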
pub async fn new_metasrv_client(
    node_id: u64,
    meta_config: &MetaClientOptions,
) -> Result<MetaClient> {
    let cluster_id = 0; // TODO(hl): read from config
    let member_id = node_id;

    let config = ChannelConfig::new()
        .timeout(Duration::from_millis(meta_config.timeout_millis))
        .connect_timeout(Duration::from_millis(meta_config.connect_timeout_millis))
        .tcp_nodelay(meta_config.tcp_nodelay);
    let channel_manager = ChannelManager::with_config(config);

    let mut meta_client = MetaClientBuilder::new(cluster_id, member_id, Role::Datanode)
        .enable_heartbeat()
        .enable_router()
        .enable_store()
        .channel_manager(channel_manager)
        .build();
    meta_client
        .start(&meta_config.metasrv_addrs)
        .await
        .context(MetaClientInitSnafu)?;

    // required only when the heartbeat_client is enabled
    meta_client
        .ask_leader()
        .await
        .context(MetaClientInitSnafu)?;
    Ok(meta_client)
}
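
/// Creates the WAL directory under `data_home` if it is absent, then opens a
/// [RaftEngineLogStore] there with the given [WalConfig].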
pub(crate) async fn create_log_store(
    data_home: &str,
    wal_config: WalConfig,
) -> Result<RaftEngineLogStore> {
    let wal_dir = format!("{}{WAL_DIR}", data_home);

    // create WAL directory
    fs::create_dir_all(path::Path::new(&wal_dir))
        .context(error::CreateDirSnafu { dir: &wal_dir })?;
    info!(
        "Creating logstore with config: {:?} and storage path: {}",
        wal_config, &wal_dir
    );
    let logstore = RaftEngineLogStore::try_new(wal_dir, wal_config)
        .await
        .map_err(Box::new)
        .context(OpenLogStoreSnafu)?;
    Ok(logstore)
}
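
/// Creates a [LocalManager] whose procedure state is persisted through the
/// given object store, under a per-datanode path inside the cluster directory.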
pub(crate) async fn create_procedure_manager(
    datanode_id: u64,
    procedure_config: &ProcedureConfig,
    object_store: ObjectStore,
) -> Result<ProcedureManagerRef> {
    info!(
        "Creating procedure manager with config: {:?}",
        procedure_config
    );

    let state_store = Arc::new(ObjectStateStore::new(object_store));

    let dn_store_path = format!("{CLUSTER_DIR}dn-{datanode_id}/");

    info!("The datanode internal storage path is: {}", dn_store_path);

    let manager_config = ManagerConfig {
        parent_path: dn_store_path,
        max_retry_times: procedure_config.max_retry_times,
        retry_delay: procedure_config.retry_delay,
        ..Default::default()
    };

    Ok(Arc::new(LocalManager::new(manager_config, state_store)))
}