diff --git a/Cargo.lock b/Cargo.lock
index bdd59547a9..0d93a4a45e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3170,6 +3170,15 @@ dependencies = [
"uuid",
]
+[[package]]
+name = "delta-encoding"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f8513a5eeb3d7b9149563409dc4ab6fd9de5767fd285af5b4d0ee1b778fbce0"
+dependencies = [
+ "num-traits",
+]
+
[[package]]
name = "der"
version = "0.5.1"
@@ -5806,10 +5815,12 @@ dependencies = [
"common-test-util",
"common-time",
"common-wal",
+ "delta-encoding",
"futures",
"futures-util",
"itertools 0.10.5",
"lazy_static",
+ "object-store",
"pin-project",
"prometheus",
"protobuf",
diff --git a/config/config.md b/config/config.md
index cf622daf9f..295eacf4f1 100644
--- a/config/config.md
+++ b/config/config.md
@@ -374,6 +374,8 @@
| `wal.backoff_max` | String | `10s` | The maximum backoff delay.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.dump_index_interval` | String | `60s` | The interval for dumping WAL indexes.<br/>**It's only used when the provider is `kafka`**. |
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
diff --git a/config/datanode.example.toml b/config/datanode.example.toml
index c5cc04ebca..caaff9a3fb 100644
--- a/config/datanode.example.toml
+++ b/config/datanode.example.toml
@@ -187,6 +187,14 @@ backoff_base = 2
## **It's only used when the provider is `kafka`**.
backoff_deadline = "5mins"
+## Whether to enable WAL index creation.
+## **It's only used when the provider is `kafka`**.
+create_index = true
+
+## The interval for dumping WAL indexes.
+## **It's only used when the provider is `kafka`**.
+dump_index_interval = "60s"
+
# The Kafka SASL configuration.
# **It's only used when the provider is `kafka`**.
# Available SASL mechanisms:
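
Both new keys flow into the datanode's Kafka WAL config via serde, with `dump_index_interval` parsed as a humantime-style duration string. A minimal, self-contained sketch of that parsing, using a stand-in struct rather than the real `DatanodeKafkaConfig`:

```rust
use std::time::Duration;

use serde::Deserialize;

/// Stand-in for the WAL options added above; only the `humantime_serde`
/// usage mirrors the real config struct.
#[derive(Debug, Deserialize)]
struct WalOptions {
    create_index: bool,
    #[serde(with = "humantime_serde")]
    dump_index_interval: Duration,
}

fn main() {
    let raw = r#"
        create_index = true
        dump_index_interval = "60s"
    "#;
    let options: WalOptions = toml::from_str(raw).unwrap();
    assert!(options.create_index);
    assert_eq!(options.dump_index_interval, Duration::from_secs(60));
}
```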
diff --git a/src/common/wal/src/config.rs b/src/common/wal/src/config.rs
index 9bf3280c5a..90f3e44f9c 100644
--- a/src/common/wal/src/config.rs
+++ b/src/common/wal/src/config.rs
@@ -30,6 +30,7 @@ pub enum MetasrvWalConfig {
Kafka(MetasrvKafkaConfig),
}
+#[allow(clippy::large_enum_variant)]
/// Wal configurations for datanode.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "provider", rename_all = "snake_case")]
@@ -223,6 +224,7 @@ mod tests {
replication_factor: 1,
create_topic_timeout: Duration::from_secs(30),
},
+ ..Default::default()
};
assert_eq!(datanode_wal_config, DatanodeWalConfig::Kafka(expected));
}
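
The `#[allow(clippy::large_enum_variant)]` is presumably needed because the Kafka variant of `DatanodeWalConfig` has grown much larger than the raft-engine variant. A toy illustration of what the lint flags and the usual boxing alternative (the types here are hypothetical, not the real config structs):

```rust
// Hypothetical stand-ins just to show the size imbalance the lint cares about.
#[allow(dead_code)]
struct LargeKafkaConfig {
    _buf: [u8; 512],
}
#[allow(dead_code)]
struct SmallRaftEngineConfig {
    _flag: bool,
}

// Without the `allow`, clippy::large_enum_variant would flag this enum because
// every value reserves space for the largest variant.
#[allow(dead_code, clippy::large_enum_variant)]
enum WalConfig {
    RaftEngine(SmallRaftEngineConfig),
    Kafka(LargeKafkaConfig),
}

// The usual alternative is to box the large variant so the enum itself stays small.
#[allow(dead_code)]
enum BoxedWalConfig {
    RaftEngine(SmallRaftEngineConfig),
    Kafka(Box<LargeKafkaConfig>),
}

fn main() {
    assert!(std::mem::size_of::<BoxedWalConfig>() < std::mem::size_of::<WalConfig>());
}
```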
diff --git a/src/common/wal/src/config/kafka/datanode.rs b/src/common/wal/src/config/kafka/datanode.rs
index a1260c05ef..84e9da6bcc 100644
--- a/src/common/wal/src/config/kafka/datanode.rs
+++ b/src/common/wal/src/config/kafka/datanode.rs
@@ -40,6 +40,9 @@ pub struct DatanodeKafkaConfig {
/// The kafka topic config.
#[serde(flatten)]
pub kafka_topic: KafkaTopicConfig,
+ /// Whether to enable WAL index creation.
+ pub create_index: bool,
+ /// The interval for dumping WAL indexes.
+ #[serde(with = "humantime_serde")]
+ pub dump_index_interval: Duration,
}
impl Default for DatanodeKafkaConfig {
@@ -51,6 +54,8 @@ impl Default for DatanodeKafkaConfig {
consumer_wait_timeout: Duration::from_millis(100),
backoff: BackoffConfig::default(),
kafka_topic: KafkaTopicConfig::default(),
+ create_index: true,
+ dump_index_interval: Duration::from_secs(60),
}
}
}
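
Because the new fields have defaults (`create_index = true`, `dump_index_interval = 60s`), existing construction sites can stay on `..Default::default()` and only override what they need, which is what the updated test in config.rs above relies on. A small sketch, assuming `DatanodeKafkaConfig` is re-exported at the usual `common_wal::config::kafka` path:

```rust
use std::time::Duration;

use common_wal::config::kafka::DatanodeKafkaConfig;

fn main() {
    // Defaults introduced in this diff.
    let config = DatanodeKafkaConfig::default();
    assert!(config.create_index);
    assert_eq!(config.dump_index_interval, Duration::from_secs(60));

    // Override only the new knobs; everything else keeps its default.
    let custom = DatanodeKafkaConfig {
        create_index: false,
        dump_index_interval: Duration::from_secs(300),
        ..Default::default()
    };
    assert!(!custom.create_index);
}
```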
diff --git a/src/datanode/src/datanode.rs b/src/datanode/src/datanode.rs
index ceb40081d1..eca551a4a0 100644
--- a/src/datanode/src/datanode.rs
+++ b/src/datanode/src/datanode.rs
@@ -16,6 +16,7 @@
use std::path::Path;
use std::sync::Arc;
+use std::time::Duration;
use catalog::memory::MemoryCatalogManager;
use common_base::Plugins;
@@ -32,6 +33,7 @@ use common_wal::config::DatanodeWalConfig;
use file_engine::engine::FileRegionEngine;
use futures_util::TryStreamExt;
use log_store::kafka::log_store::KafkaLogStore;
+use log_store::kafka::{default_index_file, GlobalIndexCollector};
use log_store::raft_engine::log_store::RaftEngineLogStore;
use meta_client::MetaClientRef;
use metric_engine::engine::MetricEngine;
@@ -64,7 +66,7 @@ use crate::event_listener::{
use crate::greptimedb_telemetry::get_greptimedb_telemetry_task;
use crate::heartbeat::HeartbeatTask;
use crate::region_server::{DummyTableProviderFactory, RegionServer};
-use crate::store;
+use crate::store::{self, new_object_store_without_cache};
/// Datanode service.
pub struct Datanode {
@@ -398,15 +400,37 @@ impl DatanodeBuilder {
)
.await
.context(BuildMitoEngineSnafu)?,
- DatanodeWalConfig::Kafka(kafka_config) => MitoEngine::new(
- &opts.storage.data_home,
- config,
- Self::build_kafka_log_store(kafka_config).await?,
- object_store_manager,
- plugins,
- )
- .await
- .context(BuildMitoEngineSnafu)?,
+ DatanodeWalConfig::Kafka(kafka_config) => {
+ if kafka_config.create_index && opts.node_id.is_none() {
+ warn!("The WAL index creation only available in distributed mode.")
+ }
+ let global_index_collector = if kafka_config.create_index && opts.node_id.is_some()
+ {
+ let operator = new_object_store_without_cache(
+ &opts.storage.store,
+ &opts.storage.data_home,
+ )
+ .await?;
+ let path = default_index_file(opts.node_id.unwrap());
+ Some(Self::build_global_index_collector(
+ kafka_config.dump_index_interval,
+ operator,
+ path,
+ ))
+ } else {
+ None
+ };
+
+ MitoEngine::new(
+ &opts.storage.data_home,
+ config,
+ Self::build_kafka_log_store(kafka_config, global_index_collector).await?,
+ object_store_manager,
+ plugins,
+ )
+ .await
+ .context(BuildMitoEngineSnafu)?
+ }
};
Ok(mito_engine)
}
@@ -438,14 +462,26 @@ impl DatanodeBuilder {
Ok(Arc::new(logstore))
}
- /// Builds [KafkaLogStore].
- async fn build_kafka_log_store(config: &DatanodeKafkaConfig) -> Result<Arc<KafkaLogStore>> {
- KafkaLogStore::try_new(config)
+ /// Builds [`KafkaLogStore`].
+ async fn build_kafka_log_store(
+ config: &DatanodeKafkaConfig,
+ global_index_collector: Option<GlobalIndexCollector>,
+ ) -> Result<Arc<KafkaLogStore>> {
+ KafkaLogStore::try_new(config, global_index_collector)
.await
.map_err(Box::new)
.context(OpenLogStoreSnafu)
.map(Arc::new)
}
+
+ /// Builds [`GlobalIndexCollector`].
+ fn build_global_index_collector(
+ dump_index_interval: Duration,
+ operator: object_store::ObjectStore,
+ path: String,
+ ) -> GlobalIndexCollector {
+ GlobalIndexCollector::new(dump_index_interval, operator, path)
+ }
}
/// Open all regions belonging to this datanode.
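
The collector is only built when `create_index` is enabled and the datanode has a node id (i.e. runs in distributed mode); in standalone mode the option is ignored with a warning. A condensed, runnable sketch of that gating, with `Collector` standing in for `GlobalIndexCollector`:

```rust
/// Stand-in for `GlobalIndexCollector`; only the gating logic is illustrated.
#[derive(Debug, PartialEq)]
struct Collector {
    node_id: u64,
}

fn maybe_collector(create_index: bool, node_id: Option<u64>) -> Option<Collector> {
    match (create_index, node_id) {
        (true, Some(node_id)) => Some(Collector { node_id }),
        (true, None) => {
            // Mirrors the warning emitted above for standalone mode.
            eprintln!("WAL index creation is only available in distributed mode.");
            None
        }
        (false, _) => None,
    }
}

fn main() {
    assert_eq!(maybe_collector(true, Some(42)), Some(Collector { node_id: 42 }));
    assert_eq!(maybe_collector(true, None), None);
    assert_eq!(maybe_collector(false, Some(42)), None);
}
```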
diff --git a/src/datanode/src/store.rs b/src/datanode/src/store.rs
index 877f044974..16b6e0bc8b 100644
--- a/src/datanode/src/store.rs
+++ b/src/datanode/src/store.rs
@@ -29,18 +29,18 @@ use common_telemetry::{info, warn};
use object_store::layers::{LruCacheLayer, RetryInterceptor, RetryLayer};
use object_store::services::Fs;
use object_store::util::{join_dir, normalize_dir, with_instrument_layers};
-use object_store::{Error, HttpClient, ObjectStore, ObjectStoreBuilder};
+use object_store::{Access, Error, HttpClient, ObjectStore, ObjectStoreBuilder};
use snafu::prelude::*;
use crate::config::{ObjectStoreConfig, DEFAULT_OBJECT_STORE_CACHE_SIZE};
use crate::error::{self, Result};
-pub(crate) async fn new_object_store(
- store: ObjectStoreConfig,
+pub(crate) async fn new_raw_object_store(
+ store: &ObjectStoreConfig,
data_home: &str,
) -> Result<ObjectStore> {
let data_home = normalize_dir(data_home);
- let object_store = match &store {
+ let object_store = match store {
ObjectStoreConfig::File(file_config) => {
fs::new_fs_object_store(&data_home, file_config).await
}
@@ -51,27 +51,61 @@ pub(crate) async fn new_object_store(
}
ObjectStoreConfig::Gcs(gcs_config) => gcs::new_gcs_object_store(gcs_config).await,
}?;
+ Ok(object_store)
+}
+fn with_retry_layers(object_store: ObjectStore) -> ObjectStore {
+ object_store.layer(
+ RetryLayer::new()
+ .with_jitter()
+ .with_notify(PrintDetailedError),
+ )
+}
+
+pub(crate) async fn new_object_store_without_cache(
+ store: &ObjectStoreConfig,
+ data_home: &str,
+) -> Result<ObjectStore> {
+ let object_store = new_raw_object_store(store, data_home).await?;
// Enable retry layer for non-fs object storages
let object_store = if !matches!(store, ObjectStoreConfig::File(..)) {
- let object_store = create_object_store_with_cache(object_store, &store).await?;
- object_store.layer(
- RetryLayer::new()
- .with_jitter()
- .with_notify(PrintDetailedError),
- )
+ // Adds retry layer
+ with_retry_layers(object_store)
} else {
object_store
};
- let store = with_instrument_layers(object_store, true);
- Ok(store)
+ let object_store = with_instrument_layers(object_store, true);
+ Ok(object_store)
}
-async fn create_object_store_with_cache(
- object_store: ObjectStore,
- store_config: &ObjectStoreConfig,
+pub(crate) async fn new_object_store(
+ store: ObjectStoreConfig,
+ data_home: &str,
) -> Result<ObjectStore> {
+ let object_store = new_raw_object_store(&store, data_home).await?;
+ // Enable retry layer and cache layer for non-fs object storages
+ let object_store = if !matches!(store, ObjectStoreConfig::File(..)) {
+ let object_store = if let Some(cache_layer) = build_cache_layer(&store).await? {
+ // Adds cache layer
+ object_store.layer(cache_layer)
+ } else {
+ object_store
+ };
+
+ // Adds retry layer
+ with_retry_layers(object_store)
+ } else {
+ object_store
+ };
+
+ let object_store = with_instrument_layers(object_store, true);
+ Ok(object_store)
+}
+
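
Taken together, the three constructors now stack layers in this order: `new_raw_object_store` builds the bare backend, `new_object_store_without_cache` (used for the WAL index dump) adds retry and instrumentation only, and `new_object_store` additionally inserts the read cache before retry. A toy sketch of that ordering, with plain strings standing in for the real `ObjectStore` layers:

```rust
// Strings stand in for the real opendal layers; only the ordering is illustrated.
fn with_cache(store: String) -> String {
    format!("{store} + cache")
}
fn with_retry(store: String) -> String {
    format!("{store} + retry")
}
fn with_instrument(store: String) -> String {
    format!("{store} + instrument")
}

fn main() {
    let raw = String::from("s3");

    // new_object_store_without_cache: raw -> retry -> instrument.
    let wal_index_store = with_instrument(with_retry(raw.clone()));
    assert_eq!(wal_index_store, "s3 + retry + instrument");

    // new_object_store: raw -> cache -> retry -> instrument.
    let data_store = with_instrument(with_retry(with_cache(raw)));
    assert_eq!(data_store, "s3 + cache + retry + instrument");
}
```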
+async fn build_cache_layer(
+ store_config: &ObjectStoreConfig,
+) -> Result<Option<LruCacheLayer<impl Access>>> {