diff --git a/config/config.md b/config/config.md
index 1f034d2873..0f70a8cb12 100644
--- a/config/config.md
+++ b/config/config.md
@@ -93,7 +93,7 @@
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data. - `File`: the data is stored in the local file system. - `S3`: the data is stored in the S3 object storage. - `Gcs`: the data is stored in the Google Cloud Storage. - `Azblob`: the data is stored in the Azure Blob Storage. - `Oss`: the data is stored in the Aliyun OSS. |
-| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc. It is recommended to configure it when using object storage for better performance. The local file cache directory. |
+| `storage.cache_path` | String | Unset | Read cache configuration for object storage such as 'S3' etc. It's configured by default when using object storage; configuring it is recommended for better performance. A local file directory, defaults to `{data_home}/object_cache/read`. An empty string means disabling the read cache. |
| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. |
| `storage.bucket` | String | Unset | The S3 bucket name. **It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
| `storage.root` | String | Unset | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`. **It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
@@ -131,9 +131,9 @@
| `region_engine.mito.vector_cache_size` | String | Auto | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache. If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | Auto | Cache size for pages of SST row groups. Setting it to 0 to disable the cache. If not set, it's default to 1/8 of OS memory. |
| `region_engine.mito.selector_result_cache_size` | String | Auto | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache. If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
-| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. It is recommended to enable it when using object storage for better performance. |
-| `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/write_cache`. |
-| `region_engine.mito.experimental_write_cache_size` | String | `1GiB` | Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger. |
+| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. It's enabled by default when using object storage; enabling it is recommended for better performance. |
+| `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/object_cache/write`. |
+| `region_engine.mito.experimental_write_cache_size` | String | `5GiB` | Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger. |
| `region_engine.mito.experimental_write_cache_ttl` | String | Unset | TTL for write cache. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
@@ -420,7 +420,7 @@
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data. - `File`: the data is stored in the local file system. - `S3`: the data is stored in the S3 object storage. - `Gcs`: the data is stored in the Google Cloud Storage. - `Azblob`: the data is stored in the Azure Blob Storage. - `Oss`: the data is stored in the Aliyun OSS. |
-| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc. It is recommended to configure it when using object storage for better performance. The local file cache directory. |
+| `storage.cache_path` | String | Unset | Read cache configuration for object storage such as 'S3' etc. It's configured by default when using object storage; configuring it is recommended for better performance. A local file directory, defaults to `{data_home}/object_cache/read`. An empty string means disabling the read cache. |
| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. |
| `storage.bucket` | String | Unset | The S3 bucket name. **It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
| `storage.root` | String | Unset | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`. **It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
@@ -458,9 +458,9 @@
| `region_engine.mito.vector_cache_size` | String | Auto | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache. If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | Auto | Cache size for pages of SST row groups. Setting it to 0 to disable the cache. If not set, it's default to 1/8 of OS memory. |
| `region_engine.mito.selector_result_cache_size` | String | Auto | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache. If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
-| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. It is recommended to enable it when using object storage for better performance. |
-| `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/write_cache`. |
-| `region_engine.mito.experimental_write_cache_size` | String | `1GiB` | Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger. |
+| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. It's enabled by default when using object storage; enabling it is recommended for better performance. |
+| `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/object_cache/write`. |
+| `region_engine.mito.experimental_write_cache_size` | String | `5GiB` | Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger. |
| `region_engine.mito.experimental_write_cache_ttl` | String | Unset | TTL for write cache. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
diff --git a/config/datanode.example.toml b/config/datanode.example.toml
index 11c2794e61..8bfa8732cc 100644
--- a/config/datanode.example.toml
+++ b/config/datanode.example.toml
@@ -294,14 +294,14 @@ data_home = "/tmp/greptimedb/"
## - `Oss`: the data is stored in the Aliyun OSS.
type = "File"
-## Cache configuration for object storage such as 'S3' etc. It is recommended to configure it when using object storage for better performance.
-## The local file cache directory.
+## Read cache configuration for object storage such as 'S3' etc. It's configured by default when using object storage; configuring it is recommended for better performance.
+## A local file directory, defaults to `{data_home}/object_cache/read`. An empty string means disabling the read cache.
## @toml2docs:none-default
-cache_path = "/path/local_cache"
+#+ cache_path = ""
## The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger.
## @toml2docs:none-default
-cache_capacity = "1GiB"
+cache_capacity = "5GiB"
## The S3 bucket name.
## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
@@ -476,14 +476,14 @@ auto_flush_interval = "1h"
## @toml2docs:none-default="Auto"
#+ selector_result_cache_size = "512MB"
-## Whether to enable the experimental write cache. It is recommended to enable it when using object storage for better performance.
+## Whether to enable the experimental write cache. It's enabled by default when using object storage; enabling it is recommended for better performance.
enable_experimental_write_cache = false
-## File system path for write cache, defaults to `{data_home}/write_cache`.
+## File system path for write cache, defaults to `{data_home}/object_cache/write`.
experimental_write_cache_path = ""
## Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger.
-experimental_write_cache_size = "1GiB"
+experimental_write_cache_size = "5GiB"
## TTL for write cache.
## @toml2docs:none-default
diff --git a/config/standalone.example.toml b/config/standalone.example.toml
index a69295af16..56cbeaddb9 100644
--- a/config/standalone.example.toml
+++ b/config/standalone.example.toml
@@ -332,14 +332,14 @@ data_home = "/tmp/greptimedb/"
## - `Oss`: the data is stored in the Aliyun OSS.
type = "File"
-## Cache configuration for object storage such as 'S3' etc. It is recommended to configure it when using object storage for better performance.
-## The local file cache directory.
+## Read cache configuration for object storage such as 'S3' etc. It's configured by default when using object storage; configuring it is recommended for better performance.
+## A local file directory, defaults to `{data_home}/object_cache/read`. An empty string means disabling the read cache.
## @toml2docs:none-default
-cache_path = "/path/local_cache"
+#+ cache_path = ""
## The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger.
## @toml2docs:none-default
-cache_capacity = "1GiB"
+cache_capacity = "5GiB"
## The S3 bucket name.
## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
@@ -514,14 +514,14 @@ auto_flush_interval = "1h"
## @toml2docs:none-default="Auto"
#+ selector_result_cache_size = "512MB"
-## Whether to enable the experimental write cache. It is recommended to enable it when using object storage for better performance.
+## Whether to enable the experimental write cache. It's enabled by default when using object storage; enabling it is recommended for better performance.
enable_experimental_write_cache = false
-## File system path for write cache, defaults to `{data_home}/write_cache`.
+## File system path for write cache, defaults to `{data_home}/object_cache/write`.
experimental_write_cache_path = ""
## Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger.
-experimental_write_cache_size = "1GiB"
+experimental_write_cache_size = "5GiB"
## TTL for write cache.
## @toml2docs:none-default
diff --git a/src/datanode/src/config.rs b/src/datanode/src/config.rs
index 4fedb9ea2c..4e1b4f3195 100644
--- a/src/datanode/src/config.rs
+++ b/src/datanode/src/config.rs
@@ -32,7 +32,7 @@ use servers::heartbeat_options::HeartbeatOptions;
use servers::http::HttpOptions;
use servers::Mode;
-pub const DEFAULT_OBJECT_STORE_CACHE_SIZE: ReadableSize = ReadableSize::gb(1);
+pub const DEFAULT_OBJECT_STORE_CACHE_SIZE: ReadableSize = ReadableSize::gb(5);
/// Default data home in file storage
const DEFAULT_DATA_HOME: &str = "/tmp/greptimedb";
@@ -60,6 +60,11 @@ impl ObjectStoreConfig {
}
}
+ /// Returns true when it's a remote object storage such as AWS s3 etc.
+ pub fn is_object_storage(&self) -> bool {
+ !matches!(self, Self::File(_))
+ }
+
/// Returns the object storage configuration name, return the provider name if it's empty.
pub fn config_name(&self) -> &str {
let name = match self {
@@ -91,6 +96,13 @@ pub struct StorageConfig {
pub providers: Vec,
}
+impl StorageConfig {
+ /// Returns true when the default storage config is a remote object storage service such as AWS S3, etc.
+ pub fn is_object_storage(&self) -> bool {
+ self.store.is_object_storage()
+ }
+}
+
impl Default for StorageConfig {
fn default() -> Self {
Self {
@@ -452,6 +464,20 @@ mod tests {
assert_eq!("S3", s3_config.provider_name());
}
+ #[test]
+ fn test_is_object_storage() {
+ let store = ObjectStoreConfig::default();
+ assert!(!store.is_object_storage());
+ let s3_config = ObjectStoreConfig::S3(S3Config::default());
+ assert!(s3_config.is_object_storage());
+ let oss_config = ObjectStoreConfig::Oss(OssConfig::default());
+ assert!(oss_config.is_object_storage());
+ let gcs_config = ObjectStoreConfig::Gcs(GcsConfig::default());
+ assert!(gcs_config.is_object_storage());
+ let azblob_config = ObjectStoreConfig::Azblob(AzblobConfig::default());
+ assert!(azblob_config.is_object_storage());
+ }
+
#[test]
fn test_secstr() {
let toml_str = r#"
diff --git a/src/datanode/src/datanode.rs b/src/datanode/src/datanode.rs
index c89c007082..53a0cf9fd7 100644
--- a/src/datanode/src/datanode.rs
+++ b/src/datanode/src/datanode.rs
@@ -428,10 +428,16 @@ impl DatanodeBuilder {
async fn build_mito_engine(
opts: &DatanodeOptions,
object_store_manager: ObjectStoreManagerRef,
- config: MitoConfig,
+ mut config: MitoConfig,
schema_metadata_manager: SchemaMetadataManagerRef,
plugins: Plugins,
) -> Result {
+ if opts.storage.is_object_storage() {
+ // Enable the write cache when setting object storage
+ config.enable_experimental_write_cache = true;
+ info!("Configured 'enable_experimental_write_cache=true' for mito engine.");
+ }
+
let mito_engine = match &opts.wal {
DatanodeWalConfig::RaftEngine(raft_engine_config) => MitoEngine::new(
&opts.storage.data_home,
diff --git a/src/datanode/src/lib.rs b/src/datanode/src/lib.rs
index dae3eef76c..6a7b1c596d 100644
--- a/src/datanode/src/lib.rs
+++ b/src/datanode/src/lib.rs
@@ -13,6 +13,7 @@
// limitations under the License.
#![feature(assert_matches)]
+#![feature(let_chains)]
pub mod alive_keeper;
pub mod config;
diff --git a/src/datanode/src/store.rs b/src/datanode/src/store.rs
index e8ede07674..c78afe448e 100644
--- a/src/datanode/src/store.rs
+++ b/src/datanode/src/store.rs
@@ -19,21 +19,20 @@ mod fs;
mod gcs;
mod oss;
mod s3;
-
+use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use std::{env, path};
-use common_base::readable_size::ReadableSize;
use common_telemetry::{info, warn};
use object_store::layers::{LruCacheLayer, RetryInterceptor, RetryLayer};
use object_store::services::Fs;
use object_store::util::{join_dir, normalize_dir, with_instrument_layers};
-use object_store::{Access, Error, HttpClient, ObjectStore, ObjectStoreBuilder};
+use object_store::{Access, Error, HttpClient, ObjectStore, ObjectStoreBuilder, OBJECT_CACHE_DIR};
use snafu::prelude::*;
use crate::config::{HttpClientConfig, ObjectStoreConfig, DEFAULT_OBJECT_STORE_CACHE_SIZE};
-use crate::error::{self, Result};
+use crate::error::{self, CreateDirSnafu, Result};
pub(crate) async fn new_raw_object_store(
store: &ObjectStoreConfig,
@@ -68,7 +67,7 @@ pub(crate) async fn new_object_store_without_cache(
) -> Result {
let object_store = new_raw_object_store(store, data_home).await?;
// Enable retry layer and cache layer for non-fs object storages
- let object_store = if !matches!(store, ObjectStoreConfig::File(..)) {
+ let object_store = if store.is_object_storage() {
// Adds retry layer
with_retry_layers(object_store)
} else {
@@ -85,8 +84,8 @@ pub(crate) async fn new_object_store(
) -> Result {
let object_store = new_raw_object_store(&store, data_home).await?;
// Enable retry layer and cache layer for non-fs object storages
- let object_store = if !matches!(store, ObjectStoreConfig::File(..)) {
- let object_store = if let Some(cache_layer) = build_cache_layer(&store).await? {
+ let object_store = if store.is_object_storage() {
+ let object_store = if let Some(cache_layer) = build_cache_layer(&store, data_home).await? {
// Adds cache layer
object_store.layer(cache_layer)
} else {
@@ -105,44 +104,72 @@ pub(crate) async fn new_object_store(
async fn build_cache_layer(
store_config: &ObjectStoreConfig,
+ data_home: &str,
) -> Result