From edc49623deaf9a16044d6b4ae0ea220db121ca75 Mon Sep 17 00:00:00 2001 From: dennis zhuang Date: Mon, 4 Nov 2024 11:53:17 +0800 Subject: [PATCH] chore: update default cache size to 1GiB (#4923) * chore: update default cache size to 1GiB for object storage read/write cache * feat: update docs * fix: test --- config/config.md | 16 ++++++++-------- config/datanode.example.toml | 12 ++++++------ config/standalone.example.toml | 12 ++++++------ src/datanode/src/config.rs | 2 +- src/mito2/src/config.rs | 2 +- tests-integration/tests/http.rs | 2 +- 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/config/config.md b/config/config.md index 65d8fa0c16..d8164c11d4 100644 --- a/config/config.md +++ b/config/config.md @@ -93,8 +93,8 @@ | `storage` | -- | -- | The data storage options. | | `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. | | `storage.type` | String | `File` | The storage type used to store the data.
- `File`: the data is stored in the local file system.
- `S3`: the data is stored in the S3 object storage.
- `Gcs`: the data is stored in the Google Cloud Storage.
- `Azblob`: the data is stored in the Azure Blob Storage.
- `Oss`: the data is stored in the Aliyun OSS. | -| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc.
The local file cache directory. | -| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. | +| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc. It is recommended to configure it when using object storage for better performance.
The local file cache directory. | +| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. | | `storage.bucket` | String | Unset | The S3 bucket name.
**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. | | `storage.root` | String | Unset | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.
**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. | | `storage.access_key_id` | String | Unset | The access key id of the aws account.
It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
**It's only used when the storage type is `S3` and `Oss`**. | @@ -126,9 +126,9 @@ | `region_engine.mito.vector_cache_size` | String | Auto | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. | | `region_engine.mito.page_cache_size` | String | Auto | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
If not set, it's default to 1/8 of OS memory. | | `region_engine.mito.selector_result_cache_size` | String | Auto | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.
If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. | -| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. | +| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. It is recommended to enable it when using object storage for better performance. | | `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/write_cache`. | -| `region_engine.mito.experimental_write_cache_size` | String | `512MB` | Capacity for write cache. | +| `region_engine.mito.experimental_write_cache_size` | String | `1GiB` | Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger. | | `region_engine.mito.experimental_write_cache_ttl` | String | Unset | TTL for write cache. | | `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. | | `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).
- `0`: using the default value (1/4 of cpu cores).
- `1`: scan in current thread.
- `n`: scan in parallelism n. | @@ -416,8 +416,8 @@ | `storage` | -- | -- | The data storage options. | | `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. | | `storage.type` | String | `File` | The storage type used to store the data.
- `File`: the data is stored in the local file system.
- `S3`: the data is stored in the S3 object storage.
- `Gcs`: the data is stored in the Google Cloud Storage.
- `Azblob`: the data is stored in the Azure Blob Storage.
- `Oss`: the data is stored in the Aliyun OSS. | -| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc.
The local file cache directory. | -| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. | +| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc. It is recommended to configure it when using object storage for better performance.
The local file cache directory. | +| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. | | `storage.bucket` | String | Unset | The S3 bucket name.
**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. | | `storage.root` | String | Unset | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.
**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. | | `storage.access_key_id` | String | Unset | The access key id of the aws account.
It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
**It's only used when the storage type is `S3` and `Oss`**. | @@ -449,9 +449,9 @@ | `region_engine.mito.vector_cache_size` | String | Auto | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. | | `region_engine.mito.page_cache_size` | String | Auto | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
If not set, it's default to 1/8 of OS memory. | | `region_engine.mito.selector_result_cache_size` | String | Auto | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.
If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. | -| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. | +| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. It is recommended to enable it when using object storage for better performance. | | `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/write_cache`. | -| `region_engine.mito.experimental_write_cache_size` | String | `512MB` | Capacity for write cache. | +| `region_engine.mito.experimental_write_cache_size` | String | `1GiB` | Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger. | | `region_engine.mito.experimental_write_cache_ttl` | String | Unset | TTL for write cache. | | `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. | | `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).
- `0`: using the default value (1/4 of cpu cores).
- `1`: scan in current thread.
- `n`: scan in parallelism n. | diff --git a/config/datanode.example.toml b/config/datanode.example.toml index 6e426e89cd..1bc084ad40 100644 --- a/config/datanode.example.toml +++ b/config/datanode.example.toml @@ -294,14 +294,14 @@ data_home = "/tmp/greptimedb/" ## - `Oss`: the data is stored in the Aliyun OSS. type = "File" -## Cache configuration for object storage such as 'S3' etc. +## Cache configuration for object storage such as 'S3' etc. It is recommended to configure it when using object storage for better performance. ## The local file cache directory. ## @toml2docs:none-default cache_path = "/path/local_cache" -## The local file cache capacity in bytes. +## The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. ## @toml2docs:none-default -cache_capacity = "256MB" +cache_capacity = "1GiB" ## The S3 bucket name. ## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**. @@ -459,14 +459,14 @@ auto_flush_interval = "1h" ## @toml2docs:none-default="Auto" #+ selector_result_cache_size = "512MB" -## Whether to enable the experimental write cache. +## Whether to enable the experimental write cache. It is recommended to enable it when using object storage for better performance. enable_experimental_write_cache = false ## File system path for write cache, defaults to `{data_home}/write_cache`. experimental_write_cache_path = "" -## Capacity for write cache. -experimental_write_cache_size = "512MB" +## Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger. +experimental_write_cache_size = "1GiB" ## TTL for write cache. ## @toml2docs:none-default diff --git a/config/standalone.example.toml b/config/standalone.example.toml index 52f6d5b694..6d7755c7e6 100644 --- a/config/standalone.example.toml +++ b/config/standalone.example.toml @@ -332,14 +332,14 @@ data_home = "/tmp/greptimedb/" ## - `Oss`: the data is stored in the Aliyun OSS. 
type = "File" -## Cache configuration for object storage such as 'S3' etc. +## Cache configuration for object storage such as 'S3' etc. It is recommended to configure it when using object storage for better performance. ## The local file cache directory. ## @toml2docs:none-default cache_path = "/path/local_cache" -## The local file cache capacity in bytes. +## The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. ## @toml2docs:none-default -cache_capacity = "256MB" +cache_capacity = "1GiB" ## The S3 bucket name. ## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**. @@ -497,14 +497,14 @@ auto_flush_interval = "1h" ## @toml2docs:none-default="Auto" #+ selector_result_cache_size = "512MB" -## Whether to enable the experimental write cache. +## Whether to enable the experimental write cache. It is recommended to enable it when using object storage for better performance. enable_experimental_write_cache = false ## File system path for write cache, defaults to `{data_home}/write_cache`. experimental_write_cache_path = "" -## Capacity for write cache. -experimental_write_cache_size = "512MB" +## Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger. +experimental_write_cache_size = "1GiB" ## TTL for write cache. 
## @toml2docs:none-default diff --git a/src/datanode/src/config.rs b/src/datanode/src/config.rs index 70be2513b2..dae9a65581 100644 --- a/src/datanode/src/config.rs +++ b/src/datanode/src/config.rs @@ -30,7 +30,7 @@ use servers::heartbeat_options::HeartbeatOptions; use servers::http::HttpOptions; use servers::Mode; -pub const DEFAULT_OBJECT_STORE_CACHE_SIZE: ReadableSize = ReadableSize::mb(256); +pub const DEFAULT_OBJECT_STORE_CACHE_SIZE: ReadableSize = ReadableSize::gb(1); /// Default data home in file storage const DEFAULT_DATA_HOME: &str = "/tmp/greptimedb"; diff --git a/src/mito2/src/config.rs b/src/mito2/src/config.rs index 001d5ffaa8..8cd2b08f2e 100644 --- a/src/mito2/src/config.rs +++ b/src/mito2/src/config.rs @@ -150,7 +150,7 @@ impl Default for MitoConfig { selector_result_cache_size: ReadableSize::mb(512), enable_experimental_write_cache: false, experimental_write_cache_path: String::new(), - experimental_write_cache_size: ReadableSize::mb(512), + experimental_write_cache_size: ReadableSize::gb(1), experimental_write_cache_ttl: None, sst_write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE, scan_parallelism: divide_num_cpus(4), diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 638734faba..f5a1080d76 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -892,7 +892,7 @@ compress_manifest = false auto_flush_interval = "30m" enable_experimental_write_cache = false experimental_write_cache_path = "" -experimental_write_cache_size = "512MiB" +experimental_write_cache_size = "1GiB" sst_write_buffer_size = "8MiB" parallel_scan_channel_size = 32 allow_stale_entries = false