greptimedb/config/datanode.example.toml

## The datanode identifier and should be unique in the cluster.
## @toml2docs:none-default
node_id = 42

## The default column prefix for auto-created time index and value columns.
## @toml2docs:none-default
default_column_prefix = "greptime"

## Start services after regions have obtained leases.
## It will block the datanode start if it can't receive leases in the heartbeat from metasrv.
require_lease_before_startup = false

## Initialize all regions in the background during the startup.
## By default, it provides services after all regions have been initialized.
init_regions_in_background = false

## Parallelism of initializing regions.
init_regions_parallelism = 16

## The maximum current queries allowed to be executed. Zero means unlimited.
## NOTE: This setting affects scan_memory_limit's privileged tier allocation.
## When set, 70% of queries get privileged memory access (full scan_memory_limit).
## The remaining 30% get standard tier access (70% of scan_memory_limit).
max_concurrent_queries = 0

## Enable telemetry to collect anonymous usage data. Enabled by default.
#+ enable_telemetry = true

## The HTTP server options.
[http]
## The address to bind the HTTP server.
addr = "127.0.0.1:4000"
## HTTP request timeout. Set to 0 to disable timeout.
timeout = "0s"
## HTTP request body limit.
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
## Set to 0 to disable limit.
body_limit = "64MB"

## The gRPC server options.
[grpc]
## The address to bind the gRPC server.
bind_addr = "127.0.0.1:3001"
## The address advertised to the metasrv, and used for connections from outside the host.
## If left empty or unset, the server will automatically use the IP address of the first network interface
## on the host, with the same port number as the one specified in `grpc.bind_addr`.
server_addr = "127.0.0.1:3001"
## The number of server worker threads.
runtime_size = 8
## The maximum receive message size for gRPC server.
max_recv_message_size = "512MB"
## The maximum send message size for gRPC server.
max_send_message_size = "512MB"
## Compression mode for datanode side Arrow IPC service. Available options:
## - `none`: disable all compression
## - `transport`: only enable gRPC transport compression (zstd)
## - `arrow_ipc`: only enable Arrow IPC compression (lz4)
## - `all`: enable all compression.
## Default to `none`
flight_compression = "arrow_ipc"

## gRPC server TLS options, see `mysql.tls` section.
[grpc.tls]
## TLS mode.
mode = "disable"

## Certificate file path.
## @toml2docs:none-default
cert_path = ""

## Private key file path.
## @toml2docs:none-default
key_path = ""

## Watch for Certificate and key file change and auto reload.
## For now, gRPC tls config does not support auto reload.
watch = false

## The runtime options.
#+ [runtime]
## The number of threads to execute the runtime for global read operations.
#+ global_rt_size = 8
## The number of threads to execute the runtime for global write operations.
#+ compact_rt_size = 4

## The heartbeat options.
[heartbeat]
## Interval for sending heartbeat messages to the metasrv.
interval = "3s"

## Interval for retrying to send heartbeat messages to the metasrv.
retry_interval = "3s"

## The metasrv client options.
[meta_client]
## The addresses of the metasrv.
metasrv_addrs = ["127.0.0.1:3002"]

## Operation timeout.
timeout = "3s"

## Heartbeat timeout.
heartbeat_timeout = "500ms"

## DDL timeout.
ddl_timeout = "10s"

## Connect server timeout.
connect_timeout = "1s"

## `TCP_NODELAY` option for accepted connections.
tcp_nodelay = true

## The configuration about the cache of the metadata.
metadata_cache_max_capacity = 100000

## TTL of the metadata cache.
metadata_cache_ttl = "10m"

# TTI of the metadata cache.
metadata_cache_tti = "5m"

## The WAL options.
[wal]
## The provider of the WAL.
## - `raft_engine`: the wal is stored in the local file system by raft-engine.
## - `kafka`: it's remote wal that data is stored in Kafka.
## - `noop`: it's a no-op WAL provider that does not store any WAL data.<br/>**Notes: any unflushed data will be lost when the datanode is shutdown.**
provider = "raft_engine"

## The directory to store the WAL files.
## **It's only used when the provider is `raft_engine`**.
## @toml2docs:none-default
dir = "./greptimedb_data/wal"

## The size of the WAL segment file.
## **It's only used when the provider is `raft_engine`**.
file_size = "128MB"

## The threshold of the WAL size to trigger a purge.
## **It's only used when the provider is `raft_engine`**.
purge_threshold = "1GB"

## The interval to trigger a purge.
## **It's only used when the provider is `raft_engine`**.
purge_interval = "1m"

## The read batch size.
## **It's only used when the provider is `raft_engine`**.
read_batch_size = 128

## Whether to use sync write.
## **It's only used when the provider is `raft_engine`**.
sync_write = false

## Whether to reuse logically truncated log files.
## **It's only used when the provider is `raft_engine`**.
enable_log_recycle = true

## Whether to pre-create log files on start up.
## **It's only used when the provider is `raft_engine`**.
prefill_log_files = false

## Duration for fsyncing log files.
## **It's only used when the provider is `raft_engine`**.
sync_period = "10s"

## Parallelism during WAL recovery.
recovery_parallelism = 2

## The Kafka broker endpoints.
## **It's only used when the provider is `kafka`**.
broker_endpoints = ["127.0.0.1:9092"]

## The max size of a single producer batch.
## Warning: Kafka has a default limit of 1MB per message in a topic.
## **It's only used when the provider is `kafka`**.
max_batch_bytes = "1MB"

## The consumer wait timeout.
## **It's only used when the provider is `kafka`**.
consumer_wait_timeout = "100ms"

## Whether to enable WAL index creation.
## **It's only used when the provider is `kafka`**.
create_index = true

## The interval for dumping WAL indexes.
## **It's only used when the provider is `kafka`**.
dump_index_interval = "60s"

## Ignore missing entries during read WAL.
## **It's only used when the provider is `kafka`**.
##
## This option ensures that when Kafka messages are deleted, the system
## can still successfully replay memtable data without throwing an
## out-of-range error.
## However, enabling this option might lead to unexpected data loss,
## as the system will skip over missing entries instead of treating
## them as critical errors.
overwrite_entry_start_id = false

# The Kafka SASL configuration.
# **It's only used when the provider is `kafka`**.
# Available SASL mechanisms:
# - `PLAIN`
# - `SCRAM-SHA-256`
# - `SCRAM-SHA-512`
# [wal.sasl]
# type = "SCRAM-SHA-512"
# username = "user_kafka"
# password = "secret"

# The Kafka TLS configuration.
# **It's only used when the provider is `kafka`**.
# [wal.tls]
# server_ca_cert_path = "/path/to/server_cert"
# client_cert_path = "/path/to/client_cert"
# client_key_path = "/path/to/key"

# Example of using S3 as the storage.
# [storage]
# type = "S3"
# bucket = "greptimedb"
# root = "data"
# access_key_id = "test"
# secret_access_key = "123456"
# endpoint = "https://s3.amazonaws.com"
# region = "us-west-2"
# enable_virtual_host_style = false

# Example of using Oss as the storage.
# [storage]
# type = "Oss"
# bucket = "greptimedb"
# root = "data"
# access_key_id = "test"
# access_key_secret = "123456"
# endpoint = "https://oss-cn-hangzhou.aliyuncs.com"

# Example of using Azblob as the storage.
# [storage]
# type = "Azblob"
# container = "greptimedb"
# root = "data"
# account_name = "test"
# account_key = "123456"
# endpoint = "https://greptimedb.blob.core.windows.net"
# sas_token = ""

# Example of using Gcs as the storage.
# [storage]
# type = "Gcs"
# bucket = "greptimedb"
# root = "data"
# scope = "test"
# credential_path = "123456"
# credential = "base64-credential"
# endpoint = "https://storage.googleapis.com"

## The query engine options.
[query]
## Parallelism of the query engine.
## Default to 0, which means the number of CPU cores.
parallelism = 0

## Memory pool size for query execution operators (aggregation, sorting, join).
## Supports absolute size (e.g., "2GB", "4GB") or percentage of system memory (e.g., "20%").
## Setting it to 0 disables the limit (unbounded, default behavior).
## When this limit is reached, queries will fail with ResourceExhausted error.
## NOTE: This does NOT limit memory used by table scans.
memory_pool_size = "50%"

## The data storage options.
[storage]
## The working home directory.
data_home = "./greptimedb_data"

## The storage type used to store the data.
## - `File`: the data is stored in the local file system.
## - `S3`: the data is stored in the S3 object storage.
## - `Gcs`: the data is stored in the Google Cloud Storage.
## - `Azblob`: the data is stored in the Azure Blob Storage.
## - `Oss`: the data is stored in the Aliyun OSS.
type = "File"

## Read cache configuration for object storage such as 'S3' etc, it's configured by default when using object storage. It is recommended to configure it when using object storage for better performance.
## A local file directory, defaults to `{data_home}`. An empty string means disabling.
## @toml2docs:none-default
#+ cache_path = ""

## Whether to enable read cache. If not set, the read cache will be enabled by default when using object storage.
#+ enable_read_cache = true

## The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger.
## @toml2docs:none-default
cache_capacity = "5GiB"

## The S3 bucket name.
## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
## @toml2docs:none-default
bucket = "greptimedb"

## The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.
## **It's only used when the storage type is `S3`, `Oss` and `Azblob`**.
## @toml2docs:none-default
root = "greptimedb"

## The access key id of the aws account.
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
## **It's only used when the storage type is `S3` and `Oss`**.
## @toml2docs:none-default
access_key_id = "test"

## The secret access key of the aws account.
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
## **It's only used when the storage type is `S3`**.
## @toml2docs:none-default
secret_access_key = "test"

## The secret access key of the aliyun account.
## **It's only used when the storage type is `Oss`**.
## @toml2docs:none-default
access_key_secret = "test"

## The account key of the azure account.
## **It's only used when the storage type is `Azblob`**.
## @toml2docs:none-default
account_name = "test"

## The account key of the azure account.
## **It's only used when the storage type is `Azblob`**.
## @toml2docs:none-default
account_key = "test"

## The scope of the google cloud storage.
## **It's only used when the storage type is `Gcs`**.
## @toml2docs:none-default
scope = "test"

## The credential path of the google cloud storage.
## **It's only used when the storage type is `Gcs`**.
## @toml2docs:none-default
credential_path = "test"

## The credential of the google cloud storage.
## **It's only used when the storage type is `Gcs`**.
## @toml2docs:none-default
credential = "base64-credential"

## The container of the azure account.
## **It's only used when the storage type is `Azblob`**.
## @toml2docs:none-default
container = "greptimedb"

## The sas token of the azure account.
## **It's only used when the storage type is `Azblob`**.
## @toml2docs:none-default
sas_token = ""

## The endpoint of the S3 service.
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
## @toml2docs:none-default
endpoint = "https://s3.amazonaws.com"

## The region of the S3 service.
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
## @toml2docs:none-default
region = "us-west-2"

## The http client options to the storage.
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
[storage.http_client]

## The maximum idle connection per host allowed in the pool.
pool_max_idle_per_host = 1024

## The timeout for only the connect phase of a http client.
connect_timeout = "30s"

## The total request timeout, applied from when the request starts connecting until the response body has finished.
## Also considered a total deadline.
timeout = "30s"

## The timeout for idle sockets being kept-alive.
pool_idle_timeout = "90s"

## To skip the ssl verification
## **Security Notice**: Setting `skip_ssl_validation = true` disables certificate verification, making connections vulnerable to man-in-the-middle attacks. Only use this in development or trusted private networks.
skip_ssl_validation = false

# Custom storage options
# [[storage.providers]]
# name = "S3"
# type = "S3"
# bucket = "greptimedb"
# root = "data"
# access_key_id = "test"
# secret_access_key = "123456"
# endpoint = "https://s3.amazonaws.com"
# region = "us-west-2"
# [[storage.providers]]
# name = "Gcs"
# type = "Gcs"
# bucket = "greptimedb"
# root = "data"
# scope = "test"
# credential_path = "123456"
# credential = "base64-credential"
# endpoint = "https://storage.googleapis.com"

## The region engine options. You can configure multiple region engines.
[[region_engine]]

## The Mito engine options.
[region_engine.mito]

## Number of region workers.
#+ num_workers = 8

## Request channel size of each worker.
worker_channel_size = 128

## Max batch size for a worker to handle requests.
worker_request_batch_size = 64

## Number of meta action updated to trigger a new checkpoint for the manifest.
manifest_checkpoint_distance = 10


## Number of removed files to keep in manifest's `removed_files` field before also
## remove them from `removed_files`. Mostly for debugging purpose.
## If set to 0, it will only use `keep_removed_file_ttl` to decide when to remove files
## from `removed_files` field.
experimental_manifest_keep_removed_file_count = 256

## How long to keep removed files in the `removed_files` field of manifest
## after they are removed from manifest.
## files will only be removed from `removed_files` field
## if both `keep_removed_file_count` and `keep_removed_file_ttl` is reached.
experimental_manifest_keep_removed_file_ttl = "1h"

## Whether to compress manifest and checkpoint file by gzip (default false).
compress_manifest = false

## Max number of running background flush jobs (default: 1/2 of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_flushes = 4

## Max number of running background compaction jobs (default: 1/4 of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_compactions = 2

## Max number of running background purge jobs (default: number of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_purges = 8

## Interval to auto flush a region if it has not flushed yet.
auto_flush_interval = "1h"

## Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
## @toml2docs:none-default="Auto"
#+ global_write_buffer_size = "1GB"

## Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
## @toml2docs:none-default="Auto"
#+ global_write_buffer_reject_size = "2GB"

## Cache size for SST metadata. Setting it to 0 to disable the cache.
## If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
## @toml2docs:none-default="Auto"
#+ sst_meta_cache_size = "128MB"

## Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
## @toml2docs:none-default="Auto"
#+ vector_cache_size = "512MB"

## Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
## If not set, it's default to 1/8 of OS memory.
## @toml2docs:none-default="Auto"
#+ page_cache_size = "512MB"

## Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
## @toml2docs:none-default="Auto"
#+ selector_result_cache_size = "512MB"

## Whether to enable the write cache, it's enabled by default when using object storage. It is recommended to enable it when using object storage for better performance.
enable_write_cache = false

## File system path for write cache, defaults to `{data_home}`.
write_cache_path = ""

## Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger.
write_cache_size = "5GiB"

## TTL for write cache.
## @toml2docs:none-default
write_cache_ttl = "8h"

## Preload index (puffin) files into cache on region open (default: true).
## When enabled, index files are loaded into the write cache during region initialization,
## which can improve query performance at the cost of longer startup times.
preload_index_cache = true

## Percentage of write cache capacity allocated for index (puffin) files (default: 20).
## The remaining capacity is used for data (parquet) files.
## Must be between 0 and 100 (exclusive). For example, with a 5GiB write cache and 20% allocation,
## 1GiB is reserved for index files and 4GiB for data files.
index_cache_percent = 20

## Buffer size for SST writing.
sst_write_buffer_size = "8MB"

## Capacity of the channel to send data from parallel scan tasks to the main task.
parallel_scan_channel_size = 32

## Maximum number of SST files to scan concurrently.
max_concurrent_scan_files = 384

## Whether to allow stale WAL entries read during replay.
allow_stale_entries = false

## Memory limit for table scans across all queries.
## Supports absolute size (e.g., "2GB") or percentage of system memory (e.g., "20%").
## Setting it to 0 disables the limit.
## NOTE: Works with max_concurrent_queries for tiered memory allocation.
## - If max_concurrent_queries is set: 70% of queries get full access, 30% get 70% access.
## - If max_concurrent_queries is 0 (unlimited): first 20 queries get full access, rest get 70% access.
scan_memory_limit = "50%"

## Minimum time interval between two compactions.
## To align with the old behavior, the default value is 0 (no restrictions).
min_compaction_interval = "0m"

## Whether to enable experimental flat format as the default format.
default_experimental_flat_format = false

## The options for index in Mito engine.
[region_engine.mito.index]

## Auxiliary directory path for the index in filesystem, used to store intermediate files for
## creating the index and staging files for searching the index, defaults to `{data_home}/index_intermediate`.
## The default name for this directory is `index_intermediate` for backward compatibility.
##
## This path contains two subdirectories:
## - `__intm`: for storing intermediate files used during creating index.
## - `staging`: for storing staging files used during searching index.
aux_path = ""

## The max capacity of the staging directory.
staging_size = "2GB"

## The TTL of the staging directory.
## Defaults to 7 days.
## Setting it to "0s" to disable TTL.
staging_ttl = "7d"

## Cache size for inverted index metadata.
metadata_cache_size = "64MiB"

## Cache size for inverted index content.
content_cache_size = "128MiB"

## Page size for inverted index content cache.
content_cache_page_size = "64KiB"

## Cache size for index result.
result_cache_size = "128MiB"

## The options for inverted index in Mito engine.
[region_engine.mito.inverted_index]

## Whether to create the index on flush.
## - `auto`: automatically (default)
## - `disable`: never
create_on_flush = "auto"

## Whether to create the index on compaction.
## - `auto`: automatically (default)
## - `disable`: never
create_on_compaction = "auto"

## Whether to apply the index on query
## - `auto`: automatically (default)
## - `disable`: never
apply_on_query = "auto"

## Memory threshold for performing an external sort during index creation.
## - `auto`: automatically determine the threshold based on the system memory size (default)
## - `unlimited`: no memory limit
## - `[size]` e.g. `64MB`: fixed memory threshold
mem_threshold_on_create = "auto"

## Deprecated, use `region_engine.mito.index.aux_path` instead.
intermediate_path = ""

## The options for full-text index in Mito engine.
[region_engine.mito.fulltext_index]

## Whether to create the index on flush.
## - `auto`: automatically (default)
## - `disable`: never
create_on_flush = "auto"

## Whether to create the index on compaction.
## - `auto`: automatically (default)
## - `disable`: never
create_on_compaction = "auto"

## Whether to apply the index on query
## - `auto`: automatically (default)
## - `disable`: never
apply_on_query = "auto"

## Memory threshold for index creation.
## - `auto`: automatically determine the threshold based on the system memory size (default)
## - `unlimited`: no memory limit
## - `[size]` e.g. `64MB`: fixed memory threshold
mem_threshold_on_create = "auto"

## The options for bloom filter index in Mito engine.
[region_engine.mito.bloom_filter_index]

## Whether to create the index on flush.
## - `auto`: automatically (default)
## - `disable`: never
create_on_flush = "auto"

## Whether to create the index on compaction.
## - `auto`: automatically (default)
## - `disable`: never
create_on_compaction = "auto"

## Whether to apply the index on query
## - `auto`: automatically (default)
## - `disable`: never
apply_on_query = "auto"

## Memory threshold for the index creation.
## - `auto`: automatically determine the threshold based on the system memory size (default)
## - `unlimited`: no memory limit
## - `[size]` e.g. `64MB`: fixed memory threshold
mem_threshold_on_create = "auto"

[region_engine.mito.memtable]
## Memtable type.
## - `time_series`: time-series memtable
## - `partition_tree`: partition tree memtable (experimental)
type = "time_series"

## The max number of keys in one shard.
## Only available for `partition_tree` memtable.
index_max_keys_per_shard = 8192

## The max rows of data inside the actively writing buffer in one shard.
## Only available for `partition_tree` memtable.
data_freeze_threshold = 32768

## Max dictionary bytes.
## Only available for `partition_tree` memtable.
fork_dictionary_bytes = "1GiB"

[[region_engine]]
## Enable the file engine.
[region_engine.file]

[[region_engine]]
## Metric engine options.
[region_engine.metric]
## Whether to use sparse primary key encoding.
sparse_primary_key_encoding = true

## The logging options.
[logging]
## The directory to store the log files. If set to empty, logs will not be written to files.
dir = "./greptimedb_data/logs"

## The log level. Can be `info`/`debug`/`warn`/`error`.
## @toml2docs:none-default
level = "info"

## Enable OTLP tracing.
enable_otlp_tracing = false

## The OTLP tracing endpoint.
otlp_endpoint = "http://localhost:4318/v1/traces"

## Whether to append logs to stdout.
append_stdout = true

## The log format. Can be `text`/`json`.
log_format = "text"

## The maximum amount of log files.
max_log_files = 720

## The OTLP tracing export protocol. Can be `grpc`/`http`.
otlp_export_protocol = "http"

## Additional OTLP headers, only valid when using OTLP http
[logging.otlp_headers]
## @toml2docs:none-default
#Authorization = "Bearer my-token"
## @toml2docs:none-default
#Database = "My database"

## The percentage of tracing will be sampled and exported.
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
[logging.tracing_sample_ratio]
default_ratio = 1.0

## The tracing options. Only effect when compiled with `tokio-console` feature.
#+ [tracing]
## The tokio console address.
## @toml2docs:none-default
#+ tokio_console_addr = "127.0.0.1"

## The memory options.
[memory]
## Whether to enable heap profiling activation during startup.
## When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
## is set to "prof:true,prof_active:false". The official image adds this env variable.
## Default is true.
enable_heap_profiling = true