diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index 1f6f050df5..52e57f68cd 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -33,12 +33,17 @@ env: RUST_TOOLCHAIN: nightly-2023-12-19 jobs: - typos: - name: Spell Check with Typos + check-typos-and-docs: + name: Check typos and docs runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v4 - uses: crate-ci/typos@v1.13.10 + - name: Check the config docs + run: | + make config-docs && \ + git diff --name-only --exit-code ./config/config.md \ + || (echo "'config/config.md' is not up-to-date, please run 'make config-docs'." && exit 1) check: name: Check diff --git a/Makefile b/Makefile index 28077b6960..4826bbb5dd 100644 --- a/Makefile +++ b/Makefile @@ -192,6 +192,16 @@ run-it-in-container: start-etcd ## Run integration tests in dev-builder. -w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-${BASE_IMAGE}:latest \ make test sqlness-test BUILD_JOBS=${BUILD_JOBS} +##@ Docs +config-docs: ## Generate configuration documentation from toml files. + docker run --rm \ + -v ${PWD}:/greptimedb \ + -w /greptimedb/config \ + toml2docs/toml2docs:latest \ + -p '##' \ + -t ./config-docs-template.md \ + -o ./config.md + ##@ General # The help target prints out all targets with their descriptions organized diff --git a/config/config-docs-template.md b/config/config-docs-template.md new file mode 100644 index 0000000000..8fe10780f5 --- /dev/null +++ b/config/config-docs-template.md @@ -0,0 +1,19 @@ +# Configurations + +## Standalone Mode + +{{ toml2docs "./standalone.example.toml" }} + +## Cluster Mode + +### Frontend + +{{ toml2docs "./frontend.example.toml" }} + +### Metasrv + +{{ toml2docs "./metasrv.example.toml" }} + +### Datanode + +{{ toml2docs "./datanode.example.toml" }} diff --git a/config/config.md b/config/config.md new file mode 100644 index 0000000000..50c3f967b2 --- /dev/null +++ b/config/config.md @@ -0,0 +1,376 @@ +# Configurations + +## Standalone Mode + +| Key | Type | Default | Descriptions | +| --- | -----| ------- | ----------- | +| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. | +| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. | +| `default_timezone` | String | `None` | The default timezone of the server. | +| `http` | -- | -- | The HTTP server options. | +| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. | +| `http.timeout` | String | `30s` | HTTP request timeout. | +| `http.body_limit` | String | `64MB` | HTTP request body limit.
The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`. |
+| `grpc` | -- | -- | The gRPC server options. |
+| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
+| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
+| `mysql` | -- | -- | MySQL server options. |
+| `mysql.enable` | Bool | `true` | Whether to enable. |
+| `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
+| `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
+| `mysql.tls` | -- | -- | -- |
+| `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
- `disable` (default value)
- `prefer`
- `require`
- `verify-ca`
- `verify-full` | +| `mysql.tls.cert_path` | String | `None` | Certificate file path. | +| `mysql.tls.key_path` | String | `None` | Private key file path. | +| `mysql.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload | +| `postgres` | -- | -- | PostgresSQL server options. | +| `postgres.enable` | Bool | `true` | Whether to enable | +| `postgres.addr` | String | `127.0.0.1:4003` | The addr to bind the PostgresSQL server. | +| `postgres.runtime_size` | Integer | `2` | The number of server worker threads. | +| `postgres.tls` | -- | -- | PostgresSQL server TLS options, see `mysql_options.tls` section. | +| `postgres.tls.mode` | String | `disable` | TLS mode. | +| `postgres.tls.cert_path` | String | `None` | Certificate file path. | +| `postgres.tls.key_path` | String | `None` | Private key file path. | +| `postgres.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload | +| `opentsdb` | -- | -- | OpenTSDB protocol options. | +| `opentsdb.enable` | Bool | `true` | Whether to enable | +| `opentsdb.addr` | String | `127.0.0.1:4242` | OpenTSDB telnet API server address. | +| `opentsdb.runtime_size` | Integer | `2` | The number of server worker threads. | +| `influxdb` | -- | -- | InfluxDB protocol options. | +| `influxdb.enable` | Bool | `true` | Whether to enable InfluxDB protocol in HTTP API. | +| `prom_store` | -- | -- | Prometheus remote storage options | +| `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. | +| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. | +| `wal` | -- | -- | The WAL options. | +| `wal.provider` | String | `raft_engine` | The provider of the WAL.
- `raft_engine`: the WAL is stored in the local file system by raft-engine.
- `kafka`: the WAL is stored remotely in Kafka. |
+| `wal.dir` | String | `None` | The directory to store the WAL files.
**It's only used when the provider is `raft_engine`**. | +| `wal.file_size` | String | `256MB` | The size of the WAL segment file.
**It's only used when the provider is `raft_engine`**. | +| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.
**It's only used when the provider is `raft_engine`**. | +| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.
**It's only used when the provider is `raft_engine`**. | +| `wal.read_batch_size` | Integer | `128` | The read batch size.
**It's only used when the provider is `raft_engine`**. | +| `wal.sync_write` | Bool | `false` | Whether to use sync write.
**It's only used when the provider is `raft_engine`**. | +| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.
**It's only used when the provider is `raft_engine`**. | +| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.
**It's only used when the provider is `raft_engine`**. | +| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.
**It's only used when the provider is `raft_engine`**. | +| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.
**It's only used when the provider is `kafka`**. | +| `wal.max_batch_size` | String | `1MB` | The max size of a single producer batch.
Warning: Kafka has a default limit of 1MB per message in a topic.
**It's only used when the provider is `kafka`**. | +| `wal.linger` | String | `200ms` | The linger duration of a kafka batch producer.
**It's only used when the provider is `kafka`**. | +| `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.
**It's only used when the provider is `kafka`**. | +| `wal.backoff_init` | String | `500ms` | The initial backoff delay.
**It's only used when the provider is `kafka`**. | +| `wal.backoff_max` | String | `10s` | The maximum backoff delay.
**It's only used when the provider is `kafka`**. | +| `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.
**It's only used when the provider is `kafka`**. | +| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.
**It's only used when the provider is `kafka`**. | +| `metadata_store` | -- | -- | Metadata storage options. | +| `metadata_store.file_size` | String | `256MB` | Kv file size in bytes. | +| `metadata_store.purge_threshold` | String | `4GB` | Kv purge threshold. | +| `procedure` | -- | -- | Procedure storage options. | +| `procedure.max_retry_times` | Integer | `3` | Procedure max retry time. | +| `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially | +| `storage` | -- | -- | The data storage options. | +| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. | +| `storage.type` | String | `File` | The storage type used to store the data.
- `File`: the data is stored in the local file system.
- `S3`: the data is stored in the S3 object storage.
- `Gcs`: the data is stored in the Google Cloud Storage.
- `Azblob`: the data is stored in the Azure Blob Storage.
- `Oss`: the data is stored in the Aliyun OSS. | +| `storage.cache_path` | String | `None` | Cache configuration for object storage such as 'S3' etc.
The local file cache directory. | +| `storage.cache_capacity` | String | `None` | The local file cache capacity in bytes. | +| `storage.bucket` | String | `None` | The S3 bucket name.
**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. | +| `storage.root` | String | `None` | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.
**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. | +| `storage.access_key_id` | String | `None` | The access key id of the aws account.
It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
**It's only used when the storage type is `S3` and `Oss`**. | +| `storage.secret_access_key` | String | `None` | The secret access key of the aws account.
It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
**It's only used when the storage type is `S3`**. | +| `storage.access_key_secret` | String | `None` | The secret access key of the aliyun account.
**It's only used when the storage type is `Oss`**. |
+| `storage.account_name` | String | `None` | The account name of the azure account.
**It's only used when the storage type is `Azblob`**. | +| `storage.account_key` | String | `None` | The account key of the azure account.
**It's only used when the storage type is `Azblob`**. | +| `storage.scope` | String | `None` | The scope of the google cloud storage.
**It's only used when the storage type is `Gcs`**. | +| `storage.credential_path` | String | `None` | The credential path of the google cloud storage.
**It's only used when the storage type is `Gcs`**. | +| `storage.container` | String | `None` | The container of the azure account.
**It's only used when the storage type is `Azblob`**. | +| `storage.sas_token` | String | `None` | The sas token of the azure account.
**It's only used when the storage type is `Azblob`**. | +| `storage.endpoint` | String | `None` | The endpoint of the S3 service.
**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. | +| `storage.region` | String | `None` | The region of the S3 service.
**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. | +| `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. | +| `region_engine.mito` | -- | -- | The Mito engine options. | +| `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. | +| `region_engine.mito.worker_channel_size` | Integer | `128` | Request channel size of each worker. | +| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. | +| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. | +| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). | +| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs | +| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. | +| `region_engine.mito.global_write_buffer_size` | String | `1GB` | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. | +| `region_engine.mito.global_write_buffer_reject_size` | String | `2GB` | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` | +| `region_engine.mito.sst_meta_cache_size` | String | `128MB` | Cache size for SST metadata. Setting it to 0 to disable the cache.
If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. | +| `region_engine.mito.vector_cache_size` | String | `512MB` | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. | +| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. | +| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. | +| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).
- `0`: using the default value (1/4 of cpu cores).
- `1`: scan in current thread.
- `n`: scan in parallelism n. | +| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. | +| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. | +| `region_engine.mito.inverted_index` | -- | -- | The options for inverted index in Mito engine. | +| `region_engine.mito.inverted_index.create_on_flush` | String | `auto` | Whether to create the index on flush.
- `auto`: automatically
- `disable`: never | +| `region_engine.mito.inverted_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.
- `auto`: automatically
- `disable`: never | +| `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query
- `auto`: automatically
- `disable`: never | +| `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `64M` | Memory threshold for performing an external sort during index creation.
Setting to empty will disable external sorting, forcing all sorting operations to happen in memory. | +| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). | +| `region_engine.mito.memtable` | -- | -- | -- | +| `region_engine.mito.memtable.type` | String | `partition_tree` | Memtable type.
- `partition_tree`: partition tree memtable
- `time_series`: time-series memtable (deprecated) | +| `region_engine.mito.memtable.index_max_keys_per_shard` | Integer | `8192` | The max number of keys in one shard. | +| `region_engine.mito.memtable.data_freeze_threshold` | Integer | `32768` | The max rows of data inside the actively writing buffer in one shard. | +| `region_engine.mito.memtable.fork_dictionary_bytes` | String | `1GiB` | Max dictionary bytes. | +| `logging` | -- | -- | The logging options. | +| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. | +| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. | +| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. | +| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. | +| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. | +| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.
Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
Ratios > 1 are treated as 1. Fractions < 0 are treated as 0. |
+| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
+| `export_metrics` | -- | -- | The datanode can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) via the remote-write API.
This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. | +| `export_metrics.enable` | Bool | `false` | whether enable export metrics. | +| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. | +| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommend to collect metrics generated by itself | +| `export_metrics.self_import.db` | String | `None` | -- | +| `export_metrics.remote_write` | -- | -- | -- | +| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. | +| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. | + + +## Cluster Mode + +### Frontend + +| Key | Type | Default | Descriptions | +| --- | -----| ------- | ----------- | +| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. | +| `default_timezone` | String | `None` | The default timezone of the server. | +| `heartbeat` | -- | -- | The heartbeat options. | +| `heartbeat.interval` | String | `18s` | Interval for sending heartbeat messages to the metasrv. | +| `heartbeat.retry_interval` | String | `3s` | Interval for retrying to send heartbeat messages to the metasrv. | +| `http` | -- | -- | The HTTP server options. | +| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. | +| `http.timeout` | String | `30s` | HTTP request timeout. | +| `http.body_limit` | String | `64MB` | HTTP request body limit.
The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`. |
+| `grpc` | -- | -- | The gRPC server options. |
+| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
+| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
+| `mysql` | -- | -- | MySQL server options. |
+| `mysql.enable` | Bool | `true` | Whether to enable. |
+| `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
+| `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
+| `mysql.tls` | -- | -- | -- |
+| `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
- `disable` (default value)
- `prefer`
- `require`
- `verify-ca`
- `verify-full` | +| `mysql.tls.cert_path` | String | `None` | Certificate file path. | +| `mysql.tls.key_path` | String | `None` | Private key file path. | +| `mysql.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload | +| `postgres` | -- | -- | PostgresSQL server options. | +| `postgres.enable` | Bool | `true` | Whether to enable | +| `postgres.addr` | String | `127.0.0.1:4003` | The addr to bind the PostgresSQL server. | +| `postgres.runtime_size` | Integer | `2` | The number of server worker threads. | +| `postgres.tls` | -- | -- | PostgresSQL server TLS options, see `mysql_options.tls` section. | +| `postgres.tls.mode` | String | `disable` | TLS mode. | +| `postgres.tls.cert_path` | String | `None` | Certificate file path. | +| `postgres.tls.key_path` | String | `None` | Private key file path. | +| `postgres.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload | +| `opentsdb` | -- | -- | OpenTSDB protocol options. | +| `opentsdb.enable` | Bool | `true` | Whether to enable | +| `opentsdb.addr` | String | `127.0.0.1:4242` | OpenTSDB telnet API server address. | +| `opentsdb.runtime_size` | Integer | `2` | The number of server worker threads. | +| `influxdb` | -- | -- | InfluxDB protocol options. | +| `influxdb.enable` | Bool | `true` | Whether to enable InfluxDB protocol in HTTP API. | +| `prom_store` | -- | -- | Prometheus remote storage options | +| `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. | +| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. | +| `meta_client` | -- | -- | The metasrv client options. | +| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. | +| `meta_client.timeout` | String | `3s` | Operation timeout. | +| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. | +| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. | +| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. | +| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. | +| `meta_client.metadata_cache_max_capacity` | Integer | `100000` | The configuration about the cache of the metadata. | +| `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. | +| `meta_client.metadata_cache_tti` | String | `5m` | -- | +| `datanode` | -- | -- | Datanode options. | +| `datanode.client` | -- | -- | Datanode client options. | +| `datanode.client.timeout` | String | `10s` | -- | +| `datanode.client.connect_timeout` | String | `10s` | -- | +| `datanode.client.tcp_nodelay` | Bool | `true` | -- | +| `logging` | -- | -- | The logging options. | +| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. | +| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. | +| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. | +| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. | +| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. | +| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.
Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
Ratios > 1 are treated as 1. Fractions < 0 are treated as 0. |
+| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
+| `export_metrics` | -- | -- | The frontend can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) via the remote-write API.
This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. | +| `export_metrics.enable` | Bool | `false` | whether enable export metrics. | +| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. | +| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommend to collect metrics generated by itself | +| `export_metrics.self_import.db` | String | `None` | -- | +| `export_metrics.remote_write` | -- | -- | -- | +| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. | +| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. | + + +### Metasrv + +| Key | Type | Default | Descriptions | +| --- | -----| ------- | ----------- | +| `data_home` | String | `/tmp/metasrv/` | The working home directory. | +| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. | +| `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. | +| `store_addr` | String | `127.0.0.1:2379` | Etcd server address. | +| `selector` | String | `lease_based` | Datanode selector type.
- `lease_based` (default value).
- `load_based`
For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". | +| `use_memory_store` | Bool | `false` | Store data in memory. | +| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. | +| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. | +| `procedure` | -- | -- | Procedure storage options. | +| `procedure.max_retry_times` | Integer | `12` | Procedure max retry time. | +| `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially | +| `procedure.max_metadata_value_size` | String | `1500KiB` | Auto split large value
GreptimeDB procedure uses etcd as the default metadata storage backend.
In etcd, the maximum size of any request is 1.5 MiB.
1500KiB = 1536KiB (1.5MiB) - 36KiB (reserved size of key)
Comment out `max_metadata_value_size` to disable splitting large values (no limit). |
+| `failure_detector` | -- | -- | -- |
+| `failure_detector.threshold` | Float | `8.0` | -- |
+| `failure_detector.min_std_deviation` | String | `100ms` | -- |
+| `failure_detector.acceptable_heartbeat_pause` | String | `3000ms` | -- |
+| `failure_detector.first_heartbeat_estimate` | String | `1000ms` | -- |
+| `datanode` | -- | -- | Datanode options. |
+| `datanode.client` | -- | -- | Datanode client options. |
+| `datanode.client.timeout` | String | `10s` | -- |
+| `datanode.client.connect_timeout` | String | `10s` | -- |
+| `datanode.client.tcp_nodelay` | Bool | `true` | -- |
+| `wal` | -- | -- | -- |
+| `wal.provider` | String | `raft_engine` | -- |
+| `wal.broker_endpoints` | Array | -- | The broker endpoints of the Kafka cluster. |
+| `wal.num_topics` | Integer | `64` | Number of topics to be created upon start. |
+| `wal.selector_type` | String | `round_robin` | Topic selector type.
Available selector types:
- `round_robin` (default) | +| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`. | +| `wal.replication_factor` | Integer | `1` | Expected number of replicas of each partition. | +| `wal.create_topic_timeout` | String | `30s` | Above which a topic creation operation will be cancelled. | +| `wal.backoff_init` | String | `500ms` | The initial backoff for kafka clients. | +| `wal.backoff_max` | String | `10s` | The maximum backoff for kafka clients. | +| `wal.backoff_base` | Integer | `2` | Exponential backoff rate, i.e. next backoff = base * current backoff. | +| `wal.backoff_deadline` | String | `5mins` | Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate. | +| `logging` | -- | -- | The logging options. | +| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. | +| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. | +| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. | +| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. | +| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. | +| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.
Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
Ratios > 1 are treated as 1. Fractions < 0 are treated as 0. |
+| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
+| `export_metrics` | -- | -- | The metasrv can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) via the remote-write API.
This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. | +| `export_metrics.enable` | Bool | `false` | whether enable export metrics. | +| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. | +| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommend to collect metrics generated by itself | +| `export_metrics.self_import.db` | String | `None` | -- | +| `export_metrics.remote_write` | -- | -- | -- | +| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. | +| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. | + + +### Datanode + +| Key | Type | Default | Descriptions | +| --- | -----| ------- | ----------- | +| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. | +| `node_id` | Integer | `None` | The datanode identifier and should be unique in the cluster. | +| `require_lease_before_startup` | Bool | `false` | Start services after regions have obtained leases.
It will block the datanode from starting if it can't receive leases in the heartbeat from metasrv. |
+| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.
By default, it provides services after all regions have been initialized. | +| `rpc_addr` | String | `127.0.0.1:3001` | The gRPC address of the datanode. | +| `rpc_hostname` | String | `None` | The hostname of the datanode. | +| `rpc_runtime_size` | Integer | `8` | The number of gRPC server worker threads. | +| `rpc_max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. | +| `rpc_max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. | +| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. | +| `heartbeat` | -- | -- | The heartbeat options. | +| `heartbeat.interval` | String | `3s` | Interval for sending heartbeat messages to the metasrv. | +| `heartbeat.retry_interval` | String | `3s` | Interval for retrying to send heartbeat messages to the metasrv. | +| `meta_client` | -- | -- | The metasrv client options. | +| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. | +| `meta_client.timeout` | String | `3s` | Operation timeout. | +| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. | +| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. | +| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. | +| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. | +| `meta_client.metadata_cache_max_capacity` | Integer | `100000` | The configuration about the cache of the metadata. | +| `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. | +| `meta_client.metadata_cache_tti` | String | `5m` | -- | +| `wal` | -- | -- | The WAL options. | +| `wal.provider` | String | `raft_engine` | The provider of the WAL.
- `raft_engine`: the WAL is stored in the local file system by raft-engine.
- `kafka`: the WAL is stored remotely in Kafka. |
+| `wal.dir` | String | `None` | The directory to store the WAL files.
**It's only used when the provider is `raft_engine`**. | +| `wal.file_size` | String | `256MB` | The size of the WAL segment file.
**It's only used when the provider is `raft_engine`**. | +| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.
**It's only used when the provider is `raft_engine`**. | +| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.
**It's only used when the provider is `raft_engine`**. | +| `wal.read_batch_size` | Integer | `128` | The read batch size.
**It's only used when the provider is `raft_engine`**. | +| `wal.sync_write` | Bool | `false` | Whether to use sync write.
**It's only used when the provider is `raft_engine`**. | +| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.
**It's only used when the provider is `raft_engine`**. | +| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.
**It's only used when the provider is `raft_engine`**. | +| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.
**It's only used when the provider is `raft_engine`**. | +| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.
**It's only used when the provider is `kafka`**. | +| `wal.max_batch_size` | String | `1MB` | The max size of a single producer batch.
Warning: Kafka has a default limit of 1MB per message in a topic.
**It's only used when the provider is `kafka`**. | +| `wal.linger` | String | `200ms` | The linger duration of a kafka batch producer.
**It's only used when the provider is `kafka`**. | +| `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.
**It's only used when the provider is `kafka`**. | +| `wal.backoff_init` | String | `500ms` | The initial backoff delay.
**It's only used when the provider is `kafka`**. | +| `wal.backoff_max` | String | `10s` | The maximum backoff delay.
**It's only used when the provider is `kafka`**. | +| `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.
**It's only used when the provider is `kafka`**. | +| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.
**It's only used when the provider is `kafka`**. | +| `storage` | -- | -- | The data storage options. | +| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. | +| `storage.type` | String | `File` | The storage type used to store the data.
- `File`: the data is stored in the local file system.
- `S3`: the data is stored in the S3 object storage.
- `Gcs`: the data is stored in the Google Cloud Storage.
- `Azblob`: the data is stored in the Azure Blob Storage.
- `Oss`: the data is stored in the Aliyun OSS. | +| `storage.cache_path` | String | `None` | Cache configuration for object storage such as 'S3' etc.
The local file cache directory. | +| `storage.cache_capacity` | String | `None` | The local file cache capacity in bytes. | +| `storage.bucket` | String | `None` | The S3 bucket name.
**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. | +| `storage.root` | String | `None` | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.
**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. | +| `storage.access_key_id` | String | `None` | The access key id of the aws account.
It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
**It's only used when the storage type is `S3` and `Oss`**. | +| `storage.secret_access_key` | String | `None` | The secret access key of the aws account.
It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
**It's only used when the storage type is `S3`**. | +| `storage.access_key_secret` | String | `None` | The secret access key of the aliyun account.
**It's only used when the storage type is `Oss`**. |
+| `storage.account_name` | String | `None` | The account name of the azure account.
**It's only used when the storage type is `Azblob`**. | +| `storage.account_key` | String | `None` | The account key of the azure account.
**It's only used when the storage type is `Azblob`**. | +| `storage.scope` | String | `None` | The scope of the google cloud storage.
**It's only used when the storage type is `Gcs`**. | +| `storage.credential_path` | String | `None` | The credential path of the google cloud storage.
**It's only used when the storage type is `Gcs`**. | +| `storage.container` | String | `None` | The container of the azure account.
**It's only used when the storage type is `Azblob`**. | +| `storage.sas_token` | String | `None` | The sas token of the azure account.
**It's only used when the storage type is `Azblob`**. | +| `storage.endpoint` | String | `None` | The endpoint of the S3 service.
**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. | +| `storage.region` | String | `None` | The region of the S3 service.
**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. | +| `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. | +| `region_engine.mito` | -- | -- | The Mito engine options. | +| `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. | +| `region_engine.mito.worker_channel_size` | Integer | `128` | Request channel size of each worker. | +| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. | +| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. | +| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). | +| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs | +| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. | +| `region_engine.mito.global_write_buffer_size` | String | `1GB` | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. | +| `region_engine.mito.global_write_buffer_reject_size` | String | `2GB` | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` | +| `region_engine.mito.sst_meta_cache_size` | String | `128MB` | Cache size for SST metadata. Setting it to 0 to disable the cache.
If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. | +| `region_engine.mito.vector_cache_size` | String | `512MB` | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. | +| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. | +| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. | +| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).
- `0`: using the default value (1/4 of cpu cores).
- `1`: scan in current thread.
- `n`: scan in parallelism n. | +| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. | +| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. | +| `region_engine.mito.inverted_index` | -- | -- | The options for inverted index in Mito engine. | +| `region_engine.mito.inverted_index.create_on_flush` | String | `auto` | Whether to create the index on flush.
- `auto`: automatically
- `disable`: never | +| `region_engine.mito.inverted_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.
- `auto`: automatically
- `disable`: never | +| `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query
- `auto`: automatically
- `disable`: never | +| `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `64M` | Memory threshold for performing an external sort during index creation.
Setting to empty will disable external sorting, forcing all sorting operations to happen in memory. | +| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). | +| `region_engine.mito.memtable` | -- | -- | -- | +| `region_engine.mito.memtable.type` | String | `partition_tree` | Memtable type.
- `partition_tree`: partition tree memtable
- `time_series`: time-series memtable (deprecated) | +| `region_engine.mito.memtable.index_max_keys_per_shard` | Integer | `8192` | The max number of keys in one shard. | +| `region_engine.mito.memtable.data_freeze_threshold` | Integer | `32768` | The max rows of data inside the actively writing buffer in one shard. | +| `region_engine.mito.memtable.fork_dictionary_bytes` | String | `1GiB` | Max dictionary bytes. | +| `logging` | -- | -- | The logging options. | +| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. | +| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. | +| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. | +| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. | +| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. | +| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.
Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
Ratios > 1 are treated as 1. Fractions < 0 are treated as 0. |
+| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
+| `export_metrics` | -- | -- | The datanode can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) via the remote-write API.
This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. | +| `export_metrics.enable` | Bool | `false` | whether enable export metrics. | +| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. | +| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommend to collect metrics generated by itself | +| `export_metrics.self_import.db` | String | `None` | -- | +| `export_metrics.remote_write` | -- | -- | -- | +| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. | +| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. | diff --git a/config/datanode.example.toml b/config/datanode.example.toml index 6d80fd839f..498b728241 100644 --- a/config/datanode.example.toml +++ b/config/datanode.example.toml @@ -1,169 +1,427 @@ -# Node running mode, see `standalone.example.toml`. -mode = "distributed" -# The datanode identifier, should be unique. +## The running mode of the datanode. It can be `standalone` or `distributed`. +mode = "standalone" + +## The datanode identifier and should be unique in the cluster. +## +toml2docs:none-default node_id = 42 -# gRPC server address, "127.0.0.1:3001" by default. -rpc_addr = "127.0.0.1:3001" -# Hostname of this node. -rpc_hostname = "127.0.0.1" -# The number of gRPC server worker threads, 8 by default. -rpc_runtime_size = 8 -# Start services after regions have obtained leases. -# It will block the datanode start if it can't receive leases in the heartbeat from metasrv. + +## Start services after regions have obtained leases. +## It will block the datanode start if it can't receive leases in the heartbeat from metasrv. require_lease_before_startup = false -# Initialize all regions in the background during the startup. -# By default, it provides services after all regions have been initialized. +## Initialize all regions in the background during the startup. +## By default, it provides services after all regions have been initialized. init_regions_in_background = false +## The gRPC address of the datanode. +rpc_addr = "127.0.0.1:3001" + +## The hostname of the datanode. +## +toml2docs:none-default +rpc_hostname = "127.0.0.1" + +## The number of gRPC server worker threads. +rpc_runtime_size = 8 + +## The maximum receive message size for gRPC server. +rpc_max_recv_message_size = "512MB" + +## The maximum send message size for gRPC server. +rpc_max_send_message_size = "512MB" + +## Enable telemetry to collect anonymous usage data. +enable_telemetry = true + +## The heartbeat options. [heartbeat] -# Interval for sending heartbeat messages to the Metasrv, 3 seconds by default. +## Interval for sending heartbeat messages to the metasrv. interval = "3s" -# Metasrv client options. +## Interval for retrying to send heartbeat messages to the metasrv. +retry_interval = "3s" + +## The metasrv client options. [meta_client] -# Metasrv address list. +## The addresses of the metasrv. metasrv_addrs = ["127.0.0.1:3002"] -# Heartbeat timeout, 500 milliseconds by default. -heartbeat_timeout = "500ms" -# Operation timeout, 3 seconds by default. + +## Operation timeout. timeout = "3s" -# Connect server timeout, 1 second by default. + +## Heartbeat timeout. +heartbeat_timeout = "500ms" + +## DDL timeout. +ddl_timeout = "10s" + +## Connect server timeout. 
connect_timeout = "1s" -# `TCP_NODELAY` option for accepted connections, true by default. + +## `TCP_NODELAY` option for accepted connections. tcp_nodelay = true -# WAL options. +## The configuration about the cache of the metadata. +metadata_cache_max_capacity = 100000 + +## TTL of the metadata cache. +metadata_cache_ttl = "10m" + +# TTI of the metadata cache. +metadata_cache_tti = "5m" + +## The WAL options. [wal] +## The provider of the WAL. +## - `raft_engine`: the wal is stored in the local file system by raft-engine. +## - `kafka`: it's remote wal that data is stored in Kafka. provider = "raft_engine" -# Raft-engine wal options, see `standalone.example.toml`. -# dir = "/tmp/greptimedb/wal" +## The directory to store the WAL files. +## **It's only used when the provider is `raft_engine`**. +## +toml2docs:none-default +dir = "/tmp/greptimedb/wal" + +## The size of the WAL segment file. +## **It's only used when the provider is `raft_engine`**. file_size = "256MB" + +## The threshold of the WAL size to trigger a flush. +## **It's only used when the provider is `raft_engine`**. purge_threshold = "4GB" + +## The interval to trigger a flush. +## **It's only used when the provider is `raft_engine`**. purge_interval = "10m" + +## The read batch size. +## **It's only used when the provider is `raft_engine`**. read_batch_size = 128 + +## Whether to use sync write. +## **It's only used when the provider is `raft_engine`**. sync_write = false -# Kafka wal options, see `standalone.example.toml`. -# broker_endpoints = ["127.0.0.1:9092"] -# Warning: Kafka has a default limit of 1MB per message in a topic. -# max_batch_size = "1MB" -# linger = "200ms" -# consumer_wait_timeout = "100ms" -# backoff_init = "500ms" -# backoff_max = "10s" -# backoff_base = 2 -# backoff_deadline = "5mins" +## Whether to reuse logically truncated log files. +## **It's only used when the provider is `raft_engine`**. +enable_log_recycle = true -# Storage options, see `standalone.example.toml`. +## Whether to pre-create log files on start up. +## **It's only used when the provider is `raft_engine`**. +prefill_log_files = false + +## Duration for fsyncing log files. +## **It's only used when the provider is `raft_engine`**. +sync_period = "10s" + +## The Kafka broker endpoints. +## **It's only used when the provider is `kafka`**. +broker_endpoints = ["127.0.0.1:9092"] + +## The max size of a single producer batch. +## Warning: Kafka has a default limit of 1MB per message in a topic. +## **It's only used when the provider is `kafka`**. +max_batch_size = "1MB" + +## The linger duration of a kafka batch producer. +## **It's only used when the provider is `kafka`**. +linger = "200ms" + +## The consumer wait timeout. +## **It's only used when the provider is `kafka`**. +consumer_wait_timeout = "100ms" + +## The initial backoff delay. +## **It's only used when the provider is `kafka`**. +backoff_init = "500ms" + +## The maximum backoff delay. +## **It's only used when the provider is `kafka`**. +backoff_max = "10s" + +## The exponential backoff rate, i.e. next backoff = base * current backoff. +## **It's only used when the provider is `kafka`**. +backoff_base = 2 + +## The deadline of retries. +## **It's only used when the provider is `kafka`**. +backoff_deadline = "5mins" + +# Example of using S3 as the storage. 
+# [storage] +# type = "S3" +# bucket = "greptimedb" +# root = "data" +# access_key_id = "test" +# secret_access_key = "123456" +# endpoint = "https://s3.amazonaws.com" +# region = "us-west-2" + +# Example of using Oss as the storage. +# [storage] +# type = "Oss" +# bucket = "greptimedb" +# root = "data" +# access_key_id = "test" +# access_key_secret = "123456" +# endpoint = "https://oss-cn-hangzhou.aliyuncs.com" + +# Example of using Azblob as the storage. +# [storage] +# type = "Azblob" +# container = "greptimedb" +# root = "data" +# account_name = "test" +# account_key = "123456" +# endpoint = "https://greptimedb.blob.core.windows.net" +# sas_token = "" + +# Example of using Gcs as the storage. +# [storage] +# type = "Gcs" +# bucket = "greptimedb" +# root = "data" +# scope = "test" +# credential_path = "123456" +# endpoint = "https://storage.googleapis.com" + +## The data storage options. [storage] -# The working home directory. +## The working home directory. data_home = "/tmp/greptimedb/" -# Storage type. + +## The storage type used to store the data. +## - `File`: the data is stored in the local file system. +## - `S3`: the data is stored in the S3 object storage. +## - `Gcs`: the data is stored in the Google Cloud Storage. +## - `Azblob`: the data is stored in the Azure Blob Storage. +## - `Oss`: the data is stored in the Aliyun OSS. type = "File" -# Cache configuration for object storage such as 'S3' etc. -# The local file cache directory -# cache_path = "/path/local_cache" -# The local file cache capacity in bytes. -# cache_capacity = "256MB" +## Cache configuration for object storage such as 'S3' etc. +## The local file cache directory. +## +toml2docs:none-default +cache_path = "/path/local_cache" + +## The local file cache capacity in bytes. +## +toml2docs:none-default +cache_capacity = "256MB" + +## The S3 bucket name. +## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**. +## +toml2docs:none-default +bucket = "greptimedb" + +## The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`. +## **It's only used when the storage type is `S3`, `Oss` and `Azblob`**. +## +toml2docs:none-default +root = "greptimedb" + +## The access key id of the aws account. +## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key. +## **It's only used when the storage type is `S3` and `Oss`**. +## +toml2docs:none-default +access_key_id = "test" + +## The secret access key of the aws account. +## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key. +## **It's only used when the storage type is `S3`**. +## +toml2docs:none-default +secret_access_key = "test" + +## The secret access key of the aliyun account. +## **It's only used when the storage type is `Oss`**. +## +toml2docs:none-default +access_key_secret = "test" + +## The account key of the azure account. +## **It's only used when the storage type is `Azblob`**. +## +toml2docs:none-default +account_name = "test" + +## The account key of the azure account. +## **It's only used when the storage type is `Azblob`**. +## +toml2docs:none-default +account_key = "test" + +## The scope of the google cloud storage. +## **It's only used when the storage type is `Gcs`**. +## +toml2docs:none-default +scope = "test" + +## The credential path of the google cloud storage. +## **It's only used when the storage type is `Gcs`**. 
+## +toml2docs:none-default +credential_path = "test" + +## The container of the azure account. +## **It's only used when the storage type is `Azblob`**. +## +toml2docs:none-default +container = "greptimedb" + +## The sas token of the azure account. +## **It's only used when the storage type is `Azblob`**. +## +toml2docs:none-default +sas_token = "" + +## The endpoint of the S3 service. +## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. +## +toml2docs:none-default +endpoint = "https://s3.amazonaws.com" + +## The region of the S3 service. +## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. +## +toml2docs:none-default +region = "us-west-2" # Custom storage options -#[[storage.providers]] -#type = "S3" -#[[storage.providers]] -#type = "Gcs" +# [[storage.providers]] +# type = "S3" +# [[storage.providers]] +# type = "Gcs" -# Mito engine options +## The region engine options. You can configure multiple region engines. [[region_engine]] + +## The Mito engine options. [region_engine.mito] -# Number of region workers + +## Number of region workers. num_workers = 8 -# Request channel size of each worker + +## Request channel size of each worker. worker_channel_size = 128 -# Max batch size for a worker to handle requests + +## Max batch size for a worker to handle requests. worker_request_batch_size = 64 -# Number of meta action updated to trigger a new checkpoint for the manifest + +## Number of meta action updated to trigger a new checkpoint for the manifest. manifest_checkpoint_distance = 10 -# Whether to compress manifest and checkpoint file by gzip (default false). + +## Whether to compress manifest and checkpoint file by gzip (default false). compress_manifest = false -# Max number of running background jobs + +## Max number of running background jobs max_background_jobs = 4 -# Interval to auto flush a region if it has not flushed yet. + +## Interval to auto flush a region if it has not flushed yet. auto_flush_interval = "1h" -# Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. + +## Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. global_write_buffer_size = "1GB" -# Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` + +## Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` global_write_buffer_reject_size = "2GB" -# Cache size for SST metadata. Setting it to 0 to disable the cache. -# If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. + +## Cache size for SST metadata. Setting it to 0 to disable the cache. +## If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. sst_meta_cache_size = "128MB" -# Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache. -# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. + +## Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache. +## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. vector_cache_size = "512MB" -# Cache size for pages of SST row groups. Setting it to 0 to disable the cache. -# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. + +## Cache size for pages of SST row groups. Setting it to 0 to disable the cache. 
+## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. page_cache_size = "512MB" -# Buffer size for SST writing. + +## Buffer size for SST writing. sst_write_buffer_size = "8MB" -# Parallelism to scan a region (default: 1/4 of cpu cores). -# - 0: using the default value (1/4 of cpu cores). -# - 1: scan in current thread. -# - n: scan in parallelism n. + +## Parallelism to scan a region (default: 1/4 of cpu cores). +## - `0`: using the default value (1/4 of cpu cores). +## - `1`: scan in current thread. +## - `n`: scan in parallelism n. scan_parallelism = 0 -# Capacity of the channel to send data from parallel scan tasks to the main task (default 32). + +## Capacity of the channel to send data from parallel scan tasks to the main task. parallel_scan_channel_size = 32 -# Whether to allow stale WAL entries read during replay. + +## Whether to allow stale WAL entries read during replay. allow_stale_entries = false +## The options for inverted index in Mito engine. [region_engine.mito.inverted_index] -# Whether to create the index on flush. -# - "auto": automatically -# - "disable": never + +## Whether to create the index on flush. +## - `auto`: automatically +## - `disable`: never create_on_flush = "auto" -# Whether to create the index on compaction. -# - "auto": automatically -# - "disable": never + +## Whether to create the index on compaction. +## - `auto`: automatically +## - `disable`: never create_on_compaction = "auto" -# Whether to apply the index on query -# - "auto": automatically -# - "disable": never + +## Whether to apply the index on query +## - `auto`: automatically +## - `disable`: never apply_on_query = "auto" -# Memory threshold for performing an external sort during index creation. -# Setting to empty will disable external sorting, forcing all sorting operations to happen in memory. + +## Memory threshold for performing an external sort during index creation. +## Setting to empty will disable external sorting, forcing all sorting operations to happen in memory. mem_threshold_on_create = "64M" -# File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). + +## File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). intermediate_path = "" [region_engine.mito.memtable] -# Memtable type. -# - "partition_tree": partition tree memtable -# - "time_series": time-series memtable (deprecated) +## Memtable type. +## - `partition_tree`: partition tree memtable +## - `time_series`: time-series memtable (deprecated) type = "partition_tree" -# The max number of keys in one shard. + +## The max number of keys in one shard. index_max_keys_per_shard = 8192 -# The max rows of data inside the actively writing buffer in one shard. + +## The max rows of data inside the actively writing buffer in one shard. data_freeze_threshold = 32768 -# Max dictionary bytes. + +## Max dictionary bytes. fork_dictionary_bytes = "1GiB" -# Log options, see `standalone.example.toml` -# [logging] -# dir = "/tmp/greptimedb/logs" -# level = "info" +## The logging options. +[logging] +## The directory to store the log files. +dir = "/tmp/greptimedb/logs" -# Datanode export the metrics generated by itself -# encoded to Prometheus remote-write format -# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself) -# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. 
-# [export_metrics] -# whether enable export metrics, default is false -# enable = false -# The interval of export metrics -# write_interval = "30s" -# [export_metrics.remote_write] -# The url the metrics send to. The url is empty by default, url example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema` -# url = "" -# HTTP headers of Prometheus remote-write carry -# headers = {} +## The log level. Can be `info`/`debug`/`warn`/`error`. +## +toml2docs:none-default +level = "info" + +## Enable OTLP tracing. +enable_otlp_tracing = false + +## The OTLP tracing endpoint. +## +toml2docs:none-default +otlp_endpoint = "" + +## Whether to append logs to stdout. +append_stdout = true + +## The percentage of tracing that will be sampled and exported. +## Valid range `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1. +## Ratios > 1 are treated as 1, and fractions < 0 are treated as 0. +[logging.tracing_sample_ratio] +default_ratio = 1.0 + +## The datanode can export its own metrics and send them to a Prometheus-compatible service (e.g. send to `greptimedb` itself) via the remote-write API. +## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. +[export_metrics] + +## Whether to enable export metrics. +enable = false + +## The interval of export metrics. +write_interval = "30s" + +## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself. +[export_metrics.self_import] +## +toml2docs:none-default +db = "information_schema" + +[export_metrics.remote_write] +## The URL to send the metrics to. For example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. +url = "" + +## HTTP headers to carry with the Prometheus remote-write requests. +headers = { } diff --git a/config/frontend.example.toml b/config/frontend.example.toml index dc9c6917bd..9123e4f8a0 100644 --- a/config/frontend.example.toml +++ b/config/frontend.example.toml @@ -1,106 +1,192 @@ -# Node running mode, see `standalone.example.toml`. -mode = "distributed" -# The default timezone of the server -# default_timezone = "UTC" +## The running mode of the frontend. It can be `standalone` or `distributed`. +mode = "standalone" +## The default timezone of the server. +## +toml2docs:none-default +default_timezone = "UTC" + +## The heartbeat options. [heartbeat] -# Interval for sending heartbeat task to the Metasrv, 5 seconds by default. -interval = "5s" -# Interval for retry sending heartbeat task, 5 seconds by default. -retry_interval = "5s" +## Interval for sending heartbeat messages to the metasrv. +interval = "18s" -# HTTP server options, see `standalone.example.toml`. +## Interval for retrying to send heartbeat messages to the metasrv. +retry_interval = "3s" + +## The HTTP server options. [http] +## The address to bind the HTTP server. addr = "127.0.0.1:4000" +## HTTP request timeout. timeout = "30s" +## HTTP request body limit. +## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`. body_limit = "64MB" -# gRPC server options, see `standalone.example.toml`. +## The gRPC server options. [grpc] +## The address to bind the gRPC server. addr = "127.0.0.1:4001" +## The number of server worker threads. runtime_size = 8 -# MySQL server options, see `standalone.example.toml`. +## MySQL server options. [mysql] +## Whether to enable. enable = true +## The addr to bind the MySQL server. addr = "127.0.0.1:4002" +## The number of server worker threads.
runtime_size = 2 -# MySQL server TLS options, see `standalone.example.toml`. +# MySQL server TLS options. [mysql.tls] + +## TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html +## - `disable` (default value) +## - `prefer` +## - `require` +## - `verify-ca` +## - `verify-full` mode = "disable" + +## Certificate file path. +## +toml2docs:none-default cert_path = "" + +## Private key file path. +## +toml2docs:none-default key_path = "" + +## Watch for Certificate and key file change and auto reload watch = false -# PostgresSQL server options, see `standalone.example.toml`. +## PostgresSQL server options. [postgres] +## Whether to enable enable = true +## The addr to bind the PostgresSQL server. addr = "127.0.0.1:4003" +## The number of server worker threads. runtime_size = 2 -# PostgresSQL server TLS options, see `standalone.example.toml`. +## PostgresSQL server TLS options, see `mysql_options.tls` section. [postgres.tls] +## TLS mode. mode = "disable" + +## Certificate file path. +## +toml2docs:none-default cert_path = "" + +## Private key file path. +## +toml2docs:none-default key_path = "" + +## Watch for Certificate and key file change and auto reload watch = false -# OpenTSDB protocol options, see `standalone.example.toml`. +## OpenTSDB protocol options. [opentsdb] +## Whether to enable enable = true +## OpenTSDB telnet API server address. addr = "127.0.0.1:4242" +## The number of server worker threads. runtime_size = 2 -# InfluxDB protocol options, see `standalone.example.toml`. +## InfluxDB protocol options. [influxdb] +## Whether to enable InfluxDB protocol in HTTP API. enable = true -# Prometheus remote storage options, see `standalone.example.toml`. +## Prometheus remote storage options [prom_store] +## Whether to enable Prometheus remote write and read in HTTP API. enable = true -# Whether to store the data from Prometheus remote write in metric engine. -# true by default +## Whether to store the data from Prometheus remote write in metric engine. with_metric_engine = true -# Metasrv client options, see `datanode.example.toml`. +## The metasrv client options. [meta_client] +## The addresses of the metasrv. metasrv_addrs = ["127.0.0.1:3002"] + +## Operation timeout. timeout = "3s" -# DDL timeouts options. + +## Heartbeat timeout. +heartbeat_timeout = "500ms" + +## DDL timeout. ddl_timeout = "10s" + +## Connect server timeout. connect_timeout = "1s" + +## `TCP_NODELAY` option for accepted connections. tcp_nodelay = true -# The configuration about the cache of the Metadata. -# default: 100000 + +## The configuration about the cache of the metadata. metadata_cache_max_capacity = 100000 -# default: 10m + +## TTL of the metadata cache. metadata_cache_ttl = "10m" -# default: 5m + +# TTI of the metadata cache. metadata_cache_tti = "5m" -# Log options, see `standalone.example.toml` -# [logging] -# dir = "/tmp/greptimedb/logs" -# level = "info" - -# Datanode options. +## Datanode options. [datanode] -# Datanode client options. +## Datanode client options. [datanode.client] timeout = "10s" connect_timeout = "10s" tcp_nodelay = true -# Frontend export the metrics generated by itself -# encoded to Prometheus remote-write format -# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself) -# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. 
-# [export_metrics] -# whether enable export metrics, default is false -# enable = false -# The interval of export metrics -# write_interval = "30s" -# for `frontend`, `self_import` is recommend to collect metrics generated by itself -# [export_metrics.self_import] -# db = "information_schema" +## The logging options. +[logging] +## The directory to store the log files. +dir = "/tmp/greptimedb/logs" + +## The log level. Can be `info`/`debug`/`warn`/`error`. +## +toml2docs:none-default +level = "info" + +## Enable OTLP tracing. +enable_otlp_tracing = false + +## The OTLP tracing endpoint. +## +toml2docs:none-default +otlp_endpoint = "" + +## Whether to append logs to stdout. +append_stdout = true + +## The percentage of tracing that will be sampled and exported. +## Valid range `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1. +## Ratios > 1 are treated as 1, and fractions < 0 are treated as 0. +[logging.tracing_sample_ratio] +default_ratio = 1.0 + +## The frontend can export its own metrics and send them to a Prometheus-compatible service (e.g. send to `greptimedb` itself) via the remote-write API. +## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. +[export_metrics] + +## Whether to enable export metrics. +enable = false + +## The interval of export metrics. +write_interval = "30s" + +## For the `frontend`, `self_import` is recommended to collect metrics generated by itself. +[export_metrics.self_import] +## +toml2docs:none-default +db = "information_schema" + +[export_metrics.remote_write] +## The URL to send the metrics to. For example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. +url = "" + +## HTTP headers to carry with the Prometheus remote-write requests. +headers = { } diff --git a/config/metasrv.example.toml b/config/metasrv.example.toml index 472a57e40a..1b0bca2e26 100644 --- a/config/metasrv.example.toml +++ b/config/metasrv.example.toml @@ -1,39 +1,44 @@ -# The working home directory. +## The working home directory. data_home = "/tmp/metasrv/" -# The bind address of metasrv, "127.0.0.1:3002" by default. + +## The bind address of metasrv. bind_addr = "127.0.0.1:3002" -# The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. + +## The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. server_addr = "127.0.0.1:3002" -# Etcd server address, "127.0.0.1:2379" by default. + +## Etcd server address. store_addr = "127.0.0.1:2379" -# Datanode selector type. -# - "lease_based" (default value). -# - "load_based" -# For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". + +## Datanode selector type. +## - `lease_based` (default value). +## - `load_based` +## For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". selector = "lease_based" -# Store data in memory, false by default. + +## Store data in memory. use_memory_store = false -# Whether to enable greptimedb telemetry, true by default. + +## Whether to enable greptimedb telemetry. enable_telemetry = true -# If it's not empty, the metasrv will store all data with this key prefix. + +## If it's not empty, the metasrv will store all data with this key prefix. store_key_prefix = "" -# Log options, see `standalone.example.toml` -# [logging] -# dir = "/tmp/greptimedb/logs" -# level = "info" - -# Procedure storage options.
+## Procedure storage options. [procedure] -# Procedure max retry time. + +## Procedure max retry time. max_retry_times = 12 -# Initial retry delay of procedures, increases exponentially + +## Initial retry delay of procedures, increases exponentially retry_delay = "500ms" -# Auto split large value -# GreptimeDB procedure uses etcd as the default metadata storage backend. -# The etcd the maximum size of any request is 1.5 MiB -# 1500KiB = 1536KiB (1.5MiB) - 36KiB (reserved size of key) -# Comments out the `max_metadata_value_size`, for don't split large value (no limit). + +## Automatically split large values. +## GreptimeDB procedure uses etcd as the default metadata storage backend. +## The maximum size of any etcd request is 1.5 MiB: +## 1500KiB = 1536KiB (1.5MiB) - 36KiB (reserved size of key). +## Comment out `max_metadata_value_size` to disable splitting large values (no limit). max_metadata_value_size = "1500KiB" # Failure detectors options. @@ -43,57 +48,96 @@ min_std_deviation = "100ms" acceptable_heartbeat_pause = "3000ms" first_heartbeat_estimate = "1000ms" -# # Datanode options. -# [datanode] -# # Datanode client options. -# [datanode.client_options] -# timeout = "10s" -# connect_timeout = "10s" -# tcp_nodelay = true +## Datanode options. +[datanode] +## Datanode client options. +[datanode.client] +timeout = "10s" +connect_timeout = "10s" +tcp_nodelay = true [wal] # Available wal providers: -# - "raft_engine" (default) -# - "kafka" +# - `raft_engine` (default): there is no raft-engine wal config here since metasrv only participates in remote wal currently. +# - `kafka`: metasrv **has to be** configured with the kafka wal config when the datanode uses the kafka wal provider. provider = "raft_engine" -# There're none raft-engine wal config since meta srv only involves in remote wal currently. - # Kafka wal config. -# The broker endpoints of the Kafka cluster. ["127.0.0.1:9092"] by default. -# broker_endpoints = ["127.0.0.1:9092"] -# Number of topics to be created upon start. -# num_topics = 64 -# Topic selector type. -# Available selector types: -# - "round_robin" (default) -# selector_type = "round_robin" -# A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`. -# topic_name_prefix = "greptimedb_wal_topic" -# Expected number of replicas of each partition. -# replication_factor = 1 -# Above which a topic creation operation will be cancelled. -# create_topic_timeout = "30s" -# The initial backoff for kafka clients. -# backoff_init = "500ms" -# The maximum backoff for kafka clients. -# backoff_max = "10s" -# Exponential backoff rate, i.e. next backoff = base * current backoff. -# backoff_base = 2 -# Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate. -# backoff_deadline = "5mins" -# Metasrv export the metrics generated by itself -# encoded to Prometheus remote-write format -# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself) -# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. -# [export_metrics] -# whether enable export metrics, default is false -# enable = false -# The interval of export metrics -# write_interval = "30s" -# [export_metrics.remote_write] -# The url the metrics send to.
The url is empty by default, url example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema` -# url = "" -# HTTP headers of Prometheus remote-write carry -# headers = {} +## The broker endpoints of the Kafka cluster. +broker_endpoints = ["127.0.0.1:9092"] + +## Number of topics to be created upon start. +num_topics = 64 + +## Topic selector type. +## Available selector types: +## - `round_robin` (default) +selector_type = "round_robin" + +## A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`. +topic_name_prefix = "greptimedb_wal_topic" + +## Expected number of replicas of each partition. +replication_factor = 1 + +## The timeout above which a topic creation operation will be cancelled. +create_topic_timeout = "30s" +## The initial backoff for kafka clients. +backoff_init = "500ms" + +## The maximum backoff for kafka clients. +backoff_max = "10s" + +## Exponential backoff rate, i.e. next backoff = base * current backoff. +backoff_base = 2 + +## Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate. +backoff_deadline = "5mins" + +## The logging options. +[logging] +## The directory to store the log files. +dir = "/tmp/greptimedb/logs" + +## The log level. Can be `info`/`debug`/`warn`/`error`. +## +toml2docs:none-default +level = "info" + +## Enable OTLP tracing. +enable_otlp_tracing = false + +## The OTLP tracing endpoint. +## +toml2docs:none-default +otlp_endpoint = "" + +## Whether to append logs to stdout. +append_stdout = true + +## The percentage of tracing that will be sampled and exported. +## Valid range `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1. +## Ratios > 1 are treated as 1, and fractions < 0 are treated as 0. +[logging.tracing_sample_ratio] +default_ratio = 1.0 + +## The metasrv can export its own metrics and send them to a Prometheus-compatible service (e.g. send to `greptimedb` itself) via the remote-write API. +## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. +[export_metrics] + +## Whether to enable export metrics. +enable = false + +## The interval of export metrics. +write_interval = "30s" + +## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself. +[export_metrics.self_import] +## +toml2docs:none-default +db = "information_schema" + +[export_metrics.remote_write] +## The URL to send the metrics to. For example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. +url = "" + +## HTTP headers to carry with the Prometheus remote-write requests. +headers = { } diff --git a/config/standalone.example.toml b/config/standalone.example.toml index 916e536a82..189e679e86 100644 --- a/config/standalone.example.toml +++ b/config/standalone.example.toml @@ -1,284 +1,474 @@ -# Node running mode, "standalone" or "distributed". +## The running mode of the datanode. It can be `standalone` or `distributed`. mode = "standalone" -# Whether to enable greptimedb telemetry, true by default. -enable_telemetry = true -# The default timezone of the server -# default_timezone = "UTC" -# HTTP server options. +## Enable telemetry to collect anonymous usage data. +enable_telemetry = true + +## The default timezone of the server. +## +toml2docs:none-default +default_timezone = "UTC" + +## The HTTP server options. [http] -# Server address, "127.0.0.1:4000" by default. +## The address to bind the HTTP server.
addr = "127.0.0.1:4000" -# HTTP request timeout, 30s by default. +## HTTP request timeout. timeout = "30s" -# HTTP request body limit, 64Mb by default. -# the following units are supported: B, KB, KiB, MB, MiB, GB, GiB, TB, TiB, PB, PiB +## HTTP request body limit. +## Support the following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`. body_limit = "64MB" -# gRPC server options. +## The gRPC server options. [grpc] -# Server address, "127.0.0.1:4001" by default. +## The address to bind the gRPC server. addr = "127.0.0.1:4001" -# The number of server worker threads, 8 by default. +## The number of server worker threads. runtime_size = 8 -# MySQL server options. +## MySQL server options. [mysql] -# Whether to enable +## Whether to enable. enable = true -# Server address, "127.0.0.1:4002" by default. +## The addr to bind the MySQL server. addr = "127.0.0.1:4002" -# The number of server worker threads, 2 by default. +## The number of server worker threads. runtime_size = 2 # MySQL server TLS options. [mysql.tls] -# TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html -# - "disable" (default value) -# - "prefer" -# - "require" -# - "verify-ca" -# - "verify-full" + +## TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html +## - `disable` (default value) +## - `prefer` +## - `require` +## - `verify-ca` +## - `verify-full` mode = "disable" -# Certificate file path. + +## Certificate file path. +## +toml2docs:none-default cert_path = "" -# Private key file path. + +## Private key file path. +## +toml2docs:none-default key_path = "" -# Watch for Certificate and key file change and auto reload + +## Watch for Certificate and key file change and auto reload watch = false -# PostgresSQL server options. +## PostgresSQL server options. [postgres] -# Whether to enable +## Whether to enable enable = true -# Server address, "127.0.0.1:4003" by default. +## The addr to bind the PostgresSQL server. addr = "127.0.0.1:4003" -# The number of server worker threads, 2 by default. +## The number of server worker threads. runtime_size = 2 -# PostgresSQL server TLS options, see `[mysql_options.tls]` section. +## PostgresSQL server TLS options, see `mysql_options.tls` section. [postgres.tls] -# TLS mode. +## TLS mode. mode = "disable" -# certificate file path. + +## Certificate file path. +## +toml2docs:none-default cert_path = "" -# private key file path. + +## Private key file path. +## +toml2docs:none-default key_path = "" -# Watch for Certificate and key file change and auto reload + +## Watch for Certificate and key file change and auto reload watch = false -# OpenTSDB protocol options. +## OpenTSDB protocol options. [opentsdb] -# Whether to enable +## Whether to enable enable = true -# OpenTSDB telnet API server address, "127.0.0.1:4242" by default. +## OpenTSDB telnet API server address. addr = "127.0.0.1:4242" -# The number of server worker threads, 2 by default. +## The number of server worker threads. runtime_size = 2 -# InfluxDB protocol options. +## InfluxDB protocol options. [influxdb] -# Whether to enable InfluxDB protocol in HTTP API, true by default. +## Whether to enable InfluxDB protocol in HTTP API. enable = true -# Prometheus remote storage options +## Prometheus remote storage options [prom_store] -# Whether to enable Prometheus remote write and read in HTTP API, true by default. +## Whether to enable Prometheus remote write and read in HTTP API. 
enable = true -# Whether to store the data from Prometheus remote write in metric engine. -# true by default +## Whether to store the data from Prometheus remote write in metric engine. with_metric_engine = true +## The WAL options. [wal] -# Available wal providers: -# - "raft_engine" (default) -# - "kafka" +## The provider of the WAL. +## - `raft_engine`: the wal is stored in the local file system by raft-engine. +## - `kafka`: it's remote wal that data is stored in Kafka. provider = "raft_engine" -# Raft-engine wal options. -# WAL data directory -# dir = "/tmp/greptimedb/wal" -# WAL file size in bytes. +## The directory to store the WAL files. +## **It's only used when the provider is `raft_engine`**. +## +toml2docs:none-default +dir = "/tmp/greptimedb/wal" + +## The size of the WAL segment file. +## **It's only used when the provider is `raft_engine`**. file_size = "256MB" -# WAL purge threshold. + +## The threshold of the WAL size to trigger a flush. +## **It's only used when the provider is `raft_engine`**. purge_threshold = "4GB" -# WAL purge interval in seconds. + +## The interval to trigger a flush. +## **It's only used when the provider is `raft_engine`**. purge_interval = "10m" -# WAL read batch size. + +## The read batch size. +## **It's only used when the provider is `raft_engine`**. read_batch_size = 128 -# Whether to sync log file after every write. + +## Whether to use sync write. +## **It's only used when the provider is `raft_engine`**. sync_write = false -# Whether to reuse logically truncated log files. + +## Whether to reuse logically truncated log files. +## **It's only used when the provider is `raft_engine`**. enable_log_recycle = true -# Whether to pre-create log files on start up + +## Whether to pre-create log files on start up. +## **It's only used when the provider is `raft_engine`**. prefill_log_files = false -# Duration for fsyncing log files. -sync_period = "1000ms" -# Kafka wal options. -# The broker endpoints of the Kafka cluster. ["127.0.0.1:9092"] by default. -# broker_endpoints = ["127.0.0.1:9092"] +## Duration for fsyncing log files. +## **It's only used when the provider is `raft_engine`**. +sync_period = "10s" -# Number of topics to be created upon start. -# num_topics = 64 -# Topic selector type. -# Available selector types: -# - "round_robin" (default) -# selector_type = "round_robin" -# The prefix of topic name. -# topic_name_prefix = "greptimedb_wal_topic" -# The number of replicas of each partition. -# Warning: the replication factor must be positive and must not be greater than the number of broker endpoints. -# replication_factor = 1 +## The Kafka broker endpoints. +## **It's only used when the provider is `kafka`**. +broker_endpoints = ["127.0.0.1:9092"] -# The max size of a single producer batch. -# Warning: Kafka has a default limit of 1MB per message in a topic. -# max_batch_size = "1MB" -# The linger duration. -# linger = "200ms" -# The consumer wait timeout. -# consumer_wait_timeout = "100ms" -# Create topic timeout. -# create_topic_timeout = "30s" +## The max size of a single producer batch. +## Warning: Kafka has a default limit of 1MB per message in a topic. +## **It's only used when the provider is `kafka`**. +max_batch_size = "1MB" -# The initial backoff delay. -# backoff_init = "500ms" -# The maximum backoff delay. -# backoff_max = "10s" -# Exponential backoff rate, i.e. next backoff = base * current backoff. -# backoff_base = 2 -# The deadline of retries. 
-# backoff_deadline = "5mins" +## The linger duration of a kafka batch producer. +## **It's only used when the provider is `kafka`**. +linger = "200ms" -# Metadata storage options. +## The consumer wait timeout. +## **It's only used when the provider is `kafka`**. +consumer_wait_timeout = "100ms" + +## The initial backoff delay. +## **It's only used when the provider is `kafka`**. +backoff_init = "500ms" + +## The maximum backoff delay. +## **It's only used when the provider is `kafka`**. +backoff_max = "10s" + +## The exponential backoff rate, i.e. next backoff = base * current backoff. +## **It's only used when the provider is `kafka`**. +backoff_base = 2 + +## The deadline of retries. +## **It's only used when the provider is `kafka`**. +backoff_deadline = "5mins" + +## Metadata storage options. [metadata_store] -# Kv file size in bytes. +## Kv file size in bytes. file_size = "256MB" -# Kv purge threshold. +## Kv purge threshold. purge_threshold = "4GB" -# Procedure storage options. +## Procedure storage options. [procedure] -# Procedure max retry time. +## Procedure max retry time. max_retry_times = 3 -# Initial retry delay of procedures, increases exponentially +## Initial retry delay of procedures, increases exponentially retry_delay = "500ms" -# Storage options. +# Example of using S3 as the storage. +# [storage] +# type = "S3" +# bucket = "greptimedb" +# root = "data" +# access_key_id = "test" +# secret_access_key = "123456" +# endpoint = "https://s3.amazonaws.com" +# region = "us-west-2" + +# Example of using Oss as the storage. +# [storage] +# type = "Oss" +# bucket = "greptimedb" +# root = "data" +# access_key_id = "test" +# access_key_secret = "123456" +# endpoint = "https://oss-cn-hangzhou.aliyuncs.com" + +# Example of using Azblob as the storage. +# [storage] +# type = "Azblob" +# container = "greptimedb" +# root = "data" +# account_name = "test" +# account_key = "123456" +# endpoint = "https://greptimedb.blob.core.windows.net" +# sas_token = "" + +# Example of using Gcs as the storage. +# [storage] +# type = "Gcs" +# bucket = "greptimedb" +# root = "data" +# scope = "test" +# credential_path = "123456" +# endpoint = "https://storage.googleapis.com" + +## The data storage options. [storage] -# The working home directory. +## The working home directory. data_home = "/tmp/greptimedb/" -# Storage type. + +## The storage type used to store the data. +## - `File`: the data is stored in the local file system. +## - `S3`: the data is stored in the S3 object storage. +## - `Gcs`: the data is stored in the Google Cloud Storage. +## - `Azblob`: the data is stored in the Azure Blob Storage. +## - `Oss`: the data is stored in the Aliyun OSS. type = "File" -# Cache configuration for object storage such as 'S3' etc. -# cache_path = "/path/local_cache" -# The local file cache capacity in bytes. -# cache_capacity = "256MB" + +## Cache configuration for object storage such as 'S3' etc. +## The local file cache directory. +## +toml2docs:none-default +cache_path = "/path/local_cache" + +## The local file cache capacity in bytes. +## +toml2docs:none-default +cache_capacity = "256MB" + +## The S3 bucket name. +## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**. +## +toml2docs:none-default +bucket = "greptimedb" + +## The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`. +## **It's only used when the storage type is `S3`, `Oss` and `Azblob`**. +## +toml2docs:none-default +root = "greptimedb" + +## The access key id of the aws account. 
+## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key. +## **It's only used when the storage type is `S3` and `Oss`**. +## +toml2docs:none-default +access_key_id = "test" + +## The secret access key of the aws account. +## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key. +## **It's only used when the storage type is `S3`**. +## +toml2docs:none-default +secret_access_key = "test" + +## The secret access key of the aliyun account. +## **It's only used when the storage type is `Oss`**. +## +toml2docs:none-default +access_key_secret = "test" + +## The account key of the azure account. +## **It's only used when the storage type is `Azblob`**. +## +toml2docs:none-default +account_name = "test" + +## The account key of the azure account. +## **It's only used when the storage type is `Azblob`**. +## +toml2docs:none-default +account_key = "test" + +## The scope of the google cloud storage. +## **It's only used when the storage type is `Gcs`**. +## +toml2docs:none-default +scope = "test" + +## The credential path of the google cloud storage. +## **It's only used when the storage type is `Gcs`**. +## +toml2docs:none-default +credential_path = "test" + +## The container of the azure account. +## **It's only used when the storage type is `Azblob`**. +## +toml2docs:none-default +container = "greptimedb" + +## The sas token of the azure account. +## **It's only used when the storage type is `Azblob`**. +## +toml2docs:none-default +sas_token = "" + +## The endpoint of the S3 service. +## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. +## +toml2docs:none-default +endpoint = "https://s3.amazonaws.com" + +## The region of the S3 service. +## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. +## +toml2docs:none-default +region = "us-west-2" # Custom storage options -#[[storage.providers]] -#type = "S3" -#[[storage.providers]] -#type = "Gcs" +# [[storage.providers]] +# type = "S3" +# [[storage.providers]] +# type = "Gcs" -# Mito engine options +## The region engine options. You can configure multiple region engines. [[region_engine]] + +## The Mito engine options. [region_engine.mito] -# Number of region workers + +## Number of region workers. num_workers = 8 -# Request channel size of each worker + +## Request channel size of each worker. worker_channel_size = 128 -# Max batch size for a worker to handle requests + +## Max batch size for a worker to handle requests. worker_request_batch_size = 64 -# Number of meta action updated to trigger a new checkpoint for the manifest + +## Number of meta action updated to trigger a new checkpoint for the manifest. manifest_checkpoint_distance = 10 -# Whether to compress manifest and checkpoint file by gzip (default false). + +## Whether to compress manifest and checkpoint file by gzip (default false). compress_manifest = false -# Max number of running background jobs + +## Max number of running background jobs max_background_jobs = 4 -# Interval to auto flush a region if it has not flushed yet. + +## Interval to auto flush a region if it has not flushed yet. auto_flush_interval = "1h" -# Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. + +## Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. 
global_write_buffer_size = "1GB" -# Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` + +## Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` global_write_buffer_reject_size = "2GB" -# Cache size for SST metadata. Setting it to 0 to disable the cache. -# If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. + +## Cache size for SST metadata. Setting it to 0 to disable the cache. +## If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. sst_meta_cache_size = "128MB" -# Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache. -# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. + +## Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache. +## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. vector_cache_size = "512MB" -# Cache size for pages of SST row groups. Setting it to 0 to disable the cache. -# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. + +## Cache size for pages of SST row groups. Setting it to 0 to disable the cache. +## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. page_cache_size = "512MB" -# Buffer size for SST writing. + +## Buffer size for SST writing. sst_write_buffer_size = "8MB" -# Parallelism to scan a region (default: 1/4 of cpu cores). -# - 0: using the default value (1/4 of cpu cores). -# - 1: scan in current thread. -# - n: scan in parallelism n. + +## Parallelism to scan a region (default: 1/4 of cpu cores). +## - `0`: using the default value (1/4 of cpu cores). +## - `1`: scan in current thread. +## - `n`: scan in parallelism n. scan_parallelism = 0 -# Capacity of the channel to send data from parallel scan tasks to the main task (default 32). + +## Capacity of the channel to send data from parallel scan tasks to the main task. parallel_scan_channel_size = 32 -# Whether to allow stale WAL entries read during replay. + +## Whether to allow stale WAL entries read during replay. allow_stale_entries = false +## The options for inverted index in Mito engine. [region_engine.mito.inverted_index] -# Whether to create the index on flush. -# - "auto": automatically -# - "disable": never + +## Whether to create the index on flush. +## - `auto`: automatically +## - `disable`: never create_on_flush = "auto" -# Whether to create the index on compaction. -# - "auto": automatically -# - "disable": never + +## Whether to create the index on compaction. +## - `auto`: automatically +## - `disable`: never create_on_compaction = "auto" -# Whether to apply the index on query -# - "auto": automatically -# - "disable": never + +## Whether to apply the index on query +## - `auto`: automatically +## - `disable`: never apply_on_query = "auto" -# Memory threshold for performing an external sort during index creation. -# Setting to empty will disable external sorting, forcing all sorting operations to happen in memory. + +## Memory threshold for performing an external sort during index creation. +## Setting to empty will disable external sorting, forcing all sorting operations to happen in memory. mem_threshold_on_create = "64M" -# File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). 
+ +## File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). intermediate_path = "" [region_engine.mito.memtable] -# Memtable type. -# - "partition_tree": partition tree memtable -# - "time_series": time-series memtable (deprecated) +## Memtable type. +## - `partition_tree`: partition tree memtable +## - `time_series`: time-series memtable (deprecated) type = "partition_tree" -# The max number of keys in one shard. + +## The max number of keys in one shard. index_max_keys_per_shard = 8192 -# The max rows of data inside the actively writing buffer in one shard. + +## The max rows of data inside the actively writing buffer in one shard. data_freeze_threshold = 32768 -# Max dictionary bytes. + +## Max dictionary bytes. fork_dictionary_bytes = "1GiB" -# Log options -# [logging] -# Specify logs directory. -# dir = "/tmp/greptimedb/logs" -# Specify the log level [info | debug | error | warn] -# level = "info" -# whether enable tracing, default is false -# enable_otlp_tracing = false -# tracing exporter endpoint with format `ip:port`, we use grpc oltp as exporter, default endpoint is `localhost:4317` -# otlp_endpoint = "localhost:4317" -# Whether to append logs to stdout. Defaults to true. -# append_stdout = true -# The percentage of tracing will be sampled and exported. Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1. ratio > 1 are treated as 1. Fractions < 0 are treated as 0 -# [logging.tracing_sample_ratio] -# default_ratio = 0.0 +## The logging options. +[logging] +## The directory to store the log files. +dir = "/tmp/greptimedb/logs" -# Standalone export the metrics generated by itself -# encoded to Prometheus remote-write format -# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself) -# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. -# [export_metrics] -# whether enable export metrics, default is false -# enable = false -# The interval of export metrics -# write_interval = "30s" -# for `standalone`, `self_import` is recommend to collect metrics generated by itself -# [export_metrics.self_import] -# db = "information_schema" +## The log level. Can be `info`/`debug`/`warn`/`error`. +## +toml2docs:none-default +level = "info" + +## Enable OTLP tracing. +enable_otlp_tracing = false + +## The OTLP tracing endpoint. +## +toml2docs:none-default +otlp_endpoint = "" + +## Whether to append logs to stdout. +append_stdout = true + +## The percentage of tracing that will be sampled and exported. +## Valid range `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1. +## Ratios > 1 are treated as 1, and fractions < 0 are treated as 0. +[logging.tracing_sample_ratio] +default_ratio = 1.0 + +## The standalone server can export its own metrics and send them to a Prometheus-compatible service (e.g. send to `greptimedb` itself) via the remote-write API. +## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. +[export_metrics] + +## Whether to enable export metrics. +enable = false + +## The interval of export metrics. +write_interval = "30s" + +## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself. +[export_metrics.self_import] +## +toml2docs:none-default +db = "information_schema" + +[export_metrics.remote_write] +## The URL to send the metrics to.
For example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. +url = "" + +## HTTP headers to carry with the Prometheus remote-write requests. +headers = { }