Merge branch 'main' into create-view

build(deps): bump rustls from 0.22.3 to 0.22.4 (#3764)
Bumps [rustls](https://github.com/rustls/rustls) from 0.22.3 to 0.22.4. - [Release notes](https://github.com/rustls/rustls/releases) - [Changelog](https://github.com/rustls/rustls/blob/main/CHANGELOG.md) - [Commits](https://github.com/rustls/rustls/compare/v/0.22.3...v/0.22.4) --- updated-dependencies: - dependency-name: rustls dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-12-25 07:30:02 +00:00 · 2024-04-22 21:08:22 +08:00 · 2024-04-22 17:19:08 +08:00 · 2024-04-20 06:01:32 +00:00 · 2024-04-19 09:56:09 +00:00 · 2024-04-19 06:38:34 +00:00
714 changed files with 34498 additions and 10985 deletions
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -0,0 +1,27 @@
+# GreptimeDB CODEOWNERS
+
+# These owners will be the default owners for everything in the repo.
+
+* @GreptimeTeam/db-approver
+
+## [Module] Database Engine
+/src/index @zhongzc
+/src/mito2 @evenyag @v0y4g3r @waynexia
+/src/query @evenyag
+
+## [Module] Distributed
+/src/common/meta @MichaelScofield
+/src/common/procedure @MichaelScofield
+/src/meta-client @MichaelScofield
+/src/meta-srv @MichaelScofield
+
+## [Module] Write Ahead Log
+/src/log-store @v0y4g3r
+/src/store-api @v0y4g3r
+
+## [Module] Metrics Engine
+/src/metric-engine @waynexia
+/src/promql @waynexia
+
+## [Module] Flow
+/src/flow @zhongzc @waynexia
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -39,7 +39,7 @@ body:
        - Query Engine
        - Table Engine
        - Write Protocols
-        - MetaSrv
+        - Metasrv
        - Frontend
        - Datanode
        - Other
--- a/.github/actions/build-windows-artifacts/action.yml
+++ b/.github/actions/build-windows-artifacts/action.yml
@@ -26,6 +26,8 @@ runs:
  using: composite
  steps:
    - uses: arduino/setup-protoc@v3
+      with:
+        repo-token: ${{ secrets.GITHUB_TOKEN }}

    - name: Install rust toolchain
      uses: dtolnay/rust-toolchain@master
--- a/.github/workflows/apidoc.yml
+++ b/.github/workflows/apidoc.yml
@@ -13,7 +13,7 @@ on:
 name: Build API docs

 env:
-  RUST_TOOLCHAIN: nightly-2023-12-19
+  RUST_TOOLCHAIN: nightly-2024-04-18

 jobs:
  apidoc:
@@ -40,3 +40,4 @@ jobs:
      uses: JamesIves/github-pages-deploy-action@v4
      with:
        folder: target/doc
+        single-commit: true
--- a/.github/workflows/develop.yml
+++ b/.github/workflows/develop.yml
@@ -30,15 +30,20 @@ concurrency:
  cancel-in-progress: true

 env:
-  RUST_TOOLCHAIN: nightly-2023-12-19
+  RUST_TOOLCHAIN: nightly-2024-04-18

 jobs:
-  typos:
-    name: Spell Check with Typos
+  check-typos-and-docs:
+    name: Check typos and docs
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v4
      - uses: crate-ci/typos@v1.13.10
+      - name: Check the config docs
+        run: |
+          make config-docs && \
+          git diff --name-only --exit-code ./config/config.md  \
+          || (echo "'config/config.md' is not up-to-date, please run 'make config-docs'." && exit 1)

  check:
    name: Check
@@ -93,6 +98,8 @@ jobs:
    steps:
      - uses: actions/checkout@v4
      - uses: arduino/setup-protoc@v3
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.RUST_TOOLCHAIN }}
@@ -123,10 +130,12 @@ jobs:
    runs-on: ubuntu-latest
    strategy:
      matrix:
-        target: [ "fuzz_create_table", "fuzz_alter_table" ]
+        target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database" ]
    steps:
      - uses: actions/checkout@v4
      - uses: arduino/setup-protoc@v3
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.RUST_TOOLCHAIN }}
@@ -305,10 +314,10 @@ jobs:
          CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
          RUST_BACKTRACE: 1
          CARGO_INCREMENTAL: 0
-          GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
-          GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
-          GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
-          GT_S3_REGION: ${{ secrets.S3_REGION }}
+          GT_S3_BUCKET: ${{ vars.AWS_CI_TEST_BUCKET }}
+          GT_S3_ACCESS_KEY_ID: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
+          GT_S3_ACCESS_KEY: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}
+          GT_S3_REGION: ${{ vars.AWS_CI_TEST_BUCKET_REGION }}
          GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
          GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
          UNITTEST_LOG_DIR: "__unittest_logs"
--- a/.github/workflows/license.yaml
+++ b/.github/workflows/license.yaml
@@ -13,4 +13,4 @@ jobs:
    steps:
    - uses: actions/checkout@v4
    - name: Check License Header
-      uses: korandoru/hawkeye@v4
+      uses: korandoru/hawkeye@v5
--- a/.github/workflows/nightly-ci.yml
+++ b/.github/workflows/nightly-ci.yml
@@ -12,7 +12,7 @@ concurrency:
  cancel-in-progress: true

 env:
-  RUST_TOOLCHAIN: nightly-2023-12-19
+  RUST_TOOLCHAIN: nightly-2024-04-18

 jobs:
  sqlness:
@@ -85,10 +85,10 @@ jobs:
        env:
          RUST_BACKTRACE: 1
          CARGO_INCREMENTAL: 0
-          GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
-          GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
-          GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
-          GT_S3_REGION: ${{ secrets.S3_REGION }}
+          GT_S3_BUCKET: ${{ vars.AWS_CI_TEST_BUCKET }}
+          GT_S3_ACCESS_KEY_ID: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
+          GT_S3_ACCESS_KEY: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}
+          GT_S3_REGION: ${{ vars.AWS_CI_TEST_BUCKET_REGION }}
          UNITTEST_LOG_DIR: "__unittest_logs"
      - name: Notify slack if failed
        if: failure()
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -82,7 +82,7 @@ on:
 # Use env variables to control all the release process.
 env:
  # The arguments of building greptime.
-  RUST_TOOLCHAIN: nightly-2023-12-19
+  RUST_TOOLCHAIN: nightly-2024-04-18
  CARGO_PROFILE: nightly

  # Controls whether to run tests, include unit-test, integration-test and sqlness.
--- a/.github/workflows/unassign.yml
+++ b/.github/workflows/unassign.yml
@@ -0,0 +1,21 @@
+name: Auto Unassign
+on:
+  schedule:
+    - cron: '4 2 * * *'
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  issues: write
+  pull-requests: write
+
+jobs:
+  auto-unassign:
+    name: Auto Unassign
+    runs-on: ubuntu-latest
+    steps:
+      - name: Auto Unassign
+        uses: tisonspieces/auto-unassign@main
+        with:
+          token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+          repository: ${{ github.repository }}
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -1,132 +0,0 @@
-# Contributor Covenant Code of Conduct
-
-## Our Pledge
-
-We as members, contributors, and leaders pledge to make participation in our
-community a harassment-free experience for everyone, regardless of age, body
-size, visible or invisible disability, ethnicity, sex characteristics, gender
-identity and expression, level of experience, education, socio-economic status,
-nationality, personal appearance, race, caste, color, religion, or sexual
-identity and orientation.
-
-We pledge to act and interact in ways that contribute to an open, welcoming,
-diverse, inclusive, and healthy community.
-
-## Our Standards
-
-Examples of behavior that contributes to a positive environment for our
-community include:
-
-* Demonstrating empathy and kindness toward other people
-* Being respectful of differing opinions, viewpoints, and experiences
-* Giving and gracefully accepting constructive feedback
-* Accepting responsibility and apologizing to those affected by our mistakes,
-  and learning from the experience
-* Focusing on what is best not just for us as individuals, but for the overall
-  community
-
-Examples of unacceptable behavior include:
-
-* The use of sexualized language or imagery, and sexual attention or advances of
-  any kind
-* Trolling, insulting or derogatory comments, and personal or political attacks
-* Public or private harassment
-* Publishing others' private information, such as a physical or email address,
-  without their explicit permission
-* Other conduct which could reasonably be considered inappropriate in a
-  professional setting
-
-## Enforcement Responsibilities
-
-Community leaders are responsible for clarifying and enforcing our standards of
-acceptable behavior and will take appropriate and fair corrective action in
-response to any behavior that they deem inappropriate, threatening, offensive,
-or harmful.
-
-Community leaders have the right and responsibility to remove, edit, or reject
-comments, commits, code, wiki edits, issues, and other contributions that are
-not aligned to this Code of Conduct, and will communicate reasons for moderation
-decisions when appropriate.
-
-## Scope
-
-This Code of Conduct applies within all community spaces, and also applies when
-an individual is officially representing the community in public spaces.
-Examples of representing our community include using an official e-mail address,
-posting via an official social media account, or acting as an appointed
-representative at an online or offline event.
-
-## Enforcement
-
-Instances of abusive, harassing, or otherwise unacceptable behavior may be
-reported to the community leaders responsible for enforcement at
-info@greptime.com.
-All complaints will be reviewed and investigated promptly and fairly.
-
-All community leaders are obligated to respect the privacy and security of the
-reporter of any incident.
-
-## Enforcement Guidelines
-
-Community leaders will follow these Community Impact Guidelines in determining
-the consequences for any action they deem in violation of this Code of Conduct:
-
-### 1. Correction
-
-**Community Impact**: Use of inappropriate language or other behavior deemed
-unprofessional or unwelcome in the community.
-
-**Consequence**: A private, written warning from community leaders, providing
-clarity around the nature of the violation and an explanation of why the
-behavior was inappropriate. A public apology may be requested.
-
-### 2. Warning
-
-**Community Impact**: A violation through a single incident or series of
-actions.
-
-**Consequence**: A warning with consequences for continued behavior. No
-interaction with the people involved, including unsolicited interaction with
-those enforcing the Code of Conduct, for a specified period of time. This
-includes avoiding interactions in community spaces as well as external channels
-like social media. Violating these terms may lead to a temporary or permanent
-ban.
-
-### 3. Temporary Ban
-
-**Community Impact**: A serious violation of community standards, including
-sustained inappropriate behavior.
-
-**Consequence**: A temporary ban from any sort of interaction or public
-communication with the community for a specified period of time. No public or
-private interaction with the people involved, including unsolicited interaction
-with those enforcing the Code of Conduct, is allowed during this period.
-Violating these terms may lead to a permanent ban.
-
-### 4. Permanent Ban
-
-**Community Impact**: Demonstrating a pattern of violation of community
-standards, including sustained inappropriate behavior, harassment of an
-individual, or aggression toward or disparagement of classes of individuals.
-
-**Consequence**: A permanent ban from any sort of public interaction within the
-community.
-
-## Attribution
-
-This Code of Conduct is adapted from the [Contributor Covenant][homepage],
-version 2.1, available at
-[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
-
-Community Impact Guidelines were inspired by
-[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
-
-For answers to common questions about this code of conduct, see the FAQ at
-[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
-[https://www.contributor-covenant.org/translations][translations].
-
-[homepage]: https://www.contributor-covenant.org
-[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
-[Mozilla CoC]: https://github.com/mozilla/diversity
-[FAQ]: https://www.contributor-covenant.org/faq
-[translations]: https://www.contributor-covenant.org/translations
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -50,7 +50,7 @@ GreptimeDB uses the [Apache 2.0 license](https://github.com/GreptimeTeam/greptim

 - To ensure that community is free and confident in its ability to use your contributions, please sign the Contributor License Agreement (CLA) which will be incorporated in the pull request process.
 - Make sure all files have proper license header (running `docker run --rm -v $(pwd):/github/workspace ghcr.io/korandoru/hawkeye-native:v3 format` from the project root).
- Make sure all your codes are formatted and follow the [coding style](https://pingcap.github.io/style-guide/rust/).
+- Make sure all your code is formatted and follows the [coding style](https://pingcap.github.io/style-guide/rust/) and the [style guide](docs/style-guide.md).
 - Make sure all unit tests are passed (using `cargo test --workspace` or [nextest](https://nexte.st/index.html) `cargo nextest run`).
 - Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings`).

--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -62,7 +62,7 @@ members = [
 resolver = "2"

 [workspace.package]
-version = "0.7.1"
+version = "0.7.2"
 edition = "2021"
 license = "Apache-2.0"

@@ -70,16 +70,24 @@ license = "Apache-2.0"
 clippy.print_stdout = "warn"
 clippy.print_stderr = "warn"
 clippy.implicit_clone = "warn"
+clippy.readonly_write_lock = "allow"
 rust.unknown_lints = "deny"
+# Remove this after https://github.com/PyO3/pyo3/issues/4094
+rust.non_local_definitions = "allow"

 [workspace.dependencies]
+# We turn off default-features for some dependencies here so the workspaces which inherit them can
+# selectively turn them on if needed, since we can override default-features = true (from false)
+# for the inherited dependency but cannot do the reverse (override from true to false).
+#
+# For more details, see: https://github.com/rust-lang/cargo/issues/11329
 ahash = { version = "0.8", features = ["compile-time-rng"] }
 aquamarine = "0.3"
-arrow = { version = "47.0" }
-arrow-array = "47.0"
-arrow-flight = "47.0"
-arrow-ipc = { version = "47.0", features = ["lz4"] }
-arrow-schema = { version = "47.0", features = ["serde"] }
+arrow = { version = "51.0.0", features = ["prettyprint"] }
+arrow-array = { version = "51.0.0", default-features = false, features = ["chrono-tz"] }
+arrow-flight = "51.0"
+arrow-ipc = { version = "51.0.0", default-features = false, features = ["lz4"] }
+arrow-schema = { version = "51.0", features = ["serde"] }
 async-stream = "0.3"
 async-trait = "0.1"
 axum = { version = "0.6", features = ["headers"] }
@@ -91,33 +99,38 @@ bytes = { version = "1.5", features = ["serde"] }
 chrono = { version = "0.4", features = ["serde"] }
 clap = { version = "4.4", features = ["derive"] }
 dashmap = "5.4"
-datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
-datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
-datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
-datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
-datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
-datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
-datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
+datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
+datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
+datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
+datafusion-functions = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
+datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
+datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
+datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
+datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
 derive_builder = "0.12"
-etcd-client = "0.12"
+dotenv = "0.15"
+# TODO(LFC): Wait for https://github.com/etcdv3/etcd-client/pull/76
+etcd-client = { git = "https://github.com/MichaelScofield/etcd-client.git", rev = "4c371e9b3ea8e0a8ee2f9cbd7ded26e54a45df3b" }
 fst = "0.4.7"
 futures = "0.3"
 futures-util = "0.3"
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "96f1f0404f421ee560a4310c73c5071e49168168" }
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "73ac0207ab71dfea48f30259ffdb611501b5ecb8" }
+humantime = "2.1"
 humantime-serde = "1.1"
 itertools = "0.10"
 lazy_static = "1.4"
 meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "80b72716dcde47ec4161478416a5c6c21343364d" }
 mockall = "0.11.4"
 moka = "0.12"
+notify = "6.1"
 num_cpus = "1.16"
 once_cell = "1.18"
-opentelemetry-proto = { git = "https://github.com/waynexia/opentelemetry-rust.git", rev = "33841b38dda79b15f2024952be5f32533325ca02", features = [
+opentelemetry-proto = { version = "0.5", features = [
    "gen-tonic",
    "metrics",
    "trace",
 ] }
-parquet = "47.0"
+parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
 paste = "1.0"
 pin-project = "1.0"
 prometheus = { version = "0.13.3", features = ["process"] }
@@ -125,32 +138,35 @@ prost = "0.12"
 raft-engine = { version = "0.4.1", default-features = false }
 rand = "0.8"
 regex = "1.8"
-regex-automata = { version = "0.2", features = ["transducer"] }
+regex-automata = { version = "0.4" }
 reqwest = { version = "0.11", default-features = false, features = [
    "json",
    "rustls-tls-native-roots",
    "stream",
+    "multipart",
 ] }
 rskafka = "0.5"
 rust_decimal = "1.33"
+schemars = "0.8"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = { version = "1.0", features = ["float_roundtrip"] }
 serde_with = "3"
 smallvec = { version = "1", features = ["serde"] }
 snafu = "0.7"
 sysinfo = "0.30"
-# on branch v0.38.x
-sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "6a93567ae38d42be5c8d08b13c8ff4dde26502ef", features = [
+# on branch v0.44.x
+sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "c919990bf62ad38d2b0c0a3bc90b26ad919d51b0", features = [
    "visitor",
 ] }
 strum = { version = "0.25", features = ["derive"] }
 tempfile = "3"
-tokio = { version = "1.28", features = ["full"] }
+tokio = { version = "1.36", features = ["full"] }
 tokio-stream = { version = "0.1" }
 tokio-util = { version = "0.7", features = ["io-util", "compat"] }
 toml = "0.8.8"
-tonic = { version = "0.10", features = ["tls"] }
-uuid = { version = "1", features = ["serde", "v4", "fast-rng"] }
+tonic = { version = "0.11", features = ["tls"] }
+uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
+zstd = "0.13"

 ## workspaces members
 api = { path = "src/api" }
--- a/Makefile
+++ b/Makefile
@@ -169,6 +169,10 @@ check: ## Cargo check all the targets.
 clippy: ## Check clippy rules.
 	cargo clippy --workspace --all-targets --all-features -- -D warnings

+.PHONY: fix-clippy
+fix-clippy: ## Fix clippy violations.
+	cargo clippy --workspace --all-targets --all-features --fix
+
 .PHONY: fmt-check
 fmt-check: ## Check code format.
 	cargo fmt --all -- --check
@@ -188,6 +192,16 @@ run-it-in-container: start-etcd ## Run integration tests in dev-builder.
 	-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-${BASE_IMAGE}:latest \
 	make test sqlness-test BUILD_JOBS=${BUILD_JOBS}

+##@ Docs
+config-docs: ## Generate configuration documentation from toml files.
+	docker run --rm \
+    -v ${PWD}:/greptimedb \
+    -w /greptimedb/config \
+    toml2docs/toml2docs:latest \
+    -p '##' \
+    -t ./config-docs-template.md \
+    -o ./config.md
+
 ##@ General

 # The help target prints out all targets with their descriptions organized
--- a/README.md
+++ b/README.md
@@ -6,36 +6,90 @@
  </picture>
 </p>

-[![codecov](https://codecov.io/gh/GrepTimeTeam/greptimedb/branch/main/graph/badge.svg?token=FITFDI3J3C)](https://codecov.io/gh/GrepTimeTeam/greptimedb)
-[![GitHub Actions](https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml/badge.svg)](https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml)
-[![License](https://img.shields.io/github/license/greptimeTeam/greptimedb)](https://github.com/greptimeTeam/greptimedb/blob/main/LICENSE)
+<h1 align="center">Cloud-scale, Fast and Efficient Time Series Database</h1>
+
+<div align="center">
+<h3 align="center">
+  <a href="https://greptime.com/product/cloud">GreptimeCloud</a> |
+  <a href="https://docs.greptime.com/">User guide</a> |
+  <a href="https://greptimedb.rs/">API Docs</a> |
+  <a href="https://github.com/GreptimeTeam/greptimedb/issues/3412">Roadmap 2024</a>
+</h3>
+
+<a href="https://github.com/GreptimeTeam/greptimedb/releases/latest">
+<img src="https://img.shields.io/github/v/release/GreptimeTeam/greptimedb.svg" alt="Version"/>
+</a>
+<a href="https://github.com/GreptimeTeam/greptimedb/releases/latest">
+<img src="https://img.shields.io/github/release-date/GreptimeTeam/greptimedb.svg" alt="Releases"/>
+</a>
+<a href="https://hub.docker.com/r/greptime/greptimedb/">
+<img src="https://img.shields.io/docker/pulls/greptime/greptimedb.svg" alt="Docker Pulls"/>
+</a>
+<a href="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml">
+<img src="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml/badge.svg" alt="GitHub Actions"/>
+</a>
+<a href="https://codecov.io/gh/GrepTimeTeam/greptimedb">
+<img src="https://codecov.io/gh/GrepTimeTeam/greptimedb/branch/main/graph/badge.svg?token=FITFDI3J3C" alt="Codecov"/>
+</a>
+<a href="https://github.com/greptimeTeam/greptimedb/blob/main/LICENSE">
+<img src="https://img.shields.io/github/license/greptimeTeam/greptimedb" alt="License"/>
+</a>

 <br/>

-[![Twitter](https://img.shields.io/badge/twitter-follow_us-1d9bf0.svg?style=for-the-badge)](https://twitter.com/greptime/)
-[![LinkedIn](https://img.shields.io/badge/linkedin-connect_with_us-0a66c2.svg?style=for-the-badge)](https://www.linkedin.com/company/greptime/)
-[![Slack](https://img.shields.io/badge/slack-GreptimeDB-0abd59?logo=slack&style=for-the-badge)](https://greptime.com/slack)
+<a href="https://greptime.com/slack">
+<img src="https://img.shields.io/badge/slack-GreptimeDB-0abd59?logo=slack&style=for-the-badge" alt="Slack"/>
+</a>
+<a href="https://twitter.com/greptime">
+<img src="https://img.shields.io/badge/twitter-follow_us-1d9bf0.svg?style=for-the-badge" alt="Twitter"/>
+</a>
+<a href="https://www.linkedin.com/company/greptime/">
+<img src="https://img.shields.io/badge/linkedin-connect_with_us-0a66c2.svg?style=for-the-badge" alt="LinkedIn"/>
+</a>
+</div>

-## What is GreptimeDB
+## Introduction

-GreptimeDB is an open-source time-series database focusing on efficiency, scalability, and analytical capabilities.
-It's designed to work on infrastructure of the cloud era, and users benefit from its elasticity and commodity storage.
+**GreptimeDB** is an open-source time-series database focusing on efficiency, scalability, and analytical capabilities.
+Designed to work on infrastructure of the cloud era, GreptimeDB benefits users with its elasticity and commodity storage, offering a fast and cost-effective **alternative to InfluxDB** and a **long-term storage for Prometheus**.

-Our core developers have been building time-series data platforms for years. Based on their best-practices, GreptimeDB is born to give you:
+## Why GreptimeDB

-* **Compatible with InfluxDB, Prometheus and more protocols**: Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc. [Read more](https://docs.greptime.com/user-guide/clients/overview).
-* **Easy horizontal scaling**: Seamless scalability from a standalone binary at edge to a robust, highly available distributed cluster in cloud, with a transparent experience for both developers and administrators.
-* **Analyzing time-series data**: Native SQL and PromQL for queries, and Python scripting to facilitate complex analytical tasks.
-* **Cloud-native distributed database**: Fully open-source distributed cluster architecture that harnesses the power of cloud-native elastic computing resources.
-* **Performance and Cost-effective**: Flexible indexing capabilities and distributed, parallel-processing query engine, tackling high cardinality issues down. Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends, particularly cloud object storage with 50x cost efficiency.
+Our core developers have been building time-series data platforms for years. Based on our best practices, GreptimeDB was built to give you:

-## Quickstart with [GreptimePlay](https://greptime.com/playground)
+* **Easy horizontal scaling**
+
+  Seamless scalability from a standalone binary at edge to a robust, highly available distributed cluster in cloud, with a transparent experience for both developers and administrators.
+
+* **Analyzing time-series data**
+
+  Query your time-series data with SQL and PromQL. Use Python scripts to facilitate complex analytical tasks.
+
+* **Cloud-native distributed database**
+
+  Fully open-source distributed cluster architecture that harnesses the power of cloud-native elastic computing resources.
+
+* **Performance and Cost-effective**
+
+  Flexible indexing capabilities and a distributed, parallel-processing query engine that tackle high-cardinality issues. Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends, particularly cloud object storage with 50x cost efficiency.
+
+* **Compatible with InfluxDB, Prometheus and more protocols**
+
+  Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc. [Read more](https://docs.greptime.com/user-guide/clients/overview).
+
+## Try GreptimeDB
+
+### 1. [GreptimePlay](https://greptime.com/playground)

 Try out the features of GreptimeDB right from your browser.

-## Up & Running
+### 2. [GreptimeCloud](https://console.greptime.cloud/)

-The recommended way to install GreptimeDB is via Docker:
+Start instantly with a free cluster.
+
+### 3. Docker Image
+
+To install GreptimeDB locally, the recommended way is via Docker:

 ```shell
 docker pull greptime/greptimedb
@@ -44,32 +98,13 @@ docker pull greptime/greptimedb
 Start a GreptimeDB container with:

 ```shell
-docker run -p 4000-4003:4000-4003 \
-  -p 4242:4242 -v "$(pwd)/greptimedb:/tmp/greptimedb" \
-  --name greptime --rm \
-  greptime/greptimedb standalone start \
-  --http-addr 0.0.0.0:4000 \
-  --rpc-addr 0.0.0.0:4001 \
-  --mysql-addr 0.0.0.0:4002 \
-  --postgres-addr 0.0.0.0:4003 \
-  --opentsdb-addr 0.0.0.0:4242
+docker run --rm --name greptime --net=host greptime/greptimedb standalone start
 ```

-Connect to the server and test:
+Read more about [Installation](https://docs.greptime.com/getting-started/installation/overview) on docs.

-```shell
-curl -X POST -d 'sql=SELECT 42&format=csv' http://localhost:4000/v1/sql
-```
+## Getting Started

-You should get a reply as:
-
-```
-42
-```
-
-Read more on docs:
-
-* [Installation](https://docs.greptime.com/getting-started/installation/overview)
 * [Quickstart](https://docs.greptime.com/getting-started/quick-start/overview)
 * [Write Data](https://docs.greptime.com/user-guide/clients/overview)
 * [Query Data](https://docs.greptime.com/user-guide/query-data/overview)
@@ -95,11 +130,6 @@ Run a standalone server:
 cargo run -- standalone start
 ```

-## Documentation
-
- [User guide](https://docs.greptime.com/user-guide/concepts/overview)
- [API docs](https://greptimedb.rs)
-
 ## Extension

 ### Dashboard
@@ -113,7 +143,7 @@ cargo run -- standalone start
 - [GreptimeDB C++ Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-cpp)
 - [GreptimeDB Erlang Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-erl)
 - [GreptimeDB Rust Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-rust)
- [GreptimeDB JavaScript Ingester](https://github.com/GreptimeTeam/greptime-ingester-js)
+- [GreptimeDB JavaScript Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-js)

 ### Grafana Dashboard

@@ -121,12 +151,9 @@ Our official Grafana dashboard is available at [grafana](grafana/README.md) dire

 ## Project Status

-This project is in its early stage and under heavy development. We move fast and
-break things. Benchmark on development branch may not represent its potential
-performance. We release pre-built binaries constantly for functional
-evaluation. Do not use it in production at the moment.
-
-For future plans, check out [GreptimeDB roadmap](https://github.com/GreptimeTeam/greptimedb/issues/3412).
+The current version has not yet reached General Availability (GA) standards.
+In line with our Greptime 2024 Roadmap, we plan to achieve a production-level
+version with the update to v1.0 in August. [[Join Force]](https://github.com/GreptimeTeam/greptimedb/issues/3412)

 ## Community

--- a/benchmarks/Cargo.toml
+++ b/benchmarks/Cargo.toml
@@ -8,12 +8,31 @@ license.workspace = true
 workspace = true

 [dependencies]
+api.workspace = true
 arrow.workspace = true
 chrono.workspace = true
 clap.workspace = true
 client.workspace = true
+common-base.workspace = true
+common-telemetry.workspace = true
+common-wal.workspace = true
+dotenv.workspace = true
+futures.workspace = true
 futures-util.workspace = true
+humantime.workspace = true
+humantime-serde.workspace = true
 indicatif = "0.17.1"
 itertools.workspace = true
+lazy_static.workspace = true
+log-store.workspace = true
+mito2.workspace = true
+num_cpus.workspace = true
 parquet.workspace = true
+prometheus.workspace = true
+rand.workspace = true
+rskafka.workspace = true
+serde.workspace = true
+store-api.workspace = true
 tokio.workspace = true
+toml.workspace = true
+uuid.workspace = true
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -0,0 +1,11 @@
+Benchmarkers for GreptimeDB
+--------------------------------
+
+## WAL Benchmarker
+The WAL benchmarker serves to evaluate the performance of GreptimeDB's Write-Ahead Log (WAL) component. It meticulously assesses the read/write performance of the WAL under diverse workloads generated by the benchmarker.
+
+
+### How to use
+To compile the benchmarker, navigate to the `greptimedb/benchmarks` directory and execute `cargo build --release`. Subsequently, you'll find the compiled target located at `greptimedb/target/release/wal_bench`.
+
+The `./wal_bench -h` command reveals numerous arguments that the target accepts. Among these, a notable one is the `cfg-file` argument. By utilizing a configuration file in the TOML format, you can bypass the need to repeatedly specify cumbersome arguments.
--- a/benchmarks/config/wal_bench.example.toml
+++ b/benchmarks/config/wal_bench.example.toml
@@ -0,0 +1,21 @@
+# Refer to the documentation of `Args` in `benchmarks/src/wal_bench.rs`.
+wal_provider = "kafka"
+bootstrap_brokers = ["localhost:9092"]
+num_workers = 10
+num_topics = 32
+num_regions = 1000
+num_scrapes = 1000
+num_rows = 5
+col_types = "ifs"
+max_batch_size = "512KB"
+linger = "1ms"
+backoff_init = "10ms"
+backoff_max = "1s"
+backoff_base = 2
+backoff_deadline = "3s"
+compression = "zstd"
+rng_seed = 42
+skip_read = false
+skip_write = false
+random_topics = true
+report_metrics = false
--- a/benchmarks/src/bin/nyc-taxi.rs
+++ b/benchmarks/src/bin/nyc-taxi.rs
@@ -215,37 +215,7 @@ fn build_values(column: &ArrayRef) -> (Values, ColumnDataType) {
                ColumnDataType::String,
            )
        }
-        DataType::Null
-        | DataType::Boolean
-        | DataType::Int8
-        | DataType::Int16
-        | DataType::Int32
-        | DataType::UInt8
-        | DataType::UInt16
-        | DataType::UInt32
-        | DataType::UInt64
-        | DataType::Float16
-        | DataType::Float32
-        | DataType::Date32
-        | DataType::Date64
-        | DataType::Time32(_)
-        | DataType::Time64(_)
-        | DataType::Duration(_)
-        | DataType::Interval(_)
-        | DataType::Binary
-        | DataType::FixedSizeBinary(_)
-        | DataType::LargeBinary
-        | DataType::LargeUtf8
-        | DataType::List(_)
-        | DataType::FixedSizeList(_, _)
-        | DataType::LargeList(_)
-        | DataType::Struct(_)
-        | DataType::Union(_, _)
-        | DataType::Dictionary(_, _)
-        | DataType::Decimal128(_, _)
-        | DataType::Decimal256(_, _)
-        | DataType::RunEndEncoded(_, _)
-        | DataType::Map(_, _) => todo!(),
+        _ => unimplemented!(),
    }
 }

@@ -444,7 +414,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
 fn query_set(table_name: &str) -> HashMap<String, String> {
    HashMap::from([
        (
-            "count_all".to_string(), 
+            "count_all".to_string(),
            format!("SELECT COUNT(*) FROM {table_name};"),
        ),
        (
--- a/benchmarks/src/bin/wal_bench.rs
+++ b/benchmarks/src/bin/wal_bench.rs
@@ -0,0 +1,326 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#![feature(int_roundings)]
+
+use std::fs;
+use std::sync::Arc;
+use std::time::Instant;
+
+use api::v1::{ColumnDataType, ColumnSchema, SemanticType};
+use benchmarks::metrics;
+use benchmarks::wal_bench::{Args, Config, Region, WalProvider};
+use clap::Parser;
+use common_telemetry::info;
+use common_wal::config::kafka::common::BackoffConfig;
+use common_wal::config::kafka::DatanodeKafkaConfig as KafkaConfig;
+use common_wal::config::raft_engine::RaftEngineConfig;
+use common_wal::options::{KafkaWalOptions, WalOptions};
+use itertools::Itertools;
+use log_store::kafka::log_store::KafkaLogStore;
+use log_store::raft_engine::log_store::RaftEngineLogStore;
+use mito2::wal::Wal;
+use prometheus::{Encoder, TextEncoder};
+use rand::distributions::{Alphanumeric, DistString};
+use rand::rngs::SmallRng;
+use rand::SeedableRng;
+use rskafka::client::partition::Compression;
+use rskafka::client::ClientBuilder;
+use store_api::logstore::LogStore;
+use store_api::storage::RegionId;
+
+/// Runs the benchmark against `wal` and logs the report.
+///
+/// Regions `0..cfg.num_regions` are built up front and split into chunks of
+/// `ceil(num_regions / num_workers)` regions. Each phase (write, then read) spawns one tokio
+/// task per chunk and is timed as a whole; a phase skipped through `cfg.skip_write` or
+/// `cfg.skip_read` is reported with an elapsed time of 0 ms.
+async fn run_benchmarker<S: LogStore>(cfg: &Config, topics: &[String], wal: Arc<Wal<S>>) {
+    let chunk_size = cfg.num_regions.div_ceil(cfg.num_workers);
+    let region_chunks = (0..cfg.num_regions)
+        .map(|id| {
+            build_region(
+                id as u64,
+                topics,
+                // Every region gets a fresh RNG built from the same seed.
+                &mut SmallRng::seed_from_u64(cfg.rng_seed),
+                cfg,
+            )
+        })
+        .chunks(chunk_size as usize)
+        .into_iter()
+        .map(|chunk| Arc::new(chunk.collect::<Vec<_>>()))
+        .collect::<Vec<_>>();
+
+    let mut write_elapsed = 0;
+    let mut read_elapsed = 0;
+
+    if !cfg.skip_write {
+        info!("Benchmarking write ...");
+
+        let num_scrapes = cfg.num_scrapes;
+        let timer = Instant::now();
+        // Spawn one task per chunk rather than per configured worker: rounding the chunk size
+        // up can yield fewer chunks than `num_workers` (e.g. 9 regions over 4 workers gives 3
+        // chunks), so indexing `region_chunks` by worker id could go out of bounds.
+        futures::future::join_all(region_chunks.iter().map(|regions| {
+            let wal = wal.clone();
+            let regions = regions.clone();
+            tokio::spawn(async move {
+                // One scrape = one wal entry per region of the chunk, flushed as one write.
+                for _ in 0..num_scrapes {
+                    let mut wal_writer = wal.writer();
+                    regions
+                        .iter()
+                        .for_each(|region| region.add_wal_entry(&mut wal_writer));
+                    wal_writer.write_to_wal().await.unwrap();
+                }
+            })
+        }))
+        .await;
+        write_elapsed = timer.elapsed().as_millis();
+    }
+
+    if !cfg.skip_read {
+        info!("Benchmarking read ...");
+
+        let timer = Instant::now();
+        // Same task layout as the write phase: one task per chunk of regions.
+        futures::future::join_all(region_chunks.iter().map(|regions| {
+            let wal = wal.clone();
+            let regions = regions.clone();
+            tokio::spawn(async move {
+                for region in regions.iter() {
+                    region.replay(&wal).await;
+                }
+            })
+        }))
+        .await;
+        read_elapsed = timer.elapsed().as_millis();
+    }
+
+    dump_report(cfg, write_elapsed, read_elapsed);
+}
+
+/// Builds the benchmark region identified by `id`.
+///
+/// With the kafka provider the region is bound to the topic at index `id % topics.len()`,
+/// so `topics` must not be empty. With the raft-engine provider `topics` is not consulted.
+fn build_region(id: u64, topics: &[String], rng: &mut SmallRng, cfg: &Config) -> Region {
+    let wal_options = match cfg.wal_provider {
+        WalProvider::RaftEngine => WalOptions::RaftEngine,
+        WalProvider::Kafka => {
+            assert!(!topics.is_empty());
+            // Regions are spread over the topics round-robin by id.
+            let topic = topics[id as usize % topics.len()].clone();
+            WalOptions::Kafka(KafkaWalOptions { topic })
+        }
+    };
+    let region_id = RegionId::from_u64(id);
+    let schema = build_schema(&parse_col_types(&cfg.col_types), rng);
+
+    Region::new(region_id, schema, wal_options, cfg.num_rows, cfg.rng_seed)
+}
+
+/// Builds a region schema: one field column per entry of `col_types`, each with a random
+/// 5-character alphanumeric name drawn from `rng`, followed by a trailing `ts` column of type
+/// `TimestampMillisecond`.
+fn build_schema(col_types: &[ColumnDataType], mut rng: &mut SmallRng) -> Vec<ColumnSchema> {
+    let mut schema = Vec::with_capacity(col_types.len() + 1);
+    for col_type in col_types {
+        schema.push(ColumnSchema {
+            column_name: Alphanumeric.sample_string(&mut rng, 5),
+            datatype: *col_type as i32,
+            semantic_type: SemanticType::Field as i32,
+            datatype_extension: None,
+        });
+    }
+    // NOTE(review): the timestamp column is tagged `SemanticType::Tag`; confirm this is
+    // intended for the benchmark rather than a dedicated timestamp semantic type.
+    schema.push(ColumnSchema {
+        column_name: "ts".to_string(),
+        datatype: ColumnDataType::TimestampMillisecond as i32,
+        semantic_type: SemanticType::Tag as i32,
+        datatype_extension: None,
+    });
+    schema
+}
+
+/// Logs the benchmark report: the config, the elapsed time of each phase, the byte totals
+/// read from the write/read byte counters, the derived throughputs and, when
+/// `cfg.report_metrics` is set, the text encoding of all gathered prometheus metrics.
+///
+/// `write_elapsed` and `read_elapsed` are in milliseconds. A phase with an elapsed time of 0
+/// is reported with a throughput of 0 instead of dividing by zero.
+fn dump_report(cfg: &Config, write_elapsed: u128, read_elapsed: u128) {
+    // Bytes per second given a byte total and an elapsed time in milliseconds.
+    let throughput = |total_bytes: u128, elapsed_ms: u128| {
+        if elapsed_ms > 0 {
+            (total_bytes * 1000).div_floor(elapsed_ms)
+        } else {
+            0
+        }
+    };
+
+    let cost_report = format!(
+        "write costs: {} ms, read costs: {} ms",
+        write_elapsed, read_elapsed,
+    );
+
+    let total_written_bytes = metrics::METRIC_WAL_WRITE_BYTES_TOTAL.get() as u128;
+    let write_throughput = throughput(total_written_bytes, write_elapsed);
+    let total_read_bytes = metrics::METRIC_WAL_READ_BYTES_TOTAL.get() as u128;
+    let read_throughput = throughput(total_read_bytes, read_elapsed);
+
+    // The "mb/s" figures are the bytes/s figures divided by 2^20.
+    let throughput_report = format!(
+        "total written bytes: {} bytes, total read bytes: {} bytes, write throughput: {} bytes/s ({} mb/s), read throughput: {} bytes/s ({} mb/s)",
+        total_written_bytes,
+        total_read_bytes,
+        write_throughput,
+        write_throughput.div_floor(1 << 20),
+        read_throughput,
+        read_throughput.div_floor(1 << 20),
+    );
+
+    let metrics_report = if cfg.report_metrics {
+        let mut buffer = Vec::new();
+        let encoder = TextEncoder::new();
+        let metrics = prometheus::gather();
+        encoder.encode(&metrics, &mut buffer).unwrap();
+        String::from_utf8(buffer).unwrap()
+    } else {
+        String::new()
+    };
+
+    info!(
+        r#"
+Benchmark config: 
+{cfg:?}
+
+Benchmark report:
+{cost_report}
+{throughput_report}
+{metrics_report}"#
+    );
+}
+
+/// Asks the kafka controller to create `cfg.num_topics` topics and returns their names.
+///
+/// Names are `greptime_wal_bench_topic_{i}`, or `greptime_wal_bench_topic_{uuid}_{i}` when
+/// `cfg.random_topics` is set. Creation errors are ignored so that re-running the benchmarker
+/// against already existing topics still works.
+async fn create_topics(cfg: &Config) -> Vec<String> {
+    let client = ClientBuilder::new(cfg.bootstrap_brokers.clone())
+        .build()
+        .await
+        .unwrap();
+    let ctrl_client = client.controller_client().unwrap();
+
+    let capacity = cfg.num_topics as usize;
+    let mut topics = Vec::with_capacity(capacity);
+    let mut tasks = Vec::with_capacity(capacity);
+    for i in 0..cfg.num_topics {
+        let topic = if cfg.random_topics {
+            format!(
+                "greptime_wal_bench_topic_{}_{}",
+                uuid::Uuid::new_v4().as_u128(),
+                i
+            )
+        } else {
+            format!("greptime_wal_bench_topic_{}", i)
+        };
+        // The creation requests are only built here; they are driven together below.
+        tasks.push(ctrl_client.create_topic(
+            topic.clone(),
+            1,
+            cfg.bootstrap_brokers.len() as i16,
+            2000,
+        ));
+        topics.push(topic);
+    }
+
+    // Must ignore errors since we allow topics being created more than once.
+    let _ = futures::future::try_join_all(tasks).await;
+
+    topics
+}
+
+/// Maps the `compression` option to the rskafka compression algorithm.
+///
+/// Accepted values are `no`, `gzip`, `lz4`, `snappy` and `zstd`.
+///
+/// # Panics
+///
+/// Panics on any other value. The value comes from the command line or the config file, so
+/// this is an ordinary input error and is reported as such rather than as unreachable code.
+fn parse_compression(comp: &str) -> Compression {
+    match comp {
+        "no" => Compression::NoCompression,
+        "gzip" => Compression::Gzip,
+        "lz4" => Compression::Lz4,
+        "snappy" => Compression::Snappy,
+        "zstd" => Compression::Zstd,
+        other => panic!(
+            "Unrecognized compression {other}, expected one of: no, gzip, lz4, snappy, zstd"
+        ),
+    }
+}
+
+/// Parses the `col_types` option into a list of column data types.
+///
+/// The option has the form `<pattern>[x<repeat>]`, where each character of the pattern is one
+/// of `i`/`I` (Int64), `f`/`F` (Float64) or `s`/`S` (String), and the optional `repeat` is the
+/// number of times the pattern is repeated (1 when omitted). For example `"ifs"` yields
+/// Int64, Float64, String and `"iix2"` yields four Int64 columns.
+///
+/// # Panics
+///
+/// Panics if the option contains more than one `x`, if `repeat` is not an unsigned integer,
+/// or if the pattern contains an unsupported character.
+fn parse_col_types(col_types: &str) -> Vec<ColumnDataType> {
+    let parts = col_types.split('x').collect::<Vec<_>>();
+    assert!(
+        parts.len() <= 2,
+        "Cannot parse {col_types} as column types: expected <pattern>[x<repeat>]"
+    );
+
+    let pattern = parts[0];
+    let repeat = parts
+        .get(1)
+        .map(|r| {
+            r.parse::<usize>()
+                .unwrap_or_else(|e| panic!("Cannot parse {r} as a repeat count: {e}"))
+        })
+        .unwrap_or(1);
+
+    pattern
+        .chars()
+        .map(|c| match c {
+            'i' | 'I' => ColumnDataType::Int64,
+            'f' | 'F' => ColumnDataType::Float64,
+            's' | 'S' => ColumnDataType::String,
+            // The pattern is user input, so this is an input error, not unreachable code.
+            other => panic!("Cannot parse {other} as a column data type"),
+        })
+        .cycle()
+        .take(pattern.len() * repeat)
+        .collect()
+}
+
+/// Entry point of the wal benchmarker.
+///
+/// Loads the config (from the TOML file when `cfg_file` is non-empty, otherwise from the
+/// command line arguments), validates it, builds the log store selected by `wal_provider`
+/// and runs the benchmark on a multi-threaded tokio runtime.
+fn main() {
+    // Sets the global log level to INFO and suppresses rskafka logs below ERROR.
+    std::env::set_var("UNITTEST_LOG_LEVEL", "info,rskafka=error");
+    common_telemetry::init_default_ut_logging();
+
+    let args = Args::parse();
+    // A config file, when given, replaces the command line arguments entirely.
+    let cfg = if !args.cfg_file.is_empty() {
+        toml::from_str(&fs::read_to_string(&args.cfg_file).unwrap()).unwrap()
+    } else {
+        Config::from(args)
+    };
+
+    // Validates arguments.
+    if cfg.num_regions < cfg.num_workers {
+        panic!("num_regions must be greater than or equal to num_workers");
+    }
+    // Rejects the config if any of these quantities is zero (the minimum of them is zero).
+    if cfg
+        .num_workers
+        .min(cfg.num_topics)
+        .min(cfg.num_regions)
+        .min(cfg.num_scrapes)
+        .min(cfg.max_batch_size.as_bytes() as u32)
+        .min(cfg.bootstrap_brokers.len() as u32)
+        == 0
+    {
+        panic!("Invalid arguments");
+    }
+
+    tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()
+        .unwrap()
+        .block_on(async {
+            match cfg.wal_provider {
+                WalProvider::Kafka => {
+                    // Topics are created first; regions are later assigned to them by id.
+                    let topics = create_topics(&cfg).await;
+                    let kafka_cfg = KafkaConfig {
+                        broker_endpoints: cfg.bootstrap_brokers.clone(),
+                        max_batch_size: cfg.max_batch_size,
+                        linger: cfg.linger,
+                        backoff: BackoffConfig {
+                            init: cfg.backoff_init,
+                            max: cfg.backoff_max,
+                            base: cfg.backoff_base,
+                            deadline: Some(cfg.backoff_deadline),
+                        },
+                        compression: parse_compression(&cfg.compression),
+                        ..Default::default()
+                    };
+                    let store = Arc::new(KafkaLogStore::try_new(&kafka_cfg).await.unwrap());
+                    let wal = Arc::new(Wal::new(store));
+                    run_benchmarker(&cfg, &topics, wal).await;
+                }
+                WalProvider::RaftEngine => {
+                    // The benchmarker assumes the raft engine directory exists.
+                    let store = RaftEngineLogStore::try_new(
+                        "/tmp/greptimedb/raft-engine-wal".to_string(),
+                        RaftEngineConfig::default(),
+                    )
+                    .await
+                    .map(Arc::new)
+                    .unwrap();
+                    let wal = Arc::new(Wal::new(store));
+                    // No topics are involved with the raft-engine provider.
+                    run_benchmarker(&cfg, &[], wal).await;
+                }
+            }
+        });
+}
--- a/src/partition/src/metrics.rs
+++ b/src/partition/src/metrics.rs
@@ -11,3 +11,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+
+pub mod metrics;
+pub mod wal_bench;
--- a/benchmarks/src/metrics.rs
+++ b/benchmarks/src/metrics.rs
@@ -0,0 +1,39 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use lazy_static::lazy_static;
+use prometheus::*;
+
+/// Logstore label.
+// NOTE(review): not referenced by any metric defined in this file; confirm it is still needed.
+pub const LOGSTORE_LABEL: &str = "logstore";
+/// Operation type label.
+pub const OPTYPE_LABEL: &str = "optype";
+
+lazy_static! {
+    /// Byte counters of the wal benchmark, one counter per value of the `optype` label.
+    pub static ref METRIC_WAL_OP_BYTES_TOTAL: IntCounterVec = register_int_counter_vec!(
+        "greptime_bench_wal_op_bytes_total",
+        "wal operation bytes total",
+        &[OPTYPE_LABEL],
+    )
+    .unwrap();
+    /// Counter of written bytes, i.e. the `optype = "write"` child of the vector above.
+    pub static ref METRIC_WAL_WRITE_BYTES_TOTAL: IntCounter = METRIC_WAL_OP_BYTES_TOTAL.with_label_values(
+        &["write"],
+    );
+    /// Counter of read bytes, i.e. the `optype = "read"` child of the vector above.
+    pub static ref METRIC_WAL_READ_BYTES_TOTAL: IntCounter = METRIC_WAL_OP_BYTES_TOTAL.with_label_values(
+        &["read"],
+    );
+}
--- a/benchmarks/src/wal_bench.rs
+++ b/benchmarks/src/wal_bench.rs
@@ -0,0 +1,361 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::mem::size_of;
+use std::sync::atomic::{AtomicI64, AtomicU64, Ordering};
+use std::sync::{Arc, Mutex};
+use std::time::Duration;
+
+use api::v1::value::ValueData;
+use api::v1::{ColumnDataType, ColumnSchema, Mutation, OpType, Row, Rows, Value, WalEntry};
+use clap::{Parser, ValueEnum};
+use common_base::readable_size::ReadableSize;
+use common_wal::options::WalOptions;
+use futures::StreamExt;
+use mito2::wal::{Wal, WalWriter};
+use rand::distributions::{Alphanumeric, DistString, Uniform};
+use rand::rngs::SmallRng;
+use rand::{Rng, SeedableRng};
+use serde::{Deserialize, Serialize};
+use store_api::logstore::LogStore;
+use store_api::storage::RegionId;
+
+use crate::metrics;
+
+/// The wal provider.
+///
+/// Selectable on the command line through clap's `ValueEnum`, and (de)serialized in
+/// snake case (`raft_engine`, `kafka`) in the config file.
+#[derive(Clone, ValueEnum, Default, Debug, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum WalProvider {
+    /// The raft-engine based wal. This is the default provider.
+    #[default]
+    RaftEngine,
+    /// The kafka based wal.
+    Kafka,
+}
+
+/// Command line arguments of the wal benchmarker.
+///
+/// When `cfg_file` is non-empty the benchmarker is configured from that file and the other
+/// arguments are ignored; otherwise the arguments are converted into a [`Config`].
+#[derive(Parser)]
+pub struct Args {
+    /// The provided configuration file. If omitted, the other arguments are used.
+    /// The example configuration file can be found at `greptimedb/benchmarks/config/wal_bench.example.toml`.
+    // Defaults to an empty string so the argument is optional; an empty value means
+    // "no configuration file".
+    #[clap(long, short = 'c', default_value = "")]
+    pub cfg_file: String,
+
+    /// The wal provider.
+    #[clap(long, value_enum, default_value_t = WalProvider::default())]
+    pub wal_provider: WalProvider,
+
+    /// The advertised addresses of the kafka brokers.
+    /// If there are multiple bootstrap brokers, their addresses should be separated by comma, e.g. "localhost:9092,localhost:9093".
+    #[clap(long, short = 'b', default_value = "localhost:9092")]
+    pub bootstrap_brokers: String,
+
+    /// The number of workers. Capped at the number of CPUs.
+    #[clap(long, default_value_t = num_cpus::get() as u32)]
+    pub num_workers: u32,
+
+    /// The number of kafka topics to be created.
+    #[clap(long, default_value_t = 32)]
+    pub num_topics: u32,
+
+    /// The number of regions.
+    #[clap(long, default_value_t = 1000)]
+    pub num_regions: u32,
+
+    /// The number of times each region is scraped.
+    #[clap(long, default_value_t = 1000)]
+    pub num_scrapes: u32,
+
+    /// The number of rows in each wal entry.
+    /// Each time a region is scraped, a wal entry containing `num_rows` rows will be produced.
+    #[clap(long, default_value_t = 5)]
+    pub num_rows: u32,
+
+    /// The column types of the schema for each region.
+    /// Currently, three column types are supported:
+    /// - i = ColumnDataType::Int64
+    /// - f = ColumnDataType::Float64
+    /// - s = ColumnDataType::String
+    ///
+    /// E.g., "ifs" will be parsed as three columns: i64, f64, and string.
+    ///
+    /// Additionally, a "x" sign can be provided to repeat the column types for a given number of times.
+    /// E.g., "iix2" will be parsed as 4 columns: i64, i64, i64, and i64.
+    /// This feature is useful if you want to specify many columns.
+    #[clap(long, default_value = "ifs")]
+    pub col_types: String,
+
+    /// The maximum size of a batch of kafka records.
+    /// The default value is 512KB.
+    #[clap(long, default_value = "512KB")]
+    pub max_batch_size: ReadableSize,
+
+    /// The minimum latency the kafka client issues a batch of kafka records.
+    /// However, a batch of kafka records would be immediately issued if a record cannot be fit into the batch.
+    #[clap(long, default_value = "1ms")]
+    pub linger: String,
+
+    /// The initial backoff delay of the kafka consumer.
+    #[clap(long, default_value = "10ms")]
+    pub backoff_init: String,
+
+    /// The maximum backoff delay of the kafka consumer.
+    #[clap(long, default_value = "1s")]
+    pub backoff_max: String,
+
+    /// The exponential backoff rate of the kafka consumer. The next back off = base * the current backoff.
+    #[clap(long, default_value_t = 2)]
+    pub backoff_base: u32,
+
+    /// The deadline of backoff. The backoff ends if the total backoff delay reaches the deadline.
+    #[clap(long, default_value = "3s")]
+    pub backoff_deadline: String,
+
+    /// The client-side compression algorithm for kafka records.
+    /// One of: no, gzip, lz4, snappy, zstd.
+    #[clap(long, default_value = "zstd")]
+    pub compression: String,
+
+    /// The seed of random number generators.
+    #[clap(long, default_value_t = 42)]
+    pub rng_seed: u64,
+
+    /// Skips the read phase, aka. region replay, if set to true.
+    #[clap(long, default_value_t = false)]
+    pub skip_read: bool,
+
+    /// Skips the write phase if set to true.
+    #[clap(long, default_value_t = false)]
+    pub skip_write: bool,
+
+    /// Randomly generates topic names if set to true.
+    /// Useful when you want to run the benchmarker without worrying about the topics created before.
+    #[clap(long, default_value_t = false)]
+    pub random_topics: bool,
+
+    /// Logs out the gathered prometheus metrics when the benchmarker ends.
+    #[clap(long, default_value_t = false)]
+    pub report_metrics: bool,
+}
+
+/// Benchmarker config.
+///
+/// Either deserialized from a TOML configuration file or converted from [`Args`].
+/// Durations are (de)serialized in humantime notation, e.g. `"10ms"` or `"3s"`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Config {
+    /// The wal provider to benchmark.
+    pub wal_provider: WalProvider,
+    /// Addresses of the kafka bootstrap brokers, one entry per broker.
+    pub bootstrap_brokers: Vec<String>,
+    /// The number of workers the regions are distributed over.
+    pub num_workers: u32,
+    /// The number of kafka topics to create.
+    pub num_topics: u32,
+    /// The number of regions.
+    pub num_regions: u32,
+    /// The number of times each region is scraped in the write phase.
+    pub num_scrapes: u32,
+    /// The number of rows in each generated wal entry.
+    pub num_rows: u32,
+    /// The column types pattern, e.g. `"ifs"` or `"iix2"`.
+    pub col_types: String,
+    /// The maximum size of a batch of kafka records.
+    pub max_batch_size: ReadableSize,
+    /// The linger duration handed to the kafka config.
+    #[serde(with = "humantime_serde")]
+    pub linger: Duration,
+    /// The initial backoff delay.
+    #[serde(with = "humantime_serde")]
+    pub backoff_init: Duration,
+    /// The maximum backoff delay.
+    #[serde(with = "humantime_serde")]
+    pub backoff_max: Duration,
+    /// The exponential backoff base.
+    pub backoff_base: u32,
+    /// The backoff deadline.
+    #[serde(with = "humantime_serde")]
+    pub backoff_deadline: Duration,
+    /// The name of the client-side compression algorithm for kafka records.
+    pub compression: String,
+    /// The seed of the random number generators.
+    pub rng_seed: u64,
+    /// Skips the read phase if true.
+    pub skip_read: bool,
+    /// Skips the write phase if true.
+    pub skip_write: bool,
+    /// Generates random topic names if true.
+    pub random_topics: bool,
+    /// Logs the gathered prometheus metrics in the report if true.
+    pub report_metrics: bool,
+}
+
+impl From<Args> for Config {
+    /// Converts command line arguments into a benchmarker config.
+    ///
+    /// The comma separated broker list is split into individual addresses, the number of
+    /// workers is capped at the number of CPUs, and the duration arguments are parsed from
+    /// humantime notation. Panics if any duration argument cannot be parsed.
+    fn from(args: Args) -> Self {
+        let parse_duration = |text: &str| humantime::parse_duration(text).unwrap();
+
+        let bootstrap_brokers: Vec<String> = args
+            .bootstrap_brokers
+            .split(',')
+            .map(String::from)
+            .collect();
+        let max_workers = num_cpus::get() as u32;
+
+        Self {
+            wal_provider: args.wal_provider,
+            bootstrap_brokers,
+            num_workers: args.num_workers.min(max_workers),
+            num_topics: args.num_topics,
+            num_regions: args.num_regions,
+            num_scrapes: args.num_scrapes,
+            num_rows: args.num_rows,
+            col_types: args.col_types,
+            max_batch_size: args.max_batch_size,
+            linger: parse_duration(&args.linger),
+            backoff_init: parse_duration(&args.backoff_init),
+            backoff_max: parse_duration(&args.backoff_max),
+            backoff_base: args.backoff_base,
+            backoff_deadline: parse_duration(&args.backoff_deadline),
+            compression: args.compression,
+            rng_seed: args.rng_seed,
+            skip_read: args.skip_read,
+            skip_write: args.skip_write,
+            random_topics: args.random_topics,
+            report_metrics: args.report_metrics,
+        }
+    }
+}
+
+/// The region used for wal benchmarker.
+///
+/// The counters are atomics and the RNG sits behind a mutex, so wal entries can be generated
+/// through a shared reference.
+pub struct Region {
+    /// The region id.
+    id: RegionId,
+    /// The column schemas attached to every generated batch of rows.
+    schema: Vec<ColumnSchema>,
+    /// The wal options passed along with every entry and scan of this region.
+    wal_options: WalOptions,
+    /// The sequence of the next mutation; advanced by `num_rows` per entry.
+    next_sequence: AtomicU64,
+    /// The id of the next wal entry; advanced by 1 per entry.
+    next_entry_id: AtomicU64,
+    /// The next generated timestamp in milliseconds; advanced by 1000 per generated row.
+    next_timestamp: AtomicI64,
+    /// The RNG used to generate column values.
+    rng: Mutex<Option<SmallRng>>,
+    /// The number of rows in each generated wal entry.
+    num_rows: u32,
+}
+
+impl Region {
+    /// Creates a new region.
+    ///
+    /// Sequences and entry ids start at 1, timestamps start at 1655276557000 ms, and the
+    /// value RNG is seeded with `rng_seed`.
+    pub fn new(
+        id: RegionId,
+        schema: Vec<ColumnSchema>,
+        wal_options: WalOptions,
+        num_rows: u32,
+        rng_seed: u64,
+    ) -> Self {
+        Self {
+            id,
+            schema,
+            wal_options,
+            next_sequence: AtomicU64::new(1),
+            next_entry_id: AtomicU64::new(1),
+            next_timestamp: AtomicI64::new(1655276557000),
+            rng: Mutex::new(Some(SmallRng::seed_from_u64(rng_seed))),
+            num_rows,
+        }
+    }
+
+    /// Scrapes the region and adds the generated entry to wal.
+    ///
+    /// The entry holds a single `Put` mutation with `num_rows` freshly generated rows. Its
+    /// estimated size is added to the write bytes counter before the entry is handed to
+    /// `wal_writer`. Panics if the writer rejects the entry.
+    pub fn add_wal_entry<S: LogStore>(&self, wal_writer: &mut WalWriter<S>) {
+        let mutation = Mutation {
+            op_type: OpType::Put as i32,
+            // Reserves `num_rows` sequence numbers for this mutation.
+            sequence: self
+                .next_sequence
+                .fetch_add(self.num_rows as u64, Ordering::Relaxed),
+            rows: Some(self.build_rows()),
+        };
+        let entry = WalEntry {
+            mutations: vec![mutation],
+        };
+        metrics::METRIC_WAL_WRITE_BYTES_TOTAL.inc_by(Self::entry_estimated_size(&entry) as u64);
+
+        wal_writer
+            .add_entry(
+                self.id,
+                self.next_entry_id.fetch_add(1, Ordering::Relaxed),
+                &entry,
+                &self.wal_options,
+            )
+            .unwrap();
+    }
+
+    /// Replays the region.
+    ///
+    /// Scans the wal of this region (the scan is started with `0`, presumably the start
+    /// entry id — TODO confirm against `Wal::scan`) and adds the estimated size of every
+    /// entry read to the read bytes counter. Panics if the scan or any entry fails.
+    pub async fn replay<S: LogStore>(&self, wal: &Arc<Wal<S>>) {
+        let mut wal_stream = wal.scan(self.id, 0, &self.wal_options).unwrap();
+        while let Some(res) = wal_stream.next().await {
+            let (_, entry) = res.unwrap();
+            metrics::METRIC_WAL_READ_BYTES_TOTAL.inc_by(Self::entry_estimated_size(&entry) as u64);
+        }
+    }
+
+    /// Computes the estimated size in bytes of the entry.
+    ///
+    /// The estimate is based on in-memory sizes and vector capacities. Only the rows of the
+    /// first mutation are inspected, and the heap payload of string values is not counted.
+    /// Panics if the entry has no mutation or its first mutation has no rows.
+    pub fn entry_estimated_size(entry: &WalEntry) -> usize {
+        // Fixed-size wrappers: the entry itself, its mutations and one `Rows`.
+        let wrapper_size = size_of::<WalEntry>()
+            + entry.mutations.capacity() * size_of::<Mutation>()
+            + size_of::<Rows>();
+
+        let rows = entry.mutations[0].rows.as_ref().unwrap();
+
+        // Column schemas plus the heap buffers of their names.
+        let schema_size = rows.schema.capacity() * size_of::<ColumnSchema>()
+            + rows
+                .schema
+                .iter()
+                .map(|s| s.column_name.capacity())
+                .sum::<usize>();
+        // Row wrappers plus the value slots of each row.
+        let values_size = (rows.rows.capacity() * size_of::<Row>())
+            + rows
+                .rows
+                .iter()
+                .map(|r| r.values.capacity() * size_of::<Value>())
+                .sum::<usize>();
+
+        wrapper_size + schema_size + values_size
+    }
+
+    /// Generates `num_rows` rows matching the region schema.
+    ///
+    /// Values are generated column by column and then transposed into rows.
+    fn build_rows(&self) -> Rows {
+        let cols = self
+            .schema
+            .iter()
+            .map(|col_schema| {
+                let col_data_type = ColumnDataType::try_from(col_schema.datatype).unwrap();
+                self.build_col(&col_data_type, self.num_rows)
+            })
+            .collect::<Vec<_>>();
+
+        // Row `i` takes the `i`-th value of every column.
+        let rows = (0..self.num_rows)
+            .map(|i| {
+                let values = cols.iter().map(|col| col[i as usize].clone()).collect();
+                Row { values }
+            })
+            .collect();
+
+        Rows {
+            schema: self.schema.clone(),
+            rows,
+        }
+    }
+
+    /// Generates `num_rows` values of the given column data type.
+    ///
+    /// - `TimestampMillisecond`: consecutive timestamps taken from `next_timestamp`, 1000 ms apart.
+    /// - `Int64`: sampled from `Uniform::new(0, 10_000)`.
+    /// - `Float64`: sampled from `Uniform::new(0.0, 5000.0)`.
+    /// - `String`: random alphanumeric strings of 10 characters.
+    ///
+    /// Panics on any other data type.
+    fn build_col(&self, col_data_type: &ColumnDataType, num_rows: u32) -> Vec<Value> {
+        // The RNG lock is held for the whole column.
+        let mut rng_guard = self.rng.lock().unwrap();
+        let rng = rng_guard.as_mut().unwrap();
+        match col_data_type {
+            ColumnDataType::TimestampMillisecond => (0..num_rows)
+                .map(|_| {
+                    let ts = self.next_timestamp.fetch_add(1000, Ordering::Relaxed);
+                    Value {
+                        value_data: Some(ValueData::TimestampMillisecondValue(ts)),
+                    }
+                })
+                .collect(),
+            ColumnDataType::Int64 => (0..num_rows)
+                .map(|_| {
+                    let v = rng.sample(Uniform::new(0, 10_000));
+                    Value {
+                        value_data: Some(ValueData::I64Value(v)),
+                    }
+                })
+                .collect(),
+            ColumnDataType::Float64 => (0..num_rows)
+                .map(|_| {
+                    let v = rng.sample(Uniform::new(0.0, 5000.0));
+                    Value {
+                        value_data: Some(ValueData::F64Value(v)),
+                    }
+                })
+                .collect(),
+            ColumnDataType::String => (0..num_rows)
+                .map(|_| {
+                    let v = Alphanumeric.sample_string(rng, 10);
+                    Value {
+                        value_data: Some(ValueData::StringValue(v)),
+                    }
+                })
+                .collect(),
+            // Only the types produced by the schema builder are supported.
+            _ => unreachable!(),
+        }
+    }
+}
--- a/cliff.toml
+++ b/cliff.toml
@@ -0,0 +1,127 @@
+# https://git-cliff.org/docs/configuration
+
+[remote.github]
+owner = "GreptimeTeam"
+repo = "greptimedb"
+
+[changelog]
+header = ""
+footer = ""
+# template for the changelog body
+# https://keats.github.io/tera/docs/#introduction
+body = """
+# {{ version }}
+
+Release date: {{ timestamp | date(format="%B %d, %Y") }}
+
+{%- set breakings = commits | filter(attribute="breaking", value=true) -%}
+{%- if breakings | length > 0 %}
+
+## Breaking changes
+    {% for commit in breakings %}
+      * {{ commit.github.pr_title }}\
+        {% if commit.github.username %} by \
+          {% set author = commit.github.username -%}
+          [@{{ author }}](https://github.com/{{ author }})
+        {%- endif -%}
+        {% if commit.github.pr_number %} in \
+          {% set number = commit.github.pr_number -%}
+          [#{{ number }}]({{ self::remote_url() }}/pull/{{ number }})
+        {%- endif %}
+    {%- endfor %}
+{%- endif -%}
+
+{%- set grouped_commits = commits | filter(attribute="breaking", value=false) | group_by(attribute="group") -%}
+{% for group, commits in grouped_commits %}
+
+    ### {{ group | striptags | trim | upper_first }}
+    {% for commit in commits %}
+        * {{ commit.github.pr_title }}\
+            {% if commit.github.username %} by \
+              {% set author = commit.github.username -%}
+              [@{{ author }}](https://github.com/{{ author }})
+            {%- endif -%}
+            {% if commit.github.pr_number %} in \
+              {% set number = commit.github.pr_number -%}
+              [#{{ number }}]({{ self::remote_url() }}/pull/{{ number }})
+            {%- endif %}
+    {%- endfor -%}
+{% endfor %}
+
+{%- if github.contributors | filter(attribute="is_first_time", value=true) | length != 0 %}
+  {% raw %}\n{% endraw -%}
+  ## New Contributors
+{% endif -%}
+{% for contributor in github.contributors | filter(attribute="is_first_time", value=true) %}
+  * [@{{ contributor.username }}](https://github.com/{{ contributor.username }}) made their first contribution
+    {%- if contributor.pr_number %} in \
+      [#{{ contributor.pr_number }}]({{ self::remote_url() }}/pull/{{ contributor.pr_number }}) \
+    {%- endif %}
+{%- endfor -%}
+
+{% if github.contributors | length != 0 %}
+  {% raw %}\n{% endraw -%}
+## All Contributors
+
+We would like to thank the following contributors from the GreptimeDB community:
+
+{%- set contributors = github.contributors | sort(attribute="username") | map(attribute="username") -%}
+{%- set bots = ['dependabot[bot]'] %}
+
+{% for contributor in contributors %}
+{%- if bots is containing(contributor) -%}{% continue %}{%- endif -%}
+{%- if loop.first -%}
+  [@{{ contributor }}](https://github.com/{{ contributor }})
+{%- else -%}
+  , [@{{ contributor }}](https://github.com/{{ contributor }})
+{%- endif -%}
+{%- endfor %}
+{%- endif %}
+{% raw %}\n{% endraw %}
+
+{%- macro remote_url() -%}
+  https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}
+{%- endmacro -%}
+"""
+trim = true
+
+[git]
+# parse the commits based on https://www.conventionalcommits.org
+conventional_commits = true
+# filter out the commits that are not conventional
+filter_unconventional = true
+# process each line of a commit as an individual commit
+split_commits = false
+# regex for parsing and grouping commits
+commit_parsers = [
+  { message = "^feat", group = "<!-- 0 -->🚀 Features" },
+  { message = "^fix", group = "<!-- 1 -->🐛 Bug Fixes" },
+  { message = "^doc", group = "<!-- 3 -->📚 Documentation" },
+  { message = "^perf", group = "<!-- 4 -->⚡ Performance" },
+  { message = "^refactor", group = "<!-- 2 -->🚜 Refactor" },
+  { message = "^style", group = "<!-- 5 -->🎨 Styling" },
+  { message = "^test", group = "<!-- 6 -->🧪 Testing" },
+  { message = "^chore\\(release\\): prepare for", skip = true },
+  { message = "^chore\\(deps.*\\)", skip = true },
+  { message = "^chore\\(pr\\)", skip = true },
+  { message = "^chore\\(pull\\)", skip = true },
+  { message = "^chore|^ci", group = "<!-- 7 -->⚙️ Miscellaneous Tasks" },
+  { body = ".*security", group = "<!-- 8 -->🛡️ Security" },
+  { message = "^revert", group = "<!-- 9 -->◀️ Revert" },
+]
+# protect breaking changes from being skipped due to matching a skipping commit_parser
+protect_breaking_commits = false
+# filter out the commits that are not matched by commit parsers
+filter_commits = false
+# regex for matching git tags
+# tag_pattern = "v[0-9].*"
+# regex for skipping tags
+# skip_tags = ""
+# regex for ignoring tags
+ignore_tags = ".*-nightly-.*"
+# sort the tags topologically
+topo_order = false
+# sort the commits inside sections by oldest/newest order
+sort_commits = "oldest"
+# limit the number of commits included in the changelog.
+# limit_commits = 42
--- a/config/config-docs-template.md
+++ b/config/config-docs-template.md
@@ -0,0 +1,19 @@
+# Configurations
+
+## Standalone Mode
+
+{{ toml2docs "./standalone.example.toml" }}
+
+## Cluster Mode
+
+### Frontend
+
+{{ toml2docs "./frontend.example.toml" }}
+
+### Metasrv
+
+{{ toml2docs "./metasrv.example.toml" }}
+
+### Datanode
+
+{{ toml2docs "./datanode.example.toml" }}
--- a/config/config.md
+++ b/config/config.md
@@ -0,0 +1,376 @@
+# Configurations
+
+## Standalone Mode
+
+| Key | Type | Default | Descriptions |
+| --- | -----| ------- | ----------- |
+| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
+| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
+| `default_timezone` | String | `None` | The default timezone of the server. |
+| `http` | -- | -- | The HTTP server options. |
+| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
+| `http.timeout` | String | `30s` | HTTP request timeout. |
+| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`. |
+| `grpc` | -- | -- | The gRPC server options. |
+| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
+| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
+| `mysql` | -- | -- | MySQL server options. |
+| `mysql.enable` | Bool | `true` | Whether to enable. |
+| `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
+| `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
+| `mysql.tls` | -- | -- | -- |
+| `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
+| `mysql.tls.cert_path` | String | `None` | Certificate file path. |
+| `mysql.tls.key_path` | String | `None` | Private key file path. |
+| `mysql.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
+| `postgres` | -- | -- | PostgreSQL server options. |
+| `postgres.enable` | Bool | `true` | Whether to enable |
+| `postgres.addr` | String | `127.0.0.1:4003` | The addr to bind the PostgreSQL server. |
+| `postgres.runtime_size` | Integer | `2` | The number of server worker threads. |
+| `postgres.tls` | -- | -- | PostgreSQL server TLS options, see `mysql_options.tls` section. |
+| `postgres.tls.mode` | String | `disable` | TLS mode. |
+| `postgres.tls.cert_path` | String | `None` | Certificate file path. |
+| `postgres.tls.key_path` | String | `None` | Private key file path. |
+| `postgres.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
+| `opentsdb` | -- | -- | OpenTSDB protocol options. |
+| `opentsdb.enable` | Bool | `true` | Whether to enable |
+| `opentsdb.addr` | String | `127.0.0.1:4242` | OpenTSDB telnet API server address. |
+| `opentsdb.runtime_size` | Integer | `2` | The number of server worker threads. |
+| `influxdb` | -- | -- | InfluxDB protocol options. |
+| `influxdb.enable` | Bool | `true` | Whether to enable InfluxDB protocol in HTTP API. |
+| `prom_store` | -- | -- | Prometheus remote storage options |
+| `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. |
+| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
+| `wal` | -- | -- | The WAL options. |
+| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
+| `wal.dir` | String | `None` | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.file_size` | String | `256MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.max_batch_size` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.linger` | String | `200ms` | The linger duration of a kafka batch producer.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.backoff_init` | String | `500ms` | The initial backoff delay.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.backoff_max` | String | `10s` | The maximum backoff delay.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
+| `metadata_store` | -- | -- | Metadata storage options. |
+| `metadata_store.file_size` | String | `256MB` | Kv file size in bytes. |
+| `metadata_store.purge_threshold` | String | `4GB` | Kv purge threshold. |
+| `procedure` | -- | -- | Procedure storage options. |
+| `procedure.max_retry_times` | Integer | `3` | Procedure max retry time. |
+| `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially |
+| `storage` | -- | -- | The data storage options. |
+| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
+| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
+| `storage.cache_path` | String | `None` | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
+| `storage.cache_capacity` | String | `None` | The local file cache capacity in bytes. |
+| `storage.bucket` | String | `None` | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
+| `storage.root` | String | `None` | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
+| `storage.access_key_id` | String | `None` | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
+| `storage.secret_access_key` | String | `None` | The secret access key of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3`**. |
+| `storage.access_key_secret` | String | `None` | The secret access key of the aliyun account.<br/>**It's only used when the storage type is `Oss`**. |
+| `storage.account_name` | String | `None` | The account name of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
+| `storage.account_key` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
+| `storage.scope` | String | `None` | The scope of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
+| `storage.credential_path` | String | `None` | The credential path of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
+| `storage.container` | String | `None` | The container of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
+| `storage.sas_token` | String | `None` | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
+| `storage.endpoint` | String | `None` | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
+| `storage.region` | String | `None` | The region of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
+| `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. |
+| `region_engine.mito` | -- | -- | The Mito engine options. |
+| `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. |
+| `region_engine.mito.worker_channel_size` | Integer | `128` | Request channel size of each worker. |
+| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
+| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. |
+| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
+| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
+| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
+| `region_engine.mito.global_write_buffer_size` | String | `1GB` | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
+| `region_engine.mito.global_write_buffer_reject_size` | String | `2GB` | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` |
+| `region_engine.mito.sst_meta_cache_size` | String | `128MB` | Cache size for SST metadata. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. |
+| `region_engine.mito.vector_cache_size` | String | `512MB` | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
+| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
+| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
+| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
+| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
+| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
+| `region_engine.mito.inverted_index` | -- | -- | The options for inverted index in Mito engine. |
+| `region_engine.mito.inverted_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically<br/>- `disable`: never |
+| `region_engine.mito.inverted_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically<br/>- `disable`: never |
+| `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query<br/>- `auto`: automatically<br/>- `disable`: never |
+| `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `64M` | Memory threshold for performing an external sort during index creation.<br/>Setting to empty will disable external sorting, forcing all sorting operations to happen in memory. |
+| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). |
+| `region_engine.mito.memtable` | -- | -- | -- |
+| `region_engine.mito.memtable.type` | String | `time_series` | Memtable type.<br/>- `time_series`: time-series memtable<br/>- `partition_tree`: partition tree memtable (experimental) |
+| `region_engine.mito.memtable.index_max_keys_per_shard` | Integer | `8192` | The max number of keys in one shard.<br/>Only available for `partition_tree` memtable. |
+| `region_engine.mito.memtable.data_freeze_threshold` | Integer | `32768` | The max rows of data inside the actively writing buffer in one shard.<br/>Only available for `partition_tree` memtable. |
+| `region_engine.mito.memtable.fork_dictionary_bytes` | String | `1GiB` | Max dictionary bytes.<br/>Only available for `partition_tree` memtable. |
+| `logging` | -- | -- | The logging options. |
+| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
+| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
+| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
+| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
+| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
+| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
+| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
+| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
+| `export_metrics.enable` | Bool | `false` | Whether to enable exporting metrics. |
+| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
+| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommended to collect metrics generated by itself |
+| `export_metrics.self_import.db` | String | `None` | -- |
+| `export_metrics.remote_write` | -- | -- | -- |
+| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. |
+| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
+
+
+## Cluster Mode
+
+### Frontend
+
+| Key | Type | Default | Descriptions |
+| --- | -----| ------- | ----------- |
+| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
+| `default_timezone` | String | `None` | The default timezone of the server. |
+| `heartbeat` | -- | -- | The heartbeat options. |
+| `heartbeat.interval` | String | `18s` | Interval for sending heartbeat messages to the metasrv. |
+| `heartbeat.retry_interval` | String | `3s` | Interval for retrying to send heartbeat messages to the metasrv. |
+| `http` | -- | -- | The HTTP server options. |
+| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
+| `http.timeout` | String | `30s` | HTTP request timeout. |
+| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`. |
+| `grpc` | -- | -- | The gRPC server options. |
+| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
+| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
+| `mysql` | -- | -- | MySQL server options. |
+| `mysql.enable` | Bool | `true` | Whether to enable. |
+| `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
+| `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
+| `mysql.tls` | -- | -- | -- |
+| `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
+| `mysql.tls.cert_path` | String | `None` | Certificate file path. |
+| `mysql.tls.key_path` | String | `None` | Private key file path. |
+| `mysql.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
+| `postgres` | -- | -- | PostgreSQL server options. |
+| `postgres.enable` | Bool | `true` | Whether to enable |
+| `postgres.addr` | String | `127.0.0.1:4003` | The addr to bind the PostgreSQL server. |
+| `postgres.runtime_size` | Integer | `2` | The number of server worker threads. |
+| `postgres.tls` | -- | -- | PostgreSQL server TLS options, see `mysql_options.tls` section. |
+| `postgres.tls.mode` | String | `disable` | TLS mode. |
+| `postgres.tls.cert_path` | String | `None` | Certificate file path. |
+| `postgres.tls.key_path` | String | `None` | Private key file path. |
+| `postgres.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
+| `opentsdb` | -- | -- | OpenTSDB protocol options. |
+| `opentsdb.enable` | Bool | `true` | Whether to enable |
+| `opentsdb.addr` | String | `127.0.0.1:4242` | OpenTSDB telnet API server address. |
+| `opentsdb.runtime_size` | Integer | `2` | The number of server worker threads. |
+| `influxdb` | -- | -- | InfluxDB protocol options. |
+| `influxdb.enable` | Bool | `true` | Whether to enable InfluxDB protocol in HTTP API. |
+| `prom_store` | -- | -- | Prometheus remote storage options |
+| `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. |
+| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
+| `meta_client` | -- | -- | The metasrv client options. |
+| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
+| `meta_client.timeout` | String | `3s` | Operation timeout. |
+| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
+| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
+| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
+| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |
+| `meta_client.metadata_cache_max_capacity` | Integer | `100000` | The configuration about the cache of the metadata. |
+| `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. |
+| `meta_client.metadata_cache_tti` | String | `5m` | -- |
+| `datanode` | -- | -- | Datanode options. |
+| `datanode.client` | -- | -- | Datanode client options. |
+| `datanode.client.timeout` | String | `10s` | -- |
+| `datanode.client.connect_timeout` | String | `10s` | -- |
+| `datanode.client.tcp_nodelay` | Bool | `true` | -- |
+| `logging` | -- | -- | The logging options. |
+| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
+| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
+| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
+| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
+| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
+| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
+| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
+| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
+| `export_metrics.enable` | Bool | `false` | Whether to enable exporting metrics. |
+| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
+| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommended to collect metrics generated by itself |
+| `export_metrics.self_import.db` | String | `None` | -- |
+| `export_metrics.remote_write` | -- | -- | -- |
+| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. |
+| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
+
+
+### Metasrv
+
+| Key | Type | Default | Descriptions |
+| --- | -----| ------- | ----------- |
+| `data_home` | String | `/tmp/metasrv/` | The working home directory. |
+| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
+| `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv,  "127.0.0.1:3002" by default for localhost. |
+| `store_addr` | String | `127.0.0.1:2379` | Etcd server address. |
+| `selector` | String | `lease_based` | Datanode selector type.<br/>- `lease_based` (default value).<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
+| `use_memory_store` | Bool | `false` | Store data in memory. |
+| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. |
+| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
+| `procedure` | -- | -- | Procedure storage options. |
+| `procedure.max_retry_times` | Integer | `12` | Procedure max retry time. |
+| `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially |
+| `procedure.max_metadata_value_size` | String | `1500KiB` | Auto split large value<br/>GreptimeDB procedure uses etcd as the default metadata storage backend.<br/>The maximum size of any etcd request is 1.5 MiB<br/>1500KiB = 1536KiB (1.5MiB) - 36KiB (reserved size of key)<br/>Comment out `max_metadata_value_size` to disable splitting large values (no limit). |
+| `failure_detector` | -- | -- | -- |
+| `failure_detector.threshold` | Float | `8.0` | -- |
+| `failure_detector.min_std_deviation` | String | `100ms` | -- |
+| `failure_detector.acceptable_heartbeat_pause` | String | `3000ms` | -- |
+| `failure_detector.first_heartbeat_estimate` | String | `1000ms` | -- |
+| `datanode` | -- | -- | Datanode options. |
+| `datanode.client` | -- | -- | Datanode client options. |
+| `datanode.client.timeout` | String | `10s` | -- |
+| `datanode.client.connect_timeout` | String | `10s` | -- |
+| `datanode.client.tcp_nodelay` | Bool | `true` | -- |
+| `wal` | -- | -- | -- |
+| `wal.provider` | String | `raft_engine` | -- |
+| `wal.broker_endpoints` | Array | -- | The broker endpoints of the Kafka cluster. |
+| `wal.num_topics` | Integer | `64` | Number of topics to be created upon start. |
+| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default) |
+| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`. |
+| `wal.replication_factor` | Integer | `1` | Expected number of replicas of each partition. |
+| `wal.create_topic_timeout` | String | `30s` | Above which a topic creation operation will be cancelled. |
+| `wal.backoff_init` | String | `500ms` | The initial backoff for kafka clients. |
+| `wal.backoff_max` | String | `10s` | The maximum backoff for kafka clients. |
+| `wal.backoff_base` | Integer | `2` | Exponential backoff rate, i.e. next backoff = base * current backoff. |
+| `wal.backoff_deadline` | String | `5mins` | Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate. |
+| `logging` | -- | -- | The logging options. |
+| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
+| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
+| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
+| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
+| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
+| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
+| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
+| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
+| `export_metrics.enable` | Bool | `false` | Whether to enable exporting metrics. |
+| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
+| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommended to collect metrics generated by itself |
+| `export_metrics.self_import.db` | String | `None` | -- |
+| `export_metrics.remote_write` | -- | -- | -- |
+| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. |
+| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
+
+
+### Datanode
+
+| Key | Type | Default | Descriptions |
+| --- | -----| ------- | ----------- |
+| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
+| `node_id` | Integer | `None` | The datanode identifier and should be unique in the cluster. |
+| `require_lease_before_startup` | Bool | `false` | Start services after regions have obtained leases.<br/>It will block the datanode start if it can't receive leases in the heartbeat from metasrv. |
+| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
+| `rpc_addr` | String | `127.0.0.1:3001` | The gRPC address of the datanode. |
+| `rpc_hostname` | String | `None` | The hostname of the datanode. |
+| `rpc_runtime_size` | Integer | `8` | The number of gRPC server worker threads. |
+| `rpc_max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
+| `rpc_max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
+| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
+| `heartbeat` | -- | -- | The heartbeat options. |
+| `heartbeat.interval` | String | `3s` | Interval for sending heartbeat messages to the metasrv. |
+| `heartbeat.retry_interval` | String | `3s` | Interval for retrying to send heartbeat messages to the metasrv. |
+| `meta_client` | -- | -- | The metasrv client options. |
+| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
+| `meta_client.timeout` | String | `3s` | Operation timeout. |
+| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
+| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
+| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
+| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |
+| `meta_client.metadata_cache_max_capacity` | Integer | `100000` | The configuration about the cache of the metadata. |
+| `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. |
+| `meta_client.metadata_cache_tti` | String | `5m` | -- |
+| `wal` | -- | -- | The WAL options. |
+| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
+| `wal.dir` | String | `None` | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.file_size` | String | `256MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.max_batch_size` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.linger` | String | `200ms` | The linger duration of a kafka batch producer.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.backoff_init` | String | `500ms` | The initial backoff delay.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.backoff_max` | String | `10s` | The maximum backoff delay.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
+| `storage` | -- | -- | The data storage options. |
+| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
+| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
+| `storage.cache_path` | String | `None` | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
+| `storage.cache_capacity` | String | `None` | The local file cache capacity in bytes. |
+| `storage.bucket` | String | `None` | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
+| `storage.root` | String | `None` | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
+| `storage.access_key_id` | String | `None` | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
+| `storage.secret_access_key` | String | `None` | The secret access key of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3`**. |
+| `storage.access_key_secret` | String | `None` | The secret access key of the aliyun account.<br/>**It's only used when the storage type is `Oss`**. |
+| `storage.account_name` | String | `None` | The account name of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
+| `storage.account_key` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
+| `storage.scope` | String | `None` | The scope of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
+| `storage.credential_path` | String | `None` | The credential path of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
+| `storage.container` | String | `None` | The container of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
+| `storage.sas_token` | String | `None` | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
+| `storage.endpoint` | String | `None` | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
+| `storage.region` | String | `None` | The region of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
+| `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. |
+| `region_engine.mito` | -- | -- | The Mito engine options. |
+| `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. |
+| `region_engine.mito.worker_channel_size` | Integer | `128` | Request channel size of each worker. |
+| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
+| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. |
+| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
+| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
+| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
+| `region_engine.mito.global_write_buffer_size` | String | `1GB` | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
+| `region_engine.mito.global_write_buffer_reject_size` | String | `2GB` | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` |
+| `region_engine.mito.sst_meta_cache_size` | String | `128MB` | Cache size for SST metadata. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. |
+| `region_engine.mito.vector_cache_size` | String | `512MB` | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
+| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
+| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
+| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
+| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
+| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
+| `region_engine.mito.inverted_index` | -- | -- | The options for inverted index in Mito engine. |
+| `region_engine.mito.inverted_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically<br/>- `disable`: never |
+| `region_engine.mito.inverted_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically<br/>- `disable`: never |
+| `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query<br/>- `auto`: automatically<br/>- `disable`: never |
+| `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `64M` | Memory threshold for performing an external sort during index creation.<br/>Setting to empty will disable external sorting, forcing all sorting operations to happen in memory. |
+| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). |
+| `region_engine.mito.memtable` | -- | -- | -- |
+| `region_engine.mito.memtable.type` | String | `time_series` | Memtable type.<br/>- `time_series`: time-series memtable<br/>- `partition_tree`: partition tree memtable (experimental) |
+| `region_engine.mito.memtable.index_max_keys_per_shard` | Integer | `8192` | The max number of keys in one shard.<br/>Only available for `partition_tree` memtable. |
+| `region_engine.mito.memtable.data_freeze_threshold` | Integer | `32768` | The max rows of data inside the actively writing buffer in one shard.<br/>Only available for `partition_tree` memtable. |
+| `region_engine.mito.memtable.fork_dictionary_bytes` | String | `1GiB` | Max dictionary bytes.<br/>Only available for `partition_tree` memtable. |
+| `logging` | -- | -- | The logging options. |
+| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
+| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
+| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
+| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
+| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
+| `logging.tracing_sample_ratio` | -- | -- | The percentage of traces that will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means no traces are sampled, the default value is 1.<br/>Ratios > 1 are treated as 1. Ratios < 0 are treated as 0. |
+| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
+| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
+| `export_metrics.enable` | Bool | `false` | Whether to enable exporting metrics. |
+| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
+| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommended to collect metrics generated by itself. |
+| `export_metrics.self_import.db` | String | `None` | -- |
+| `export_metrics.remote_write` | -- | -- | -- |
+| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. |
+| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers to carry in Prometheus remote-write requests. |
--- a/config/datanode.example.toml
+++ b/config/datanode.example.toml
@@ -1,171 +1,430 @@
-# Node running mode, see `standalone.example.toml`.
-mode = "distributed"
-# The datanode identifier, should be unique.
+## The running mode of the datanode. It can be `standalone` or `distributed`.
+mode = "standalone"
+
+## The datanode identifier. It should be unique in the cluster.
+## +toml2docs:none-default
 node_id = 42
-# gRPC server address, "127.0.0.1:3001" by default.
-rpc_addr = "127.0.0.1:3001"
-# Hostname of this node.
-rpc_hostname = "127.0.0.1"
-# The number of gRPC server worker threads, 8 by default.
-rpc_runtime_size = 8
-# Start services after regions have obtained leases.
-# It will block the datanode start if it can't receive leases in the heartbeat from metasrv.
+
+## Start services after regions have obtained leases.
+## It will block the datanode start if it can't receive leases in the heartbeat from metasrv.
 require_lease_before_startup = false

-# Initialize all regions in the background during the startup.
-# By default, it provides services after all regions have been initialized.
+## Initialize all regions in the background during the startup.
+## By default, it provides services after all regions have been initialized.
 init_regions_in_background = false

+## The gRPC address of the datanode.
+rpc_addr = "127.0.0.1:3001"
+
+## The hostname of the datanode.
+## +toml2docs:none-default
+rpc_hostname = "127.0.0.1"
+
+## The number of gRPC server worker threads.
+rpc_runtime_size = 8
+
+## The maximum receive message size for gRPC server.
+rpc_max_recv_message_size = "512MB"
+
+## The maximum send message size for gRPC server.
+rpc_max_send_message_size = "512MB"
+
+## Enable telemetry to collect anonymous usage data.
+enable_telemetry = true
+
+## The heartbeat options.
 [heartbeat]
-# Interval for sending heartbeat messages to the Metasrv, 3 seconds by default.
+## Interval for sending heartbeat messages to the metasrv.
 interval = "3s"

-# Metasrv client options.
+## Interval for retrying to send heartbeat messages to the metasrv.
+retry_interval = "3s"
+
+## The metasrv client options.
 [meta_client]
-# Metasrv address list.
+## The addresses of the metasrv.
 metasrv_addrs = ["127.0.0.1:3002"]
-# Heartbeat timeout, 500 milliseconds by default.
-heartbeat_timeout = "500ms"
-# Operation timeout, 3 seconds by default.
+
+## Operation timeout.
 timeout = "3s"
-# Connect server timeout, 1 second by default.
+
+## Heartbeat timeout.
+heartbeat_timeout = "500ms"
+
+## DDL timeout.
+ddl_timeout = "10s"
+
+## Connect server timeout.
 connect_timeout = "1s"
-# `TCP_NODELAY` option for accepted connections, true by default.
+
+## `TCP_NODELAY` option for accepted connections.
 tcp_nodelay = true

-# WAL options.
+## The configuration about the cache of the metadata.
+metadata_cache_max_capacity = 100000
+
+## TTL of the metadata cache.
+metadata_cache_ttl = "10m"
+
+## TTI of the metadata cache.
+metadata_cache_tti = "5m"
+
+## The WAL options.
 [wal]
+## The provider of the WAL.
+## - `raft_engine`: the wal is stored in the local file system by raft-engine.
+## - `kafka`: it's remote wal that data is stored in Kafka.
 provider = "raft_engine"

-# Raft-engine wal options, see `standalone.example.toml`.
-# dir = "/tmp/greptimedb/wal"
+## The directory to store the WAL files.
+## **It's only used when the provider is `raft_engine`**.
+## +toml2docs:none-default
+dir = "/tmp/greptimedb/wal"
+
+## The size of the WAL segment file.
+## **It's only used when the provider is `raft_engine`**.
 file_size = "256MB"
+
+## The threshold of the WAL size to trigger a flush.
+## **It's only used when the provider is `raft_engine`**.
 purge_threshold = "4GB"
+
+## The interval to trigger a flush.
+## **It's only used when the provider is `raft_engine`**.
 purge_interval = "10m"
+
+## The read batch size.
+## **It's only used when the provider is `raft_engine`**.
 read_batch_size = 128
+
+## Whether to use sync write.
+## **It's only used when the provider is `raft_engine`**.
 sync_write = false

-# Kafka wal options, see `standalone.example.toml`.
-# broker_endpoints = ["127.0.0.1:9092"]
-# Warning: Kafka has a default limit of 1MB per message in a topic.
-# max_batch_size = "1MB"
-# linger = "200ms"
-# consumer_wait_timeout = "100ms"
-# backoff_init = "500ms"
-# backoff_max = "10s"
-# backoff_base = 2
-# backoff_deadline = "5mins"
+## Whether to reuse logically truncated log files.
+## **It's only used when the provider is `raft_engine`**.
+enable_log_recycle = true

-# Storage options, see `standalone.example.toml`.
+## Whether to pre-create log files on start up.
+## **It's only used when the provider is `raft_engine`**.
+prefill_log_files = false
+
+## Duration for fsyncing log files.
+## **It's only used when the provider is `raft_engine`**.
+sync_period = "10s"
+
+## The Kafka broker endpoints.
+## **It's only used when the provider is `kafka`**.
+broker_endpoints = ["127.0.0.1:9092"]
+
+## The max size of a single producer batch.
+## Warning: Kafka has a default limit of 1MB per message in a topic.
+## **It's only used when the provider is `kafka`**.
+max_batch_size = "1MB"
+
+## The linger duration of a kafka batch producer.
+## **It's only used when the provider is `kafka`**.
+linger = "200ms"
+
+## The consumer wait timeout.
+## **It's only used when the provider is `kafka`**.
+consumer_wait_timeout = "100ms"
+
+## The initial backoff delay.
+## **It's only used when the provider is `kafka`**.
+backoff_init = "500ms"
+
+## The maximum backoff delay.
+## **It's only used when the provider is `kafka`**.
+backoff_max = "10s"
+
+## The exponential backoff rate, i.e. next backoff = base * current backoff.
+## **It's only used when the provider is `kafka`**.
+backoff_base = 2
+
+## The deadline of retries.
+## **It's only used when the provider is `kafka`**.
+backoff_deadline = "5mins"
+
+# Example of using S3 as the storage.
+# [storage]
+# type = "S3"
+# bucket = "greptimedb"
+# root = "data"
+# access_key_id = "test"
+# secret_access_key = "123456"
+# endpoint = "https://s3.amazonaws.com"
+# region = "us-west-2"
+
+# Example of using Oss as the storage.
+# [storage]
+# type = "Oss"
+# bucket = "greptimedb"
+# root = "data"
+# access_key_id = "test"
+# access_key_secret = "123456"
+# endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
+
+# Example of using Azblob as the storage.
+# [storage]
+# type = "Azblob"
+# container = "greptimedb"
+# root = "data"
+# account_name = "test"
+# account_key = "123456"
+# endpoint = "https://greptimedb.blob.core.windows.net"
+# sas_token = ""
+
+# Example of using Gcs as the storage.
+# [storage]
+# type = "Gcs"
+# bucket = "greptimedb"
+# root = "data"
+# scope = "test"
+# credential_path = "123456"
+# endpoint = "https://storage.googleapis.com"
+
+## The data storage options.
 [storage]
-# The working home directory.
+## The working home directory.
 data_home = "/tmp/greptimedb/"
-# Storage type.
-type = "File"
-# TTL for all tables. Disabled by default.
-# global_ttl = "7d"

-# Cache configuration for object storage such as 'S3' etc.
-# The local file cache directory
-# cache_path = "/path/local_cache"
-# The local file cache capacity in bytes.
-# cache_capacity = "256MB"
+## The storage type used to store the data.
+## - `File`: the data is stored in the local file system.
+## - `S3`: the data is stored in the S3 object storage.
+## - `Gcs`: the data is stored in the Google Cloud Storage.
+## - `Azblob`: the data is stored in the Azure Blob Storage.
+## - `Oss`: the data is stored in the Aliyun OSS.
+type = "File"
+
+## Cache configuration for object storage such as 'S3' etc.
+## The local file cache directory.
+## +toml2docs:none-default
+cache_path = "/path/local_cache"
+
+## The local file cache capacity in bytes.
+## +toml2docs:none-default
+cache_capacity = "256MB"
+
+## The S3 bucket name.
+## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
+## +toml2docs:none-default
+bucket = "greptimedb"
+
+## The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.
+## **It's only used when the storage type is `S3`, `Oss` and `Azblob`**.
+## +toml2docs:none-default
+root = "greptimedb"
+
+## The access key id of the aws account.
+## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
+## **It's only used when the storage type is `S3` and `Oss`**.
+## +toml2docs:none-default
+access_key_id = "test"
+
+## The secret access key of the aws account.
+## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
+## **It's only used when the storage type is `S3`**.
+## +toml2docs:none-default
+secret_access_key = "test"
+
+## The secret access key of the aliyun account.
+## **It's only used when the storage type is `Oss`**.
+## +toml2docs:none-default
+access_key_secret = "test"
+
+## The account name of the azure account.
+## **It's only used when the storage type is `Azblob`**.
+## +toml2docs:none-default
+account_name = "test"
+
+## The account key of the azure account.
+## **It's only used when the storage type is `Azblob`**.
+## +toml2docs:none-default
+account_key = "test"
+
+## The scope of the google cloud storage.
+## **It's only used when the storage type is `Gcs`**.
+## +toml2docs:none-default
+scope = "test"
+
+## The credential path of the google cloud storage.
+## **It's only used when the storage type is `Gcs`**.
+## +toml2docs:none-default
+credential_path = "test"
+
+## The container of the azure account.
+## **It's only used when the storage type is `Azblob`**.
+## +toml2docs:none-default
+container = "greptimedb"
+
+## The sas token of the azure account.
+## **It's only used when the storage type is `Azblob`**.
+## +toml2docs:none-default
+sas_token = ""
+
+## The endpoint of the S3 service.
+## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
+## +toml2docs:none-default
+endpoint = "https://s3.amazonaws.com"
+
+## The region of the S3 service.
+## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
+## +toml2docs:none-default
+region = "us-west-2"

 # Custom storage options
-#[[storage.providers]]
-#type = "S3"
-#[[storage.providers]]
-#type = "Gcs"
+# [[storage.providers]]
+# type = "S3"
+# [[storage.providers]]
+# type = "Gcs"

-# Mito engine options
+## The region engine options. You can configure multiple region engines.
 [[region_engine]]
+
+## The Mito engine options.
 [region_engine.mito]
-# Number of region workers
+
+## Number of region workers.
 num_workers = 8
-# Request channel size of each worker
+
+## Request channel size of each worker.
 worker_channel_size = 128
-# Max batch size for a worker to handle requests
+
+## Max batch size for a worker to handle requests.
 worker_request_batch_size = 64
-# Number of meta action updated to trigger a new checkpoint for the manifest
+
+## Number of meta action updated to trigger a new checkpoint for the manifest.
 manifest_checkpoint_distance = 10
-# Whether to compress manifest and checkpoint file by gzip (default false).
+
+## Whether to compress manifest and checkpoint file by gzip (default false).
 compress_manifest = false
-# Max number of running background jobs
+
+## Max number of running background jobs
 max_background_jobs = 4
-# Interval to auto flush a region if it has not flushed yet.
+
+## Interval to auto flush a region if it has not flushed yet.
 auto_flush_interval = "1h"
-# Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
+
+## Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
 global_write_buffer_size = "1GB"
-# Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
+
+## Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
 global_write_buffer_reject_size = "2GB"
-# Cache size for SST metadata. Setting it to 0 to disable the cache.
-# If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
+
+## Cache size for SST metadata. Setting it to 0 to disable the cache.
+## If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
 sst_meta_cache_size = "128MB"
-# Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
-# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
+
+## Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
+## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
 vector_cache_size = "512MB"
-# Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
-# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
+
+## Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
+## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
 page_cache_size = "512MB"
-# Buffer size for SST writing.
+
+## Buffer size for SST writing.
 sst_write_buffer_size = "8MB"
-# Parallelism to scan a region (default: 1/4 of cpu cores).
-# - 0: using the default value (1/4 of cpu cores).
-# - 1: scan in current thread.
-# - n: scan in parallelism n.
+
+## Parallelism to scan a region (default: 1/4 of cpu cores).
+## - `0`: using the default value (1/4 of cpu cores).
+## - `1`: scan in current thread.
+## - `n`: scan in parallelism n.
 scan_parallelism = 0
-# Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
+
+## Capacity of the channel to send data from parallel scan tasks to the main task.
 parallel_scan_channel_size = 32
-# Whether to allow stale WAL entries read during replay.
+
+## Whether to allow stale WAL entries read during replay.
 allow_stale_entries = false

+## The options for inverted index in Mito engine.
 [region_engine.mito.inverted_index]
-# Whether to create the index on flush.
-# - "auto": automatically
-# - "disable": never
+
+## Whether to create the index on flush.
+## - `auto`: automatically
+## - `disable`: never
 create_on_flush = "auto"
-# Whether to create the index on compaction.
-# - "auto": automatically
-# - "disable": never
+
+## Whether to create the index on compaction.
+## - `auto`: automatically
+## - `disable`: never
 create_on_compaction = "auto"
-# Whether to apply the index on query
-# - "auto": automatically
-# - "disable": never
+
+## Whether to apply the index on query
+## - `auto`: automatically
+## - `disable`: never
 apply_on_query = "auto"
-# Memory threshold for performing an external sort during index creation.
-# Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
+
+## Memory threshold for performing an external sort during index creation.
+## Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
 mem_threshold_on_create = "64M"
-# File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
+
+## File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
 intermediate_path = ""

 [region_engine.mito.memtable]
-# Memtable type.
-# - "experimental": experimental memtable
-# - "time_series": time-series memtable (deprecated)
-type = "experimental"
-# The max number of keys in one shard.
+## Memtable type.
+## - `time_series`: time-series memtable
+## - `partition_tree`: partition tree memtable (experimental)
+type = "time_series"
+
+## The max number of keys in one shard.
+## Only available for `partition_tree` memtable.
 index_max_keys_per_shard = 8192
-# The max rows of data inside the actively writing buffer in one shard.
+
+## The max rows of data inside the actively writing buffer in one shard.
+## Only available for `partition_tree` memtable.
 data_freeze_threshold = 32768
-# Max dictionary bytes.
+
+## Max dictionary bytes.
+## Only available for `partition_tree` memtable.
 fork_dictionary_bytes = "1GiB"

-# Log options, see `standalone.example.toml`
-# [logging]
-# dir = "/tmp/greptimedb/logs"
-# level = "info"
+## The logging options.
+[logging]
+## The directory to store the log files.
+dir = "/tmp/greptimedb/logs"

-# Datanode export the metrics generated by itself
-# encoded to Prometheus remote-write format
-# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
-# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
-# [export_metrics]
-# whether enable export metrics, default is false
-# enable = false
-# The interval of export metrics
-# write_interval = "30s"
-# [export_metrics.remote_write]
-# The url the metrics send to. The url is empty by default, url example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`
-# url = ""
-# HTTP headers of Prometheus remote-write carry
-# headers = {}
+## The log level. Can be `info`/`debug`/`warn`/`error`.
+## +toml2docs:none-default
+level = "info"
+
+## Enable OTLP tracing.
+enable_otlp_tracing = false
+
+## The OTLP tracing endpoint.
+## +toml2docs:none-default
+otlp_endpoint = ""
+
+## Whether to append logs to stdout.
+append_stdout = true
+
+## The percentage of tracing will be sampled and exported.
+## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
+## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
+[logging.tracing_sample_ratio]
+default_ratio = 1.0
+
+## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
+## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
+[export_metrics]
+
+## Whether to enable exporting metrics.
+enable = false
+
+## The interval of export metrics.
+write_interval = "30s"
+
+## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself.
+[export_metrics.self_import]
+## +toml2docs:none-default
+db = "information_schema"
+
+[export_metrics.remote_write]
+## The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`.
+url = ""
+
+## HTTP headers of Prometheus remote-write carry.
+headers = { }
--- a/config/frontend.example.toml
+++ b/config/frontend.example.toml
@@ -1,106 +1,192 @@
-# Node running mode, see `standalone.example.toml`.
-mode = "distributed"
-# The default timezone of the server
-# default_timezone = "UTC"
+## The running mode of the frontend. It can be `standalone` or `distributed`.
+mode = "standalone"

+## The default timezone of the server.
+## +toml2docs:none-default
+default_timezone = "UTC"
+
+## The heartbeat options.
 [heartbeat]
-# Interval for sending heartbeat task to the Metasrv, 5 seconds by default.
-interval = "5s"
-# Interval for retry sending heartbeat task, 5 seconds by default.
-retry_interval = "5s"
+## Interval for sending heartbeat messages to the metasrv.
+interval = "18s"

-# HTTP server options, see `standalone.example.toml`.
+## Interval for retrying to send heartbeat messages to the metasrv.
+retry_interval = "3s"
+
+## The HTTP server options.
 [http]
+## The address to bind the HTTP server.
 addr = "127.0.0.1:4000"
+## HTTP request timeout.
 timeout = "30s"
+## HTTP request body limit.
+## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
 body_limit = "64MB"

-# gRPC server options, see `standalone.example.toml`.
+## The gRPC server options.
 [grpc]
+## The address to bind the gRPC server.
 addr = "127.0.0.1:4001"
+## The number of server worker threads.
 runtime_size = 8

-# MySQL server options, see `standalone.example.toml`.
+## MySQL server options.
 [mysql]
+## Whether to enable.
 enable = true
+## The addr to bind the MySQL server.
 addr = "127.0.0.1:4002"
+## The number of server worker threads.
 runtime_size = 2

-# MySQL server TLS options, see `standalone.example.toml`.
+## MySQL server TLS options.
 [mysql.tls]
+
+## TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
+## - `disable` (default value)
+## - `prefer`
+## - `require`
+## - `verify-ca`
+## - `verify-full`
 mode = "disable"
+
+## Certificate file path.
+## +toml2docs:none-default
 cert_path = ""
+
+## Private key file path.
+## +toml2docs:none-default
 key_path = ""
+
+## Watch for Certificate and key file change and auto reload
 watch = false

-# PostgresSQL server options, see `standalone.example.toml`.
+## PostgreSQL server options.
 [postgres]
+## Whether to enable
 enable = true
+## The addr to bind the PostgreSQL server.
 addr = "127.0.0.1:4003"
+## The number of server worker threads.
 runtime_size = 2

-# PostgresSQL server TLS options, see `standalone.example.toml`.
+## PostgreSQL server TLS options, see the `[mysql.tls]` section.
 [postgres.tls]
+## TLS mode.
 mode = "disable"
+
+## Certificate file path.
+## +toml2docs:none-default
 cert_path = ""
+
+## Private key file path.
+## +toml2docs:none-default
 key_path = ""
+
+## Watch for Certificate and key file change and auto reload
 watch = false

-# OpenTSDB protocol options, see `standalone.example.toml`.
+## OpenTSDB protocol options.
 [opentsdb]
+## Whether to enable
 enable = true
+## OpenTSDB telnet API server address.
 addr = "127.0.0.1:4242"
+## The number of server worker threads.
 runtime_size = 2

-# InfluxDB protocol options, see `standalone.example.toml`.
+## InfluxDB protocol options.
 [influxdb]
+## Whether to enable InfluxDB protocol in HTTP API.
 enable = true

-# Prometheus remote storage options, see `standalone.example.toml`.
+## Prometheus remote storage options
 [prom_store]
+## Whether to enable Prometheus remote write and read in HTTP API.
 enable = true
-# Whether to store the data from Prometheus remote write in metric engine.
-# true by default
+## Whether to store the data from Prometheus remote write in metric engine.
 with_metric_engine = true

-# Metasrv client options, see `datanode.example.toml`.
+## The metasrv client options.
 [meta_client]
+## The addresses of the metasrv.
 metasrv_addrs = ["127.0.0.1:3002"]
+
+## Operation timeout.
 timeout = "3s"
-# DDL timeouts options.
+
+## Heartbeat timeout.
+heartbeat_timeout = "500ms"
+
+## DDL timeout.
 ddl_timeout = "10s"
+
+## Connect server timeout.
 connect_timeout = "1s"
+
+## `TCP_NODELAY` option for accepted connections.
 tcp_nodelay = true
-# The configuration about the cache of the Metadata.
-# default: 100000
+
+## The configuration about the cache of the metadata.
 metadata_cache_max_capacity = 100000
-# default: 10m
+
+## TTL of the metadata cache.
 metadata_cache_ttl = "10m"
-# default: 5m
+
+## TTI of the metadata cache.
 metadata_cache_tti = "5m"

-# Log options, see `standalone.example.toml`
-# [logging]
-# dir = "/tmp/greptimedb/logs"
-# level = "info"
-
-# Datanode options.
+## Datanode options.
 [datanode]
-# Datanode client options.
+## Datanode client options.
 [datanode.client]
 timeout = "10s"
 connect_timeout = "10s"
 tcp_nodelay = true

-# Frontend export the metrics generated by itself
-# encoded to Prometheus remote-write format
-# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
-# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
-# [export_metrics]
-# whether enable export metrics, default is false
-# enable = false
-# The interval of export metrics
-# write_interval = "30s"
-# for `frontend`, `self_import` is recommend to collect metrics generated by itself
-# [export_metrics.self_import]
-# db = "information_schema"
+## The logging options.
+[logging]
+## The directory to store the log files.
+dir = "/tmp/greptimedb/logs"
+
+## The log level. Can be `info`/`debug`/`warn`/`error`.
+## +toml2docs:none-default
+level = "info"
+
+## Enable OTLP tracing.
+enable_otlp_tracing = false
+
+## The OTLP tracing endpoint.
+## +toml2docs:none-default
+otlp_endpoint = ""
+
+## Whether to append logs to stdout.
+append_stdout = true
+
+## The percentage of tracing will be sampled and exported.
+## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
+## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
+[logging.tracing_sample_ratio]
+default_ratio = 1.0
+
+## The frontend can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
+## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
+[export_metrics]
+
+## Whether to enable exporting metrics.
+enable = false
+
+## The interval of export metrics.
+write_interval = "30s"
+
+## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself.
+[export_metrics.self_import]
+## +toml2docs:none-default
+db = "information_schema"
+
+[export_metrics.remote_write]
+## The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`.
+url = ""
+
+## HTTP headers of Prometheus remote-write carry.
+headers = { }
--- a/config/metasrv.example.toml
+++ b/config/metasrv.example.toml
@@ -1,35 +1,46 @@
-# The working home directory.
+## The working home directory.
 data_home = "/tmp/metasrv/"
-# The bind address of metasrv, "127.0.0.1:3002" by default.
+
+## The bind address of metasrv.
 bind_addr = "127.0.0.1:3002"
-# The communication server address for frontend and datanode to connect to metasrv,  "127.0.0.1:3002" by default for localhost.
+
+## The communication server address for frontend and datanode to connect to metasrv,  "127.0.0.1:3002" by default for localhost.
 server_addr = "127.0.0.1:3002"
-# Etcd server address, "127.0.0.1:2379" by default.
+
+## Etcd server address.
 store_addr = "127.0.0.1:2379"
-# Datanode selector type.
-# - "lease_based" (default value).
-# - "load_based"
-# For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector".
+
+## Datanode selector type.
+## - `lease_based` (default value).
+## - `load_based`
+## For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector".
 selector = "lease_based"
-# Store data in memory, false by default.
+
+## Store data in memory.
 use_memory_store = false
-# Whether to enable greptimedb telemetry, true by default.
+
+## Whether to enable greptimedb telemetry.
 enable_telemetry = true
-# If it's not empty, the metasrv will store all data with this key prefix.
+
+## If it's not empty, the metasrv will store all data with this key prefix.
 store_key_prefix = ""

-# Log options, see `standalone.example.toml`
-# [logging]
-# dir = "/tmp/greptimedb/logs"
-# level = "info"
-
-# Procedure storage options.
+## Procedure storage options.
 [procedure]
-# Procedure max retry time.
+
+## Procedure max retry time.
 max_retry_times = 12
-# Initial retry delay of procedures, increases exponentially
+
+## Initial retry delay of procedures, increases exponentially
 retry_delay = "500ms"

+## Auto split large value
+## GreptimeDB procedure uses etcd as the default metadata storage backend.
+## The maximum size of any etcd request is 1.5 MiB.
+## 1500KiB = 1536KiB (1.5MiB) - 36KiB (reserved size of key)
+## Comment out `max_metadata_value_size` to disable splitting large values (no limit).
+max_metadata_value_size = "1500KiB"
+
 # Failure detectors options.
 [failure_detector]
 threshold = 8.0
@@ -37,57 +48,96 @@ min_std_deviation = "100ms"
 acceptable_heartbeat_pause = "3000ms"
 first_heartbeat_estimate = "1000ms"

-# # Datanode options.
-# [datanode]
-# # Datanode client options.
-# [datanode.client_options]
-# timeout = "10s"
-# connect_timeout = "10s"
-# tcp_nodelay = true
+## Datanode options.
+[datanode]
+## Datanode client options.
+[datanode.client]
+timeout = "10s"
+connect_timeout = "10s"
+tcp_nodelay = true

 [wal]
 # Available wal providers:
-# - "raft_engine" (default)
-# - "kafka"
+# - `raft_engine` (default): there is no raft-engine wal config since metasrv is only involved in remote wal currently.
+# - `kafka`: metasrv **has to be** configured with kafka wal config when using kafka wal provider in datanode.
 provider = "raft_engine"

-# There're none raft-engine wal config since meta srv only involves in remote wal currently.
-
 # Kafka wal config.
-# The broker endpoints of the Kafka cluster. ["127.0.0.1:9092"] by default.
-# broker_endpoints = ["127.0.0.1:9092"]
-# Number of topics to be created upon start.
-# num_topics = 64
-# Topic selector type.
-# Available selector types: 
-# - "round_robin" (default)
-# selector_type = "round_robin"
-# A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
-# topic_name_prefix = "greptimedb_wal_topic"
-# Expected number of replicas of each partition.
-# replication_factor = 1
-# Above which a topic creation operation will be cancelled.
-# create_topic_timeout = "30s"
-# The initial backoff for kafka clients.
-# backoff_init = "500ms"
-# The maximum backoff for kafka clients.
-# backoff_max = "10s"
-# Exponential backoff rate, i.e. next backoff = base * current backoff.
-# backoff_base = 2
-# Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate.
-# backoff_deadline = "5mins"

-# Metasrv export the metrics generated by itself
-# encoded to Prometheus remote-write format
-# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
-# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
-# [export_metrics]
-# whether enable export metrics, default is false
-# enable = false
-# The interval of export metrics
-# write_interval = "30s"
-# [export_metrics.remote_write]
-# The url the metrics send to. The url is empty by default, url example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`
-# url = ""
-# HTTP headers of Prometheus remote-write carry
-# headers = {}
+## The broker endpoints of the Kafka cluster.
+broker_endpoints = ["127.0.0.1:9092"]
+
+## Number of topics to be created upon start.
+num_topics = 64
+
+## Topic selector type.
+## Available selector types:
+## - `round_robin` (default)
+selector_type = "round_robin"
+
+## A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
+topic_name_prefix = "greptimedb_wal_topic"
+
+## Expected number of replicas of each partition.
+replication_factor = 1
+
+## Above which a topic creation operation will be cancelled.
+create_topic_timeout = "30s"
+## The initial backoff for kafka clients.
+backoff_init = "500ms"
+
+## The maximum backoff for kafka clients.
+backoff_max = "10s"
+
+## Exponential backoff rate, i.e. next backoff = base * current backoff.
+backoff_base = 2
+
+## Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate.
+backoff_deadline = "5mins"
+
+## The logging options.
+[logging]
+## The directory to store the log files.
+dir = "/tmp/greptimedb/logs"
+
+## The log level. Can be `info`/`debug`/`warn`/`error`.
+## +toml2docs:none-default
+level = "info"
+
+## Enable OTLP tracing.
+enable_otlp_tracing = false
+
+## The OTLP tracing endpoint.
+## +toml2docs:none-default
+otlp_endpoint = ""
+
+## Whether to append logs to stdout.
+append_stdout = true
+
+## The percentage of tracing will be sampled and exported.
+## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
+## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
+[logging.tracing_sample_ratio]
+default_ratio = 1.0
+
+## The metasrv can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) through the remote-write API.
+## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
+[export_metrics]
+
+## Whether to enable exporting metrics.
+enable = false
+
+## The interval of export metrics.
+write_interval = "30s"
+
+## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself
+[export_metrics.self_import]
+## +toml2docs:none-default
+db = "information_schema"
+
+[export_metrics.remote_write]
+## The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`.
+url = ""
+
+## HTTP headers carried by the Prometheus remote-write requests.
+headers = { }
--- a/config/standalone.example.toml
+++ b/config/standalone.example.toml
@@ -1,286 +1,477 @@
-# Node running mode, "standalone" or "distributed".
+## The running mode of the datanode. It can be `standalone` or `distributed`.
 mode = "standalone"
-# Whether to enable greptimedb telemetry, true by default.
-enable_telemetry = true
-# The default timezone of the server
-# default_timezone = "UTC"

-# HTTP server options.
+## Enable telemetry to collect anonymous usage data.
+enable_telemetry = true
+
+## The default timezone of the server.
+## +toml2docs:none-default
+default_timezone = "UTC"
+
+## The HTTP server options.
 [http]
-# Server address, "127.0.0.1:4000" by default.
+## The address to bind the HTTP server.
 addr = "127.0.0.1:4000"
-# HTTP request timeout, 30s by default.
+## HTTP request timeout.
 timeout = "30s"
-# HTTP request body limit, 64Mb by default.
-# the following units are supported: B, KB, KiB, MB, MiB, GB, GiB, TB, TiB, PB, PiB
+## HTTP request body limit.
+## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
 body_limit = "64MB"

-# gRPC server options.
+## The gRPC server options.
 [grpc]
-# Server address, "127.0.0.1:4001" by default.
+## The address to bind the gRPC server.
 addr = "127.0.0.1:4001"
-# The number of server worker threads, 8 by default.
+## The number of server worker threads.
 runtime_size = 8

-# MySQL server options.
+## MySQL server options.
 [mysql]
-# Whether to enable
+## Whether to enable.
 enable = true
-# Server address, "127.0.0.1:4002" by default.
+## The addr to bind the MySQL server.
 addr = "127.0.0.1:4002"
-# The number of server worker threads, 2 by default.
+## The number of server worker threads.
 runtime_size = 2

 # MySQL server TLS options.
 [mysql.tls]
-# TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
-# - "disable" (default value)
-# - "prefer"
-# - "require"
-# - "verify-ca"
-# - "verify-full"
+
+## TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
+## - `disable` (default value)
+## - `prefer`
+## - `require`
+## - `verify-ca`
+## - `verify-full`
 mode = "disable"
-# Certificate file path.
+
+## Certificate file path.
+## +toml2docs:none-default
 cert_path = ""
-# Private key file path.
+
+## Private key file path.
+## +toml2docs:none-default
 key_path = ""
-# Watch for Certificate and key file change and auto reload
+
+## Watch for Certificate and key file change and auto reload
 watch = false

-# PostgresSQL server options.
+## PostgreSQL server options.
 [postgres]
-# Whether to enable
+## Whether to enable
 enable = true
-# Server address, "127.0.0.1:4003" by default.
+## The addr to bind the PostgreSQL server.
 addr = "127.0.0.1:4003"
-# The number of server worker threads, 2 by default.
+## The number of server worker threads.
 runtime_size = 2

-# PostgresSQL server TLS options, see `[mysql_options.tls]` section.
+## PostgreSQL server TLS options, see the `mysql.tls` section.
 [postgres.tls]
-# TLS mode.
+## TLS mode.
 mode = "disable"
-# certificate file path.
+
+## Certificate file path.
+## +toml2docs:none-default
 cert_path = ""
-# private key file path.
+
+## Private key file path.
+## +toml2docs:none-default
 key_path = ""
-# Watch for Certificate and key file change and auto reload
+
+## Watch for Certificate and key file change and auto reload
 watch = false

-# OpenTSDB protocol options.
+## OpenTSDB protocol options.
 [opentsdb]
-# Whether to enable
+## Whether to enable
 enable = true
-# OpenTSDB telnet API server address, "127.0.0.1:4242" by default.
+## OpenTSDB telnet API server address.
 addr = "127.0.0.1:4242"
-# The number of server worker threads, 2 by default.
+## The number of server worker threads.
 runtime_size = 2

-# InfluxDB protocol options.
+## InfluxDB protocol options.
 [influxdb]
-# Whether to enable InfluxDB protocol in HTTP API, true by default.
+## Whether to enable InfluxDB protocol in HTTP API.
 enable = true

-# Prometheus remote storage options
+## Prometheus remote storage options
 [prom_store]
-# Whether to enable Prometheus remote write and read in HTTP API, true by default.
+## Whether to enable Prometheus remote write and read in HTTP API.
 enable = true
-# Whether to store the data from Prometheus remote write in metric engine.
-# true by default
+## Whether to store the data from Prometheus remote write in metric engine.
 with_metric_engine = true

+## The WAL options.
 [wal]
-# Available wal providers:
-# - "raft_engine" (default)
-# - "kafka"
+## The provider of the WAL.
+## - `raft_engine`: the wal is stored in the local file system by raft-engine.
+## - `kafka`: it's remote wal that data is stored in Kafka.
 provider = "raft_engine"

-# Raft-engine wal options.
-# WAL data directory
-# dir = "/tmp/greptimedb/wal"
-# WAL file size in bytes.
+## The directory to store the WAL files.
+## **It's only used when the provider is `raft_engine`**.
+## +toml2docs:none-default
+dir = "/tmp/greptimedb/wal"
+
+## The size of the WAL segment file.
+## **It's only used when the provider is `raft_engine`**.
 file_size = "256MB"
-# WAL purge threshold.
+
+## The threshold of the WAL size to trigger a flush.
+## **It's only used when the provider is `raft_engine`**.
 purge_threshold = "4GB"
-# WAL purge interval in seconds.
+
+## The interval to trigger a flush.
+## **It's only used when the provider is `raft_engine`**.
 purge_interval = "10m"
-# WAL read batch size.
+
+## The read batch size.
+## **It's only used when the provider is `raft_engine`**.
 read_batch_size = 128
-# Whether to sync log file after every write.
+
+## Whether to use sync write.
+## **It's only used when the provider is `raft_engine`**.
 sync_write = false
-# Whether to reuse logically truncated log files.
+
+## Whether to reuse logically truncated log files.
+## **It's only used when the provider is `raft_engine`**.
 enable_log_recycle = true
-# Whether to pre-create log files on start up
+
+## Whether to pre-create log files on start up.
+## **It's only used when the provider is `raft_engine`**.
 prefill_log_files = false
-# Duration for fsyncing log files.
-sync_period = "1000ms"

-# Kafka wal options.
-# The broker endpoints of the Kafka cluster. ["127.0.0.1:9092"] by default.
-# broker_endpoints = ["127.0.0.1:9092"]
+## Duration for fsyncing log files.
+## **It's only used when the provider is `raft_engine`**.
+sync_period = "10s"

-# Number of topics to be created upon start.
-# num_topics = 64
-# Topic selector type.
-# Available selector types:
-# - "round_robin" (default)
-# selector_type = "round_robin"
-# The prefix of topic name.
-# topic_name_prefix = "greptimedb_wal_topic"
-# The number of replicas of each partition.
-# Warning: the replication factor must be positive and must not be greater than the number of broker endpoints.
-# replication_factor = 1
+## The Kafka broker endpoints.
+## **It's only used when the provider is `kafka`**.
+broker_endpoints = ["127.0.0.1:9092"]

-# The max size of a single producer batch.
-# Warning: Kafka has a default limit of 1MB per message in a topic.
-# max_batch_size = "1MB"
-# The linger duration.
-# linger = "200ms"
-# The consumer wait timeout.
-# consumer_wait_timeout = "100ms"
-# Create topic timeout.
-# create_topic_timeout = "30s"
+## The max size of a single producer batch.
+## Warning: Kafka has a default limit of 1MB per message in a topic.
+## **It's only used when the provider is `kafka`**.
+max_batch_size = "1MB"

-# The initial backoff delay.
-# backoff_init = "500ms"
-# The maximum backoff delay.
-# backoff_max = "10s"
-# Exponential backoff rate, i.e. next backoff = base * current backoff.
-# backoff_base = 2
-# The deadline of retries.
-# backoff_deadline = "5mins"
+## The linger duration of a kafka batch producer.
+## **It's only used when the provider is `kafka`**.
+linger = "200ms"

-# Metadata storage options.
+## The consumer wait timeout.
+## **It's only used when the provider is `kafka`**.
+consumer_wait_timeout = "100ms"
+
+## The initial backoff delay.
+## **It's only used when the provider is `kafka`**.
+backoff_init = "500ms"
+
+## The maximum backoff delay.
+## **It's only used when the provider is `kafka`**.
+backoff_max = "10s"
+
+## The exponential backoff rate, i.e. next backoff = base * current backoff.
+## **It's only used when the provider is `kafka`**.
+backoff_base = 2
+
+## The deadline of retries.
+## **It's only used when the provider is `kafka`**.
+backoff_deadline = "5mins"
+
+## Metadata storage options.
 [metadata_store]
-# Kv file size in bytes.
+## Kv file size in bytes.
 file_size = "256MB"
-# Kv purge threshold.
+## Kv purge threshold.
 purge_threshold = "4GB"

-# Procedure storage options.
+## Procedure storage options.
 [procedure]
-# Procedure max retry time.
+## Procedure max retry time.
 max_retry_times = 3
-# Initial retry delay of procedures, increases exponentially
+## Initial retry delay of procedures, increases exponentially
 retry_delay = "500ms"

-# Storage options.
+# Example of using S3 as the storage.
+# [storage]
+# type = "S3"
+# bucket = "greptimedb"
+# root = "data"
+# access_key_id = "test"
+# secret_access_key = "123456"
+# endpoint = "https://s3.amazonaws.com"
+# region = "us-west-2"
+
+# Example of using Oss as the storage.
+# [storage]
+# type = "Oss"
+# bucket = "greptimedb"
+# root = "data"
+# access_key_id = "test"
+# access_key_secret = "123456"
+# endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
+
+# Example of using Azblob as the storage.
+# [storage]
+# type = "Azblob"
+# container = "greptimedb"
+# root = "data"
+# account_name = "test"
+# account_key = "123456"
+# endpoint = "https://greptimedb.blob.core.windows.net"
+# sas_token = ""
+
+# Example of using Gcs as the storage.
+# [storage]
+# type = "Gcs"
+# bucket = "greptimedb"
+# root = "data"
+# scope = "test"
+# credential_path = "123456"
+# endpoint = "https://storage.googleapis.com"
+
+## The data storage options.
 [storage]
-# The working home directory.
+## The working home directory.
 data_home = "/tmp/greptimedb/"
-# Storage type.
+
+## The storage type used to store the data.
+## - `File`: the data is stored in the local file system.
+## - `S3`: the data is stored in the S3 object storage.
+## - `Gcs`: the data is stored in the Google Cloud Storage.
+## - `Azblob`: the data is stored in the Azure Blob Storage.
+## - `Oss`: the data is stored in the Aliyun OSS.
 type = "File"
-# TTL for all tables. Disabled by default.
-# global_ttl = "7d"
-# Cache configuration for object storage such as 'S3' etc.
-# cache_path = "/path/local_cache"
-# The local file cache capacity in bytes.
-# cache_capacity = "256MB"
+
+## Cache configuration for object storage such as 'S3' etc.
+## The local file cache directory.
+## +toml2docs:none-default
+cache_path = "/path/local_cache"
+
+## The local file cache capacity in bytes.
+## +toml2docs:none-default
+cache_capacity = "256MB"
+
+## The S3 bucket name.
+## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
+## +toml2docs:none-default
+bucket = "greptimedb"
+
+## The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.
+## **It's only used when the storage type is `S3`, `Oss` and `Azblob`**.
+## +toml2docs:none-default
+root = "greptimedb"
+
+## The access key id of the aws account.
+## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
+## **It's only used when the storage type is `S3` and `Oss`**.
+## +toml2docs:none-default
+access_key_id = "test"
+
+## The secret access key of the aws account.
+## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
+## **It's only used when the storage type is `S3`**.
+## +toml2docs:none-default
+secret_access_key = "test"
+
+## The secret access key of the aliyun account.
+## **It's only used when the storage type is `Oss`**.
+## +toml2docs:none-default
+access_key_secret = "test"
+
+## The account name of the azure account.
+## **It's only used when the storage type is `Azblob`**.
+## +toml2docs:none-default
+account_name = "test"
+
+## The account key of the azure account.
+## **It's only used when the storage type is `Azblob`**.
+## +toml2docs:none-default
+account_key = "test"
+
+## The scope of the google cloud storage.
+## **It's only used when the storage type is `Gcs`**.
+## +toml2docs:none-default
+scope = "test"
+
+## The credential path of the google cloud storage.
+## **It's only used when the storage type is `Gcs`**.
+## +toml2docs:none-default
+credential_path = "test"
+
+## The container of the azure account.
+## **It's only used when the storage type is `Azblob`**.
+## +toml2docs:none-default
+container = "greptimedb"
+
+## The sas token of the azure account.
+## **It's only used when the storage type is `Azblob`**.
+## +toml2docs:none-default
+sas_token = ""
+
+## The endpoint of the S3 service.
+## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
+## +toml2docs:none-default
+endpoint = "https://s3.amazonaws.com"
+
+## The region of the S3 service.
+## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
+## +toml2docs:none-default
+region = "us-west-2"

 # Custom storage options
-#[[storage.providers]]
-#type = "S3"
-#[[storage.providers]]
-#type = "Gcs"
+# [[storage.providers]]
+# type = "S3"
+# [[storage.providers]]
+# type = "Gcs"

-# Mito engine options
+## The region engine options. You can configure multiple region engines.
 [[region_engine]]
+
+## The Mito engine options.
 [region_engine.mito]
-# Number of region workers
+
+## Number of region workers.
 num_workers = 8
-# Request channel size of each worker
+
+## Request channel size of each worker.
 worker_channel_size = 128
-# Max batch size for a worker to handle requests
+
+## Max batch size for a worker to handle requests.
 worker_request_batch_size = 64
-# Number of meta action updated to trigger a new checkpoint for the manifest
+
+## Number of meta action updated to trigger a new checkpoint for the manifest.
 manifest_checkpoint_distance = 10
-# Whether to compress manifest and checkpoint file by gzip (default false).
+
+## Whether to compress manifest and checkpoint file by gzip (default false).
 compress_manifest = false
-# Max number of running background jobs
+
+## Max number of running background jobs
 max_background_jobs = 4
-# Interval to auto flush a region if it has not flushed yet.
+
+## Interval to auto flush a region if it has not flushed yet.
 auto_flush_interval = "1h"
-# Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
+
+## Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
 global_write_buffer_size = "1GB"
-# Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
+
+## Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
 global_write_buffer_reject_size = "2GB"
-# Cache size for SST metadata. Setting it to 0 to disable the cache.
-# If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
+
+## Cache size for SST metadata. Setting it to 0 to disable the cache.
+## If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
 sst_meta_cache_size = "128MB"
-# Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
-# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
+
+## Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
+## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
 vector_cache_size = "512MB"
-# Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
-# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
+
+## Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
+## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
 page_cache_size = "512MB"
-# Buffer size for SST writing.
+
+## Buffer size for SST writing.
 sst_write_buffer_size = "8MB"
-# Parallelism to scan a region (default: 1/4 of cpu cores).
-# - 0: using the default value (1/4 of cpu cores).
-# - 1: scan in current thread.
-# - n: scan in parallelism n.
+
+## Parallelism to scan a region (default: 1/4 of cpu cores).
+## - `0`: using the default value (1/4 of cpu cores).
+## - `1`: scan in current thread.
+## - `n`: scan in parallelism n.
 scan_parallelism = 0
-# Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
+
+## Capacity of the channel to send data from parallel scan tasks to the main task.
 parallel_scan_channel_size = 32
-# Whether to allow stale WAL entries read during replay.
+
+## Whether to allow stale WAL entries read during replay.
 allow_stale_entries = false

+## The options for inverted index in Mito engine.
 [region_engine.mito.inverted_index]
-# Whether to create the index on flush.
-# - "auto": automatically
-# - "disable": never
+
+## Whether to create the index on flush.
+## - `auto`: automatically
+## - `disable`: never
 create_on_flush = "auto"
-# Whether to create the index on compaction.
-# - "auto": automatically
-# - "disable": never
+
+## Whether to create the index on compaction.
+## - `auto`: automatically
+## - `disable`: never
 create_on_compaction = "auto"
-# Whether to apply the index on query
-# - "auto": automatically
-# - "disable": never
+
+## Whether to apply the index on query
+## - `auto`: automatically
+## - `disable`: never
 apply_on_query = "auto"
-# Memory threshold for performing an external sort during index creation.
-# Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
+
+## Memory threshold for performing an external sort during index creation.
+## Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
 mem_threshold_on_create = "64M"
-# File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
+
+## File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
 intermediate_path = ""

 [region_engine.mito.memtable]
-# Memtable type.
-# - "experimental": experimental memtable
-# - "time_series": time-series memtable (deprecated)
-type = "experimental"
-# The max number of keys in one shard.
+## Memtable type.
+## - `time_series`: time-series memtable
+## - `partition_tree`: partition tree memtable (experimental)
+type = "time_series"
+
+## The max number of keys in one shard.
+## Only available for `partition_tree` memtable.
 index_max_keys_per_shard = 8192
-# The max rows of data inside the actively writing buffer in one shard.
+
+## The max rows of data inside the actively writing buffer in one shard.
+## Only available for `partition_tree` memtable.
 data_freeze_threshold = 32768
-# Max dictionary bytes.
+
+## Max dictionary bytes.
+## Only available for `partition_tree` memtable.
 fork_dictionary_bytes = "1GiB"

-# Log options
-# [logging]
-# Specify logs directory.
-# dir = "/tmp/greptimedb/logs"
-# Specify the log level [info | debug | error | warn]
-# level = "info"
-# whether enable tracing, default is false
-# enable_otlp_tracing = false
-# tracing exporter endpoint with format `ip:port`, we use grpc oltp as exporter, default endpoint is `localhost:4317`
-# otlp_endpoint = "localhost:4317"
-# Whether to append logs to stdout. Defaults to true.
-# append_stdout = true
-# The percentage of tracing will be sampled and exported. Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1. ratio > 1 are treated as 1. Fractions < 0 are treated as 0
-# [logging.tracing_sample_ratio]
-# default_ratio = 0.0
+## The logging options.
+[logging]
+## The directory to store the log files.
+dir = "/tmp/greptimedb/logs"

-# Standalone export the metrics generated by itself
-# encoded to Prometheus remote-write format
-# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
-# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
-# [export_metrics]
-# whether enable export metrics, default is false
-# enable = false
-# The interval of export metrics
-# write_interval = "30s"
-# for `standalone`, `self_import` is recommend to collect metrics generated by itself
-# [export_metrics.self_import]
-# db = "information_schema"
+## The log level. Can be `info`/`debug`/`warn`/`error`.
+## +toml2docs:none-default
+level = "info"
+
+## Enable OTLP tracing.
+enable_otlp_tracing = false
+
+## The OTLP tracing endpoint.
+## +toml2docs:none-default
+otlp_endpoint = ""
+
+## Whether to append logs to stdout.
+append_stdout = true
+
+## The percentage of tracing will be sampled and exported.
+## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
+## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
+[logging.tracing_sample_ratio]
+default_ratio = 1.0
+
+## The standalone instance can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) through the remote-write API.
+## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
+[export_metrics]
+
+## Whether to enable exporting metrics.
+enable = false
+
+## The interval of export metrics.
+write_interval = "30s"
+
+## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself
+[export_metrics.self_import]
+## +toml2docs:none-default
+db = "information_schema"
+
+[export_metrics.remote_write]
+## The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`.
+url = ""
+
+## HTTP headers carried by the Prometheus remote-write requests.
+headers = { }
--- a/docs/rfcs/2023-07-06-table-engine-refactor.md
+++ b/docs/rfcs/2023-07-06-table-engine-refactor.md
@@ -27,8 +27,8 @@ subgraph Frontend["Frontend"]
    end
 end

-MyTable --> MetaSrv
-MetaSrv --> ETCD
+MyTable --> Metasrv
+Metasrv --> ETCD

 MyTable-->TableEngine0
 MyTable-->TableEngine1
@@ -95,8 +95,8 @@ subgraph Frontend["Frontend"]
    end
 end

-MyTable --> MetaSrv
-MetaSrv --> ETCD
+MyTable --> Metasrv
+Metasrv --> ETCD

 MyTable-->RegionEngine
 MyTable-->RegionEngine1
--- a/docs/rfcs/2024-01-17-dataflow-framework.md
+++ b/docs/rfcs/2024-01-17-dataflow-framework.md
@@ -36,7 +36,7 @@ Hence, we choose the third option, and use a simple logical plan that's anagonis
 ## Deploy mode and protocol
 - Greptime Flow is an independent streaming compute component. It can be used either within a standalone node or as a dedicated node at the same level as frontend in distributed mode.
 - It accepts insert request Rows, which is used between frontend and datanode.
- New flow job is submitted in the format of modified SQL query like snowflake do, like: `CREATE TASK avg_over_5m WINDOW_SIZE = "5m" AS SELECT avg(value) FROM table WHERE time > now() - 5m GROUP BY time(1m)`. Flow job then got stored in MetaSrv.
+- New flow job is submitted in the format of modified SQL query like snowflake do, like: `CREATE TASK avg_over_5m WINDOW_SIZE = "5m" AS SELECT avg(value) FROM table WHERE time > now() - 5m GROUP BY time(1m)`. Flow job then got stored in Metasrv.
 - It also persists results in the format of Rows to frontend.
 - The query plan uses Substrait as codec format. It's the same with GreptimeDB's query engine.
 - Greptime Flow needs a WAL for recovering. It's possible to reuse datanode's.
--- a/docs/style-guide.md
+++ b/docs/style-guide.md
@@ -0,0 +1,46 @@
+# GreptimeDB Style Guide
+
+This style guide is intended to help contributors to GreptimeDB write code that is consistent with the rest of the codebase. It is a living document and will be updated as the codebase evolves.
+
+It's mainly a complement to the [Rust Style Guide](https://pingcap.github.io/style-guide/rust/).
+
+## Table of Contents
+
+- Formatting
+- Modules
+- Comments
+
+## Formatting
+
+- Place all `mod` declarations before any `use`.
+- Use `unimplemented!()` instead of `todo!()` for things that aren't likely to be implemented.
+- Add an empty line before and after declaration blocks.
+- Place comments before attributes (`#[]`) and derives (`#[derive]`).
+
+## Modules
+
+- Use a file with the same name as the module instead of `mod.rs` to define a module. E.g.:
+
+```
+.
+├── cache
+│  ├── cache_size.rs
+│  └── write_cache.rs
+└── cache.rs
+```
+
+## Comments
+
+- Add comments for public functions and structs.
+- Prefer doc comments (`///`) over normal comments (`//`) for structs, fields, functions, etc.
+- Add links (`[]`) to structs, methods, or any other references, and make sure that the links work.
+
+## Error handling
+
+- Define a custom error type for the module if needed.
+- Prefer `with_context()` over `context()` when allocation is needed to construct an error.
+- Use `error!()` or `warn!()` macros in the `common_telemetry` crate to log errors. E.g.:
+
+```rust
+error!(e; "Failed to do something");
+```
--- a/licenserc.toml
+++ b/licenserc.toml
@@ -19,6 +19,12 @@ includes = [
    "*.py",
 ]

+excludes = [
+    # copied sources
+    "src/common/base/src/readable_size.rs",
+    "src/servers/src/repeated_field.rs",
+]
+
 [properties]
 inceptionYear = 2023
 copyrightOwner = "Greptime Team"
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,2 +1,2 @@
 [toolchain]
-channel = "nightly-2023-12-19"
+channel = "nightly-2024-04-18"
--- a/scripts/fetch-dashboard-assets.sh
+++ b/scripts/fetch-dashboard-assets.sh
@@ -27,7 +27,7 @@ function retry_fetch() {
        echo "Failed to download $url"
        echo "You may try to set http_proxy and https_proxy environment variables."
        if [[ -z "$GITHUB_PROXY_URL" ]]; then
-          echo "You may try to set GITHUB_PROXY_URL=http://mirror.ghproxy.com/"
+          echo "You may try to set GITHUB_PROXY_URL=http://mirror.ghproxy.com/https://github.com/"
        fi
        exit 1
     }
@@ -39,7 +39,7 @@ function retry_fetch() {
 retry_fetch "${GITHUB_URL}/GreptimeTeam/dashboard/releases/download/${RELEASE_VERSION}/sha256.txt" sha256.txt

 # Download the tar file containing the built dashboard assets.
-retry_fetch "${GITHUB_URL}/GreptimeTeam/dashboard/releases/download/$RELEASE_VERSION/build.tar.gz" build.tar.gz
+retry_fetch "${GITHUB_URL}/GreptimeTeam/dashboard/releases/download/${RELEASE_VERSION}/build.tar.gz" build.tar.gz

 # Verify the checksums match; exit if they don't.
 case "$(uname -s)" in
--- a/src/api/Cargo.toml
+++ b/src/api/Cargo.toml
@@ -18,7 +18,6 @@ greptime-proto.workspace = true
 paste = "1.0"
 prost.workspace = true
 snafu.workspace = true
-tonic.workspace = true

 [build-dependencies]
 tonic-build = "0.9"
--- a/src/api/src/helper.rs
+++ b/src/api/src/helper.rs
@@ -707,7 +707,6 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
 }

 pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<Value> {
-    // TODO(fys): use macros to optimize code
    match data_type {
        ConcreteDataType::Int64(_) => values
            .i64_values
--- a/src/api/src/lib.rs
+++ b/src/api/src/lib.rs
@@ -21,6 +21,7 @@ pub mod prom_store {
    }
 }

+pub mod region;
 pub mod v1;

 pub use greptime_proto;
--- a/src/api/src/region.rs
+++ b/src/api/src/region.rs
@@ -0,0 +1,42 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+
+use common_base::AffectedRows;
+use greptime_proto::v1::region::RegionResponse as RegionResponseV1;
+
+/// This result struct is derived from [RegionResponseV1]
+#[derive(Debug)]
+pub struct RegionResponse {
+    pub affected_rows: AffectedRows,
+    pub extension: HashMap<String, Vec<u8>>,
+}
+
+impl RegionResponse {
+    pub fn from_region_response(region_response: RegionResponseV1) -> Self {
+        Self {
+            affected_rows: region_response.affected_rows as _,
+            extension: region_response.extension,
+        }
+    }
+
+    /// Creates a response with the given affected rows and an empty extension map.
+    pub fn new(affected_rows: AffectedRows) -> Self {
+        Self {
+            affected_rows,
+            extension: Default::default(),
+        }
+    }
+}
--- a/src/auth/Cargo.toml
+++ b/src/auth/Cargo.toml
@@ -16,8 +16,9 @@ api.workspace = true
 async-trait.workspace = true
 common-error.workspace = true
 common-macro.workspace = true
+common-telemetry.workspace = true
 digest = "0.10"
-hex = { version = "0.4" }
+notify.workspace = true
 secrecy = { version = "0.8", features = ["serde", "alloc"] }
 sha1 = "0.10"
 snafu.workspace = true
--- a/src/auth/src/common.rs
+++ b/src/auth/src/common.rs
@@ -22,6 +22,9 @@ use snafu::{ensure, OptionExt};
 use crate::error::{IllegalParamSnafu, InvalidConfigSnafu, Result, UserPasswordMismatchSnafu};
 use crate::user_info::DefaultUserInfo;
 use crate::user_provider::static_user_provider::{StaticUserProvider, STATIC_USER_PROVIDER};
+use crate::user_provider::watch_file_user_provider::{
+    WatchFileUserProvider, WATCH_FILE_USER_PROVIDER,
+};
 use crate::{UserInfoRef, UserProviderRef};

 pub(crate) const DEFAULT_USERNAME: &str = "greptime";
@@ -40,9 +43,12 @@ pub fn user_provider_from_option(opt: &String) -> Result<UserProviderRef> {
    match name {
        STATIC_USER_PROVIDER => {
            let provider =
-                StaticUserProvider::try_from(content).map(|p| Arc::new(p) as UserProviderRef)?;
+                StaticUserProvider::new(content).map(|p| Arc::new(p) as UserProviderRef)?;
            Ok(provider)
        }
+        WATCH_FILE_USER_PROVIDER => {
+            WatchFileUserProvider::new(content).map(|p| Arc::new(p) as UserProviderRef)
+        }
        _ => InvalidConfigSnafu {
            value: name.to_string(),
            msg: "Invalid UserProviderOption",
--- a/src/auth/src/error.rs
+++ b/src/auth/src/error.rs
@@ -64,6 +64,13 @@ pub enum Error {
        username: String,
    },

+    #[snafu(display("Failed to initialize a watcher for file {}", path))]
+    FileWatch {
+        path: String,
+        #[snafu(source)]
+        error: notify::Error,
+    },
+
    #[snafu(display("User is not authorized to perform this action"))]
    PermissionDenied { location: Location },
 }
@@ -73,6 +80,7 @@ impl ErrorExt for Error {
        match self {
            Error::InvalidConfig { .. } => StatusCode::InvalidArguments,
            Error::IllegalParam { .. } => StatusCode::InvalidArguments,
+            Error::FileWatch { .. } => StatusCode::InvalidArguments,
            Error::InternalState { .. } => StatusCode::Unexpected,
            Error::Io { .. } => StatusCode::Internal,
            Error::AuthBackend { .. } => StatusCode::Internal,
--- a/src/auth/src/tests.rs
+++ b/src/auth/src/tests.rs
@@ -45,9 +45,9 @@ impl Default for MockUserProvider {

 impl MockUserProvider {
    pub fn set_authorization_info(&mut self, info: DatabaseAuthInfo) {
-        self.catalog = info.catalog.to_owned();
-        self.schema = info.schema.to_owned();
-        self.username = info.username.to_owned();
+        info.catalog.clone_into(&mut self.catalog);
+        info.schema.clone_into(&mut self.schema);
+        info.username.clone_into(&mut self.username);
    }
 }

--- a/src/auth/src/user_provider.rs
+++ b/src/auth/src/user_provider.rs
@@ -13,10 +13,24 @@
 // limitations under the License.

 pub(crate) mod static_user_provider;
+pub(crate) mod watch_file_user_provider;
+
+use std::collections::HashMap;
+use std::fs::File;
+use std::io;
+use std::io::BufRead;
+use std::path::Path;
+
+use secrecy::ExposeSecret;
+use snafu::{ensure, OptionExt, ResultExt};

 use crate::common::{Identity, Password};
-use crate::error::Result;
-use crate::UserInfoRef;
+use crate::error::{
+    IllegalParamSnafu, InvalidConfigSnafu, IoSnafu, Result, UnsupportedPasswordTypeSnafu,
+    UserNotFoundSnafu, UserPasswordMismatchSnafu,
+};
+use crate::user_info::DefaultUserInfo;
+use crate::{auth_mysql, UserInfoRef};

 #[async_trait::async_trait]
 pub trait UserProvider: Send + Sync {
@@ -44,3 +58,88 @@ pub trait UserProvider: Send + Sync {
        Ok(user_info)
    }
 }
+
+fn load_credential_from_file(filepath: &str) -> Result<Option<HashMap<String, Vec<u8>>>> {
+    // check valid path
+    let path = Path::new(filepath);
+    if !path.exists() {
+        return Ok(None);
+    }
+
+    ensure!(
+        path.is_file(),
+        InvalidConfigSnafu {
+            value: filepath,
+            msg: "UserProvider file must be a file",
+        }
+    );
+    let file = File::open(path).context(IoSnafu)?;
+    let credential = io::BufReader::new(file)
+        .lines()
+        .map_while(std::result::Result::ok)
+        .filter_map(|line| {
+            if let Some((k, v)) = line.split_once('=') {
+                Some((k.to_string(), v.as_bytes().to_vec()))
+            } else {
+                None
+            }
+        })
+        .collect::<HashMap<String, Vec<u8>>>();
+
+    ensure!(
+        !credential.is_empty(),
+        InvalidConfigSnafu {
+            value: filepath,
+            msg: "UserProvider's file must contains at least one valid credential",
+        }
+    );
+
+    Ok(Some(credential))
+}
+
+fn authenticate_with_credential(
+    users: &HashMap<String, Vec<u8>>,
+    input_id: Identity<'_>,
+    input_pwd: Password<'_>,
+) -> Result<UserInfoRef> {
+    match input_id {
+        Identity::UserId(username, _) => {
+            ensure!(
+                !username.is_empty(),
+                IllegalParamSnafu {
+                    msg: "blank username"
+                }
+            );
+            let save_pwd = users.get(username).context(UserNotFoundSnafu {
+                username: username.to_string(),
+            })?;
+
+            match input_pwd {
+                Password::PlainText(pwd) => {
+                    ensure!(
+                        !pwd.expose_secret().is_empty(),
+                        IllegalParamSnafu {
+                            msg: "blank password"
+                        }
+                    );
+                    if save_pwd == pwd.expose_secret().as_bytes() {
+                        Ok(DefaultUserInfo::with_name(username))
+                    } else {
+                        UserPasswordMismatchSnafu {
+                            username: username.to_string(),
+                        }
+                        .fail()
+                    }
+                }
+                Password::MysqlNativePassword(auth_data, salt) => {
+                    auth_mysql(auth_data, salt, username, save_pwd)
+                        .map(|_| DefaultUserInfo::with_name(username))
+                }
+                Password::PgMD5(_, _) => UnsupportedPasswordTypeSnafu {
+                    password_type: "pg_md5",
+                }
+                .fail(),
+            }
+        }
+    }
+}
--- a/src/auth/src/user_provider/static_user_provider.rs
+++ b/src/auth/src/user_provider/static_user_provider.rs
@@ -13,60 +13,34 @@
 // limitations under the License.

 use std::collections::HashMap;
-use std::fs::File;
-use std::io;
-use std::io::BufRead;
-use std::path::Path;

 use async_trait::async_trait;
-use secrecy::ExposeSecret;
-use snafu::{ensure, OptionExt, ResultExt};
+use snafu::OptionExt;

-use crate::error::{
-    Error, IllegalParamSnafu, InvalidConfigSnafu, IoSnafu, Result, UnsupportedPasswordTypeSnafu,
-    UserNotFoundSnafu, UserPasswordMismatchSnafu,
-};
-use crate::user_info::DefaultUserInfo;
-use crate::{auth_mysql, Identity, Password, UserInfoRef, UserProvider};
+use crate::error::{InvalidConfigSnafu, Result};
+use crate::user_provider::{authenticate_with_credential, load_credential_from_file};
+use crate::{Identity, Password, UserInfoRef, UserProvider};

 pub(crate) const STATIC_USER_PROVIDER: &str = "static_user_provider";

-impl TryFrom<&str> for StaticUserProvider {
-    type Error = Error;
+pub(crate) struct StaticUserProvider {
+    users: HashMap<String, Vec<u8>>,
+}

-    fn try_from(value: &str) -> Result<Self> {
+impl StaticUserProvider {
+    pub(crate) fn new(value: &str) -> Result<Self> {
        let (mode, content) = value.split_once(':').context(InvalidConfigSnafu {
            value: value.to_string(),
            msg: "StaticUserProviderOption must be in format `<option>:<value>`",
        })?;
        return match mode {
            "file" => {
-                // check valid path
-                let path = Path::new(content);
-                ensure!(path.exists() && path.is_file(), InvalidConfigSnafu {
-                    value: content.to_string(),
-                    msg: "StaticUserProviderOption file must be a valid file path",
-                });
-
-                let file = File::open(path).context(IoSnafu)?;
-                let credential = io::BufReader::new(file)
-                    .lines()
-                    .map_while(std::result::Result::ok)
-                    .filter_map(|line| {
-                        if let Some((k, v)) = line.split_once('=') {
-                            Some((k.to_string(), v.as_bytes().to_vec()))
-                        } else {
-                            None
-                        }
-                    })
-                    .collect::<HashMap<String, Vec<u8>>>();
-
-                ensure!(!credential.is_empty(), InvalidConfigSnafu {
-                    value: content.to_string(),
-                    msg: "StaticUserProviderOption file must contains at least one valid credential",
-                });
-
-                Ok(StaticUserProvider { users: credential, })
+                let users = load_credential_from_file(content)?
+                    .context(InvalidConfigSnafu {
+                        value: content.to_string(),
+                        msg: "StaticFileUserProvider must be a valid file path",
+                    })?;
+                Ok(StaticUserProvider { users })
            }
            "cmd" => content
                .split(',')
@@ -83,66 +57,19 @@ impl TryFrom<&str> for StaticUserProvider {
                value: mode.to_string(),
                msg: "StaticUserProviderOption must be in format `file:<path>` or `cmd:<values>`",
            }
-            .fail(),
+                .fail(),
        };
    }
 }

-pub(crate) struct StaticUserProvider {
-    users: HashMap<String, Vec<u8>>,
-}
-
 #[async_trait]
 impl UserProvider for StaticUserProvider {
    fn name(&self) -> &str {
        STATIC_USER_PROVIDER
    }

-    async fn authenticate(
-        &self,
-        input_id: Identity<'_>,
-        input_pwd: Password<'_>,
-    ) -> Result<UserInfoRef> {
-        match input_id {
-            Identity::UserId(username, _) => {
-                ensure!(
-                    !username.is_empty(),
-                    IllegalParamSnafu {
-                        msg: "blank username"
-                    }
-                );
-                let save_pwd = self.users.get(username).context(UserNotFoundSnafu {
-                    username: username.to_string(),
-                })?;
-
-                match input_pwd {
-                    Password::PlainText(pwd) => {
-                        ensure!(
-                            !pwd.expose_secret().is_empty(),
-                            IllegalParamSnafu {
-                                msg: "blank password"
-                            }
-                        );
-                        return if save_pwd == pwd.expose_secret().as_bytes() {
-                            Ok(DefaultUserInfo::with_name(username))
-                        } else {
-                            UserPasswordMismatchSnafu {
-                                username: username.to_string(),
-                            }
-                            .fail()
-                        };
-                    }
-                    Password::MysqlNativePassword(auth_data, salt) => {
-                        auth_mysql(auth_data, salt, username, save_pwd)
-                            .map(|_| DefaultUserInfo::with_name(username))
-                    }
-                    Password::PgMD5(_, _) => UnsupportedPasswordTypeSnafu {
-                        password_type: "pg_md5",
-                    }
-                    .fail(),
-                }
-            }
-        }
+    async fn authenticate(&self, id: Identity<'_>, pwd: Password<'_>) -> Result<UserInfoRef> {
+        authenticate_with_credential(&self.users, id, pwd)
    }

    async fn authorize(
@@ -181,7 +108,7 @@ pub mod test {
    #[tokio::test]
    async fn test_authorize() {
        let user_info = DefaultUserInfo::with_name("root");
-        let provider = StaticUserProvider::try_from("cmd:root=123456,admin=654321").unwrap();
+        let provider = StaticUserProvider::new("cmd:root=123456,admin=654321").unwrap();
        provider
            .authorize("catalog", "schema", &user_info)
            .await
@@ -190,7 +117,7 @@ pub mod test {

    #[tokio::test]
    async fn test_inline_provider() {
-        let provider = StaticUserProvider::try_from("cmd:root=123456,admin=654321").unwrap();
+        let provider = StaticUserProvider::new("cmd:root=123456,admin=654321").unwrap();
        test_authenticate(&provider, "root", "123456").await;
        test_authenticate(&provider, "admin", "654321").await;
    }
@@ -214,7 +141,7 @@ admin=654321",
        }

        let param = format!("file:{file_path}");
-        let provider = StaticUserProvider::try_from(param.as_str()).unwrap();
+        let provider = StaticUserProvider::new(param.as_str()).unwrap();
        test_authenticate(&provider, "root", "123456").await;
        test_authenticate(&provider, "admin", "654321").await;
    }
--- a/src/auth/src/user_provider/watch_file_user_provider.rs
+++ b/src/auth/src/user_provider/watch_file_user_provider.rs
@@ -0,0 +1,215 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+use std::path::Path;
+use std::sync::mpsc::channel;
+use std::sync::{Arc, Mutex};
+
+use async_trait::async_trait;
+use common_telemetry::{info, warn};
+use notify::{EventKind, RecursiveMode, Watcher};
+use snafu::{ensure, ResultExt};
+
+use crate::error::{FileWatchSnafu, InvalidConfigSnafu, Result};
+use crate::user_info::DefaultUserInfo;
+use crate::user_provider::{authenticate_with_credential, load_credential_from_file};
+use crate::{Identity, Password, UserInfoRef, UserProvider};
+
+pub(crate) const WATCH_FILE_USER_PROVIDER: &str = "watch_file_user_provider";
+
+type WatchedCredentialRef = Arc<Mutex<Option<HashMap<String, Vec<u8>>>>>;
+
+/// A user provider that reads user credential from a file and watches the file for changes.
+///
+/// An empty file is invalid, but a missing file means every user can be authenticated.
+pub(crate) struct WatchFileUserProvider {
+    users: WatchedCredentialRef,
+}
+
+impl WatchFileUserProvider {
+    pub fn new(filepath: &str) -> Result<Self> {
+        let credential = load_credential_from_file(filepath)?;
+        let users = Arc::new(Mutex::new(credential));
+        let this = WatchFileUserProvider {
+            users: users.clone(),
+        };
+
+        let (tx, rx) = channel::<notify::Result<notify::Event>>();
+        let mut debouncer =
+            notify::recommended_watcher(tx).context(FileWatchSnafu { path: "<none>" })?;
+        let mut dir = Path::new(filepath).to_path_buf();
+        ensure!(
+            dir.pop(),
+            InvalidConfigSnafu {
+                value: filepath,
+                msg: "UserProvider path must be a file path",
+            }
+        );
+        debouncer
+            .watch(&dir, RecursiveMode::NonRecursive)
+            .context(FileWatchSnafu { path: filepath })?;
+
+        let filepath = filepath.to_string();
+        std::thread::spawn(move || {
+            let filename = Path::new(&filepath).file_name();
+            let _hold = debouncer;
+            while let Ok(res) = rx.recv() {
+                if let Ok(event) = res {
+                    let is_this_file = event.paths.iter().any(|p| p.file_name() == filename);
+                    let is_relevant_event = matches!(
+                        event.kind,
+                        EventKind::Modify(_) | EventKind::Create(_) | EventKind::Remove(_)
+                    );
+                    if is_this_file && is_relevant_event {
+                        info!(?event.kind, "User provider file {} changed", &filepath);
+                        match load_credential_from_file(&filepath) {
+                            Ok(credential) => {
+                                let mut users =
+                                    users.lock().expect("users credential must be valid");
+                                #[cfg(not(test))]
+                                info!("User provider file {filepath} reloaded");
+                                #[cfg(test)]
+                                info!("User provider file {filepath} reloaded: {credential:?}");
+                                *users = credential;
+                            }
+                            Err(err) => {
+                                warn!(
+                                    ?err,
+                                    "Fail to load credential from file {filepath}; keep the old one",
+                                )
+                            }
+                        }
+                    }
+                }
+            }
+        });
+
+        Ok(this)
+    }
+}
+
+#[async_trait]
+impl UserProvider for WatchFileUserProvider {
+    fn name(&self) -> &str {
+        WATCH_FILE_USER_PROVIDER
+    }
+
+    async fn authenticate(&self, id: Identity<'_>, password: Password<'_>) -> Result<UserInfoRef> {
+        let users = self.users.lock().expect("users credential must be valid");
+        if let Some(users) = users.as_ref() {
+            authenticate_with_credential(users, id, password)
+        } else {
+            match id {
+                Identity::UserId(id, _) => {
+                    warn!(id, "User provider file not exist, allow all users");
+                    Ok(DefaultUserInfo::with_name(id))
+                }
+            }
+        }
+    }
+
+    async fn authorize(&self, _: &str, _: &str, _: &UserInfoRef) -> Result<()> {
+        // default allow all
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+pub mod test {
+    use std::time::{Duration, Instant};
+
+    use common_test_util::temp_dir::create_temp_dir;
+    use tokio::time::sleep;
+
+    use crate::user_provider::watch_file_user_provider::WatchFileUserProvider;
+    use crate::user_provider::{Identity, Password};
+    use crate::UserProvider;
+
+    async fn test_authenticate(
+        provider: &dyn UserProvider,
+        username: &str,
+        password: &str,
+        ok: bool,
+        timeout: Option<Duration>,
+    ) {
+        if let Some(timeout) = timeout {
+            let deadline = Instant::now().checked_add(timeout).unwrap();
+            loop {
+                let re = provider
+                    .authenticate(
+                        Identity::UserId(username, None),
+                        Password::PlainText(password.to_string().into()),
+                    )
+                    .await;
+                if re.is_ok() == ok {
+                    break;
+                } else if Instant::now() < deadline {
+                    sleep(Duration::from_millis(100)).await;
+                } else {
+                    panic!("timeout (username: {username}, password: {password}, expected: {ok})");
+                }
+            }
+        } else {
+            let re = provider
+                .authenticate(
+                    Identity::UserId(username, None),
+                    Password::PlainText(password.to_string().into()),
+                )
+                .await;
+            assert_eq!(
+                re.is_ok(),
+                ok,
+                "username: {}, password: {}",
+                username,
+                password
+            );
+        }
+    }
+
+    #[tokio::test]
+    async fn test_file_provider() {
+        common_telemetry::init_default_ut_logging();
+
+        let dir = create_temp_dir("test_file_provider");
+        let file_path = format!("{}/test_file_provider", dir.path().to_str().unwrap());
+
+        // write a tmp file
+        assert!(std::fs::write(&file_path, "root=123456\nadmin=654321\n").is_ok());
+        let provider = WatchFileUserProvider::new(file_path.as_str()).unwrap();
+        let timeout = Duration::from_secs(60);
+
+        test_authenticate(&provider, "root", "123456", true, None).await;
+        test_authenticate(&provider, "admin", "654321", true, None).await;
+        test_authenticate(&provider, "root", "654321", false, None).await;
+
+        // update the tmp file
+        assert!(std::fs::write(&file_path, "root=654321\n").is_ok());
+        test_authenticate(&provider, "root", "123456", false, Some(timeout)).await;
+        test_authenticate(&provider, "root", "654321", true, Some(timeout)).await;
+        test_authenticate(&provider, "admin", "654321", false, Some(timeout)).await;
+
+        // remove the tmp file
+        assert!(std::fs::remove_file(&file_path).is_ok());
+        test_authenticate(&provider, "root", "123456", true, Some(timeout)).await;
+        test_authenticate(&provider, "root", "654321", true, Some(timeout)).await;
+        test_authenticate(&provider, "admin", "654321", true, Some(timeout)).await;
+
+        // recreate the tmp file
+        assert!(std::fs::write(&file_path, "root=123456\n").is_ok());
+        test_authenticate(&provider, "root", "123456", true, Some(timeout)).await;
+        test_authenticate(&provider, "root", "654321", false, Some(timeout)).await;
+        test_authenticate(&provider, "admin", "654321", false, Some(timeout)).await;
+    }
+}
--- a/src/catalog/Cargo.toml
+++ b/src/catalog/Cargo.toml
@@ -12,19 +12,16 @@ workspace = true

 [dependencies]
 api.workspace = true
-arc-swap = "1.0"
 arrow.workspace = true
 arrow-schema.workspace = true
 async-stream.workspace = true
 async-trait = "0.1"
 common-catalog.workspace = true
 common-error.workspace = true
-common-grpc.workspace = true
 common-macro.workspace = true
 common-meta.workspace = true
 common-query.workspace = true
 common-recordbatch.workspace = true
-common-runtime.workspace = true
 common-telemetry.workspace = true
 common-time.workspace = true
 common-version.workspace = true
@@ -37,15 +34,13 @@ itertools.workspace = true
 lazy_static.workspace = true
 meta-client.workspace = true
 moka = { workspace = true, features = ["future", "sync"] }
-parking_lot = "0.12"
 partition.workspace = true
 paste = "1.0"
 prometheus.workspace = true
-regex.workspace = true
-serde.workspace = true
 serde_json.workspace = true
 session.workspace = true
 snafu.workspace = true
+sql.workspace = true
 store-api.workspace = true
 table.workspace = true
 tokio.workspace = true
--- a/src/catalog/src/error.rs
+++ b/src/catalog/src/error.rs
@@ -216,7 +216,7 @@ pub enum Error {
    },

    #[snafu(display("Failed to perform metasrv operation"))]
-    MetaSrv {
+    Metasrv {
        location: Location,
        source: meta_client::error::Error,
    },
@@ -304,7 +304,7 @@ impl ErrorExt for Error {
            | Error::CreateTable { source, .. }
            | Error::TableSchemaMismatch { source, .. } => source.status_code(),

-            Error::MetaSrv { source, .. } => source.status_code(),
+            Error::Metasrv { source, .. } => source.status_code(),
            Error::SystemCatalogTableScan { source, .. } => source.status_code(),
            Error::SystemCatalogTableScanExec { source, .. } => source.status_code(),
            Error::InvalidTableInfoInCatalog { source, .. } => source.status_code(),
--- a/src/catalog/src/information_schema.rs
+++ b/src/catalog/src/information_schema.rs
@@ -12,14 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-mod columns;
-mod key_column_usage;
+pub mod columns;
+pub mod key_column_usage;
 mod memory_table;
 mod partitions;
 mod predicate;
 mod region_peers;
 mod runtime_metrics;
 pub mod schemata;
+mod table_constraints;
 mod table_names;
 pub mod tables;

@@ -41,8 +42,7 @@ use table::error::{SchemaConversionSnafu, TablesRecordBatchSnafu};
 use table::metadata::{
    FilterPushDownType, TableInfoBuilder, TableInfoRef, TableMetaBuilder, TableType,
 };
-use table::thin_table::{ThinTable, ThinTableAdapter};
-use table::TableRef;
+use table::{Table, TableRef};
 pub use table_names::*;

 use self::columns::InformationSchemaColumns;
@@ -53,6 +53,7 @@ use crate::information_schema::partitions::InformationSchemaPartitions;
 use crate::information_schema::region_peers::InformationSchemaRegionPeers;
 use crate::information_schema::runtime_metrics::InformationSchemaMetrics;
 use crate::information_schema::schemata::InformationSchemaSchemata;
+use crate::information_schema::table_constraints::InformationSchemaTableConstraints;
 use crate::information_schema::tables::InformationSchemaTables;
 use crate::CatalogManager;

@@ -174,6 +175,10 @@ impl InformationSchemaProvider {
            KEY_COLUMN_USAGE.to_string(),
            self.build_table(KEY_COLUMN_USAGE).unwrap(),
        );
+        tables.insert(
+            TABLE_CONSTRAINTS.to_string(),
+            self.build_table(TABLE_CONSTRAINTS).unwrap(),
+        );

        // Add memory tables
        for name in MEMORY_TABLES.iter() {
@@ -187,10 +192,9 @@ impl InformationSchemaProvider {
        self.information_table(name).map(|table| {
            let table_info = Self::table_info(self.catalog_name.clone(), &table);
            let filter_pushdown = FilterPushDownType::Inexact;
-            let thin_table = ThinTable::new(table_info, filter_pushdown);
-
            let data_source = Arc::new(InformationTableDataSource::new(table));
-            Arc::new(ThinTableAdapter::new(thin_table, data_source)) as _
+            let table = Table::new(table_info, filter_pushdown, data_source);
+            Arc::new(table)
        })
    }

@@ -243,6 +247,10 @@ impl InformationSchemaProvider {
                self.catalog_name.clone(),
                self.catalog_manager.clone(),
            )) as _),
+            TABLE_CONSTRAINTS => Some(Arc::new(InformationSchemaTableConstraints::new(
+                self.catalog_name.clone(),
+                self.catalog_manager.clone(),
+            )) as _),
            _ => None,
        }
    }
--- a/src/catalog/src/information_schema/columns.rs
+++ b/src/catalog/src/information_schema/columns.rs
@@ -26,13 +26,16 @@ use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
 use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
 use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
 use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
-use datatypes::prelude::{ConcreteDataType, DataType};
+use datatypes::prelude::{ConcreteDataType, DataType, MutableVector};
 use datatypes::scalars::ScalarVectorBuilder;
 use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
 use datatypes::value::Value;
-use datatypes::vectors::{StringVectorBuilder, VectorRef};
+use datatypes::vectors::{
+    ConstantVector, Int64Vector, Int64VectorBuilder, StringVector, StringVectorBuilder, VectorRef,
+};
 use futures::TryStreamExt;
 use snafu::{OptionExt, ResultExt};
+use sql::statements;
 use store_api::storage::{ScanRequest, TableId};

 use super::{InformationTable, COLUMNS};
@@ -48,18 +51,42 @@ pub(super) struct InformationSchemaColumns {
    catalog_manager: Weak<dyn CatalogManager>,
 }

-const TABLE_CATALOG: &str = "table_catalog";
-const TABLE_SCHEMA: &str = "table_schema";
-const TABLE_NAME: &str = "table_name";
-const COLUMN_NAME: &str = "column_name";
-const DATA_TYPE: &str = "data_type";
-const SEMANTIC_TYPE: &str = "semantic_type";
-const COLUMN_DEFAULT: &str = "column_default";
-const IS_NULLABLE: &str = "is_nullable";
+pub const TABLE_CATALOG: &str = "table_catalog";
+pub const TABLE_SCHEMA: &str = "table_schema";
+pub const TABLE_NAME: &str = "table_name";
+pub const COLUMN_NAME: &str = "column_name";
+const ORDINAL_POSITION: &str = "ordinal_position";
+const CHARACTER_MAXIMUM_LENGTH: &str = "character_maximum_length";
+const CHARACTER_OCTET_LENGTH: &str = "character_octet_length";
+const NUMERIC_PRECISION: &str = "numeric_precision";
+const NUMERIC_SCALE: &str = "numeric_scale";
+const DATETIME_PRECISION: &str = "datetime_precision";
+const CHARACTER_SET_NAME: &str = "character_set_name";
+pub const COLLATION_NAME: &str = "collation_name";
+pub const COLUMN_KEY: &str = "column_key";
+pub const EXTRA: &str = "extra";
+pub const PRIVILEGES: &str = "privileges";
+const GENERATION_EXPRESSION: &str = "generation_expression";
+// Extension column that keeps the GreptimeDB-native data type name (`data_type` holds the SQL type name)
+pub const GREPTIME_DATA_TYPE: &str = "greptime_data_type";
+pub const DATA_TYPE: &str = "data_type";
+pub const SEMANTIC_TYPE: &str = "semantic_type";
+pub const COLUMN_DEFAULT: &str = "column_default";
+pub const IS_NULLABLE: &str = "is_nullable";
 const COLUMN_TYPE: &str = "column_type";
-const COLUMN_COMMENT: &str = "column_comment";
+pub const COLUMN_COMMENT: &str = "column_comment";
+const SRS_ID: &str = "srs_id";
 const INIT_CAPACITY: usize = 42;

+// The maximum length reported for string columns (equal to `i32::MAX`); used for both character and octet length
+const MAX_STRING_LENGTH: i64 = 2147483647;
+const UTF8_CHARSET_NAME: &str = "utf8"; // `character_set_name` reported for string columns.
+const UTF8_COLLATE_NAME: &str = "utf8_bin"; // `collation_name` reported for string columns.
+const PRI_COLUMN_KEY: &str = "PRI"; // `column_key` of primary key columns.
+const TIME_INDEX_COLUMN_KEY: &str = "TIME INDEX"; // `column_key` of the time index column.
+const DEFAULT_PRIVILEGES: &str = "select,insert"; // Constant `privileges` value of every row.
+const EMPTY_STR: &str = ""; // Fills `extra`, `generation_expression` and a blank `column_key`.
+
 impl InformationSchemaColumns {
    pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
        Self {
@@ -75,12 +102,46 @@ impl InformationSchemaColumns {
            ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
            ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
            ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(ORDINAL_POSITION, ConcreteDataType::int64_datatype(), false),
+            ColumnSchema::new(
+                CHARACTER_MAXIMUM_LENGTH,
+                ConcreteDataType::int64_datatype(),
+                true,
+            ),
+            ColumnSchema::new(
+                CHARACTER_OCTET_LENGTH,
+                ConcreteDataType::int64_datatype(),
+                true,
+            ),
+            ColumnSchema::new(NUMERIC_PRECISION, ConcreteDataType::int64_datatype(), true),
+            ColumnSchema::new(NUMERIC_SCALE, ConcreteDataType::int64_datatype(), true),
+            ColumnSchema::new(DATETIME_PRECISION, ConcreteDataType::int64_datatype(), true),
+            ColumnSchema::new(
+                CHARACTER_SET_NAME,
+                ConcreteDataType::string_datatype(),
+                true,
+            ),
+            ColumnSchema::new(COLLATION_NAME, ConcreteDataType::string_datatype(), true),
+            ColumnSchema::new(COLUMN_KEY, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(EXTRA, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(PRIVILEGES, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(
+                GENERATION_EXPRESSION,
+                ConcreteDataType::string_datatype(),
+                false,
+            ),
+            ColumnSchema::new(
+                GREPTIME_DATA_TYPE,
+                ConcreteDataType::string_datatype(),
+                false,
+            ),
            ColumnSchema::new(DATA_TYPE, ConcreteDataType::string_datatype(), false),
            ColumnSchema::new(SEMANTIC_TYPE, ConcreteDataType::string_datatype(), false),
            ColumnSchema::new(COLUMN_DEFAULT, ConcreteDataType::string_datatype(), true),
            ColumnSchema::new(IS_NULLABLE, ConcreteDataType::string_datatype(), false),
            ColumnSchema::new(COLUMN_TYPE, ConcreteDataType::string_datatype(), false),
            ColumnSchema::new(COLUMN_COMMENT, ConcreteDataType::string_datatype(), true),
+            ColumnSchema::new(SRS_ID, ConcreteDataType::int64_datatype(), true),
        ]))
    }

@@ -136,9 +197,18 @@ struct InformationSchemaColumnsBuilder {
    schema_names: StringVectorBuilder,
    table_names: StringVectorBuilder,
    column_names: StringVectorBuilder,
+    ordinal_positions: Int64VectorBuilder,
+    character_maximum_lengths: Int64VectorBuilder,
+    character_octet_lengths: Int64VectorBuilder,
+    numeric_precisions: Int64VectorBuilder,
+    numeric_scales: Int64VectorBuilder,
+    datetime_precisions: Int64VectorBuilder,
+    character_set_names: StringVectorBuilder,
+    collation_names: StringVectorBuilder,
+    column_keys: StringVectorBuilder,
+    greptime_data_types: StringVectorBuilder,
    data_types: StringVectorBuilder,
    semantic_types: StringVectorBuilder,
-
    column_defaults: StringVectorBuilder,
    is_nullables: StringVectorBuilder,
    column_types: StringVectorBuilder,
@@ -159,6 +229,16 @@ impl InformationSchemaColumnsBuilder {
            schema_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
            table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
            column_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+            ordinal_positions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
+            character_maximum_lengths: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
+            character_octet_lengths: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
+            numeric_precisions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
+            numeric_scales: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
+            datetime_precisions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
+            character_set_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+            collation_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+            column_keys: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+            greptime_data_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
            data_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
            semantic_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
            column_defaults: StringVectorBuilder::with_capacity(INIT_CAPACITY),
@@ -195,6 +275,7 @@ impl InformationSchemaColumnsBuilder {

                    self.add_column(
                        &predicates,
+                        idx,
                        &catalog_name,
                        &schema_name,
                        &table.table_info().name,
@@ -208,16 +289,27 @@ impl InformationSchemaColumnsBuilder {
        self.finish()
    }

+    #[allow(clippy::too_many_arguments)]
    fn add_column(
        &mut self,
        predicates: &Predicates,
+        index: usize,
        catalog_name: &str,
        schema_name: &str,
        table_name: &str,
        semantic_type: &str,
        column_schema: &ColumnSchema,
    ) {
-        let data_type = &column_schema.data_type.name();
+        // Use sql data type name
+        let data_type = statements::concrete_data_type_to_sql_data_type(&column_schema.data_type)
+            .map(|dt| dt.to_string().to_lowercase())
+            .unwrap_or_else(|_| column_schema.data_type.name());
+
+        let column_key = match semantic_type {
+            SEMANTIC_TYPE_PRIMARY_KEY => PRI_COLUMN_KEY,
+            SEMANTIC_TYPE_TIME_INDEX => TIME_INDEX_COLUMN_KEY,
+            _ => EMPTY_STR,
+        };

        let row = [
            (TABLE_CATALOG, &Value::from(catalog_name)),
@@ -226,6 +318,8 @@ impl InformationSchemaColumnsBuilder {
            (COLUMN_NAME, &Value::from(column_schema.name.as_str())),
            (DATA_TYPE, &Value::from(data_type.as_str())),
            (SEMANTIC_TYPE, &Value::from(semantic_type)),
+            (ORDINAL_POSITION, &Value::from((index + 1) as i64)),
+            (COLUMN_KEY, &Value::from(column_key)),
        ];

        if !predicates.eval(&row) {
@@ -236,7 +330,63 @@ impl InformationSchemaColumnsBuilder {
        self.schema_names.push(Some(schema_name));
        self.table_names.push(Some(table_name));
        self.column_names.push(Some(&column_schema.name));
-        self.data_types.push(Some(data_type));
+        // Starts from 1
+        self.ordinal_positions.push(Some((index + 1) as i64));
+
+        if column_schema.data_type.is_string() {
+            self.character_maximum_lengths.push(Some(MAX_STRING_LENGTH));
+            self.character_octet_lengths.push(Some(MAX_STRING_LENGTH));
+            self.numeric_precisions.push(None);
+            self.numeric_scales.push(None);
+            self.datetime_precisions.push(None);
+            self.character_set_names.push(Some(UTF8_CHARSET_NAME));
+            self.collation_names.push(Some(UTF8_COLLATE_NAME));
+        } else if column_schema.data_type.is_numeric() || column_schema.data_type.is_decimal() {
+            self.character_maximum_lengths.push(None);
+            self.character_octet_lengths.push(None);
+
+            self.numeric_precisions.push(
+                column_schema
+                    .data_type
+                    .numeric_precision()
+                    .map(|x| x as i64),
+            );
+            self.numeric_scales
+                .push(column_schema.data_type.numeric_scale().map(|x| x as i64));
+
+            self.datetime_precisions.push(None);
+            self.character_set_names.push(None);
+            self.collation_names.push(None);
+        } else {
+            self.character_maximum_lengths.push(None);
+            self.character_octet_lengths.push(None);
+            self.numeric_precisions.push(None);
+            self.numeric_scales.push(None);
+
+            match &column_schema.data_type {
+                ConcreteDataType::DateTime(datetime_type) => {
+                    self.datetime_precisions
+                        .push(Some(datetime_type.precision() as i64));
+                }
+                ConcreteDataType::Timestamp(ts_type) => {
+                    self.datetime_precisions
+                        .push(Some(ts_type.precision() as i64));
+                }
+                ConcreteDataType::Time(time_type) => {
+                    self.datetime_precisions
+                        .push(Some(time_type.precision() as i64));
+                }
+                _ => self.datetime_precisions.push(None),
+            }
+
+            self.character_set_names.push(None);
+            self.collation_names.push(None);
+        }
+
+        self.column_keys.push(Some(column_key));
+        self.greptime_data_types
+            .push(Some(&column_schema.data_type.name()));
+        self.data_types.push(Some(&data_type));
        self.semantic_types.push(Some(semantic_type));
        self.column_defaults.push(
            column_schema
@@ -249,23 +399,52 @@ impl InformationSchemaColumnsBuilder {
        } else {
            self.is_nullables.push(Some("No"));
        }
-        self.column_types.push(Some(data_type));
+        self.column_types.push(Some(&data_type));
        self.column_comments
            .push(column_schema.column_comment().map(|x| x.as_ref()));
    }

    fn finish(&mut self) -> Result<RecordBatch> {
+        let rows_num = self.collation_names.len();
+
+        let privileges = Arc::new(ConstantVector::new(
+            Arc::new(StringVector::from(vec![DEFAULT_PRIVILEGES])),
+            rows_num,
+        ));
+        let empty_string = Arc::new(ConstantVector::new(
+            Arc::new(StringVector::from(vec![EMPTY_STR])),
+            rows_num,
+        ));
+        let srs_ids = Arc::new(ConstantVector::new(
+            Arc::new(Int64Vector::from(vec![None])),
+            rows_num,
+        ));
+
        let columns: Vec<VectorRef> = vec![
            Arc::new(self.catalog_names.finish()),
            Arc::new(self.schema_names.finish()),
            Arc::new(self.table_names.finish()),
            Arc::new(self.column_names.finish()),
+            Arc::new(self.ordinal_positions.finish()),
+            Arc::new(self.character_maximum_lengths.finish()),
+            Arc::new(self.character_octet_lengths.finish()),
+            Arc::new(self.numeric_precisions.finish()),
+            Arc::new(self.numeric_scales.finish()),
+            Arc::new(self.datetime_precisions.finish()),
+            Arc::new(self.character_set_names.finish()),
+            Arc::new(self.collation_names.finish()),
+            Arc::new(self.column_keys.finish()),
+            empty_string.clone(),
+            privileges,
+            empty_string,
+            Arc::new(self.greptime_data_types.finish()),
            Arc::new(self.data_types.finish()),
            Arc::new(self.semantic_types.finish()),
            Arc::new(self.column_defaults.finish()),
            Arc::new(self.is_nullables.finish()),
            Arc::new(self.column_types.finish()),
            Arc::new(self.column_comments.finish()),
+            srs_ids,
        ];

        RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
--- a/src/catalog/src/information_schema/key_column_usage.rs
+++ b/src/catalog/src/information_schema/key_column_usage.rs
@@ -37,15 +37,23 @@ use crate::error::{
 use crate::information_schema::{InformationTable, Predicates};
 use crate::CatalogManager;

-const CONSTRAINT_SCHEMA: &str = "constraint_schema";
-const CONSTRAINT_NAME: &str = "constraint_name";
-const TABLE_CATALOG: &str = "table_catalog";
-const TABLE_SCHEMA: &str = "table_schema";
-const TABLE_NAME: &str = "table_name";
-const COLUMN_NAME: &str = "column_name";
-const ORDINAL_POSITION: &str = "ordinal_position";
+pub const CONSTRAINT_SCHEMA: &str = "constraint_schema";
+pub const CONSTRAINT_NAME: &str = "constraint_name";
+// It's always `def` in MySQL
+pub const TABLE_CATALOG: &str = "table_catalog";
+// The real catalog name for this key column.
+pub const REAL_TABLE_CATALOG: &str = "real_table_catalog";
+pub const TABLE_SCHEMA: &str = "table_schema";
+pub const TABLE_NAME: &str = "table_name";
+pub const COLUMN_NAME: &str = "column_name";
+pub const ORDINAL_POSITION: &str = "ordinal_position";
 const INIT_CAPACITY: usize = 42;

+/// Primary key constraint name
+pub(crate) const PRI_CONSTRAINT_NAME: &str = "PRIMARY";
+/// Time index constraint name
+pub(crate) const TIME_INDEX_CONSTRAINT_NAME: &str = "TIME INDEX";
+
 /// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
 pub(super) struct InformationSchemaKeyColumnUsage {
    schema: SchemaRef,
@@ -76,6 +84,11 @@ impl InformationSchemaKeyColumnUsage {
            ),
            ColumnSchema::new(CONSTRAINT_NAME, ConcreteDataType::string_datatype(), false),
            ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(
+                REAL_TABLE_CATALOG,
+                ConcreteDataType::string_datatype(),
+                false,
+            ),
            ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
            ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
            ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
@@ -158,6 +171,7 @@ struct InformationSchemaKeyColumnUsageBuilder {
    constraint_schema: StringVectorBuilder,
    constraint_name: StringVectorBuilder,
    table_catalog: StringVectorBuilder,
+    real_table_catalog: StringVectorBuilder,
    table_schema: StringVectorBuilder,
    table_name: StringVectorBuilder,
    column_name: StringVectorBuilder,
@@ -179,6 +193,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
            constraint_schema: StringVectorBuilder::with_capacity(INIT_CAPACITY),
            constraint_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
            table_catalog: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+            real_table_catalog: StringVectorBuilder::with_capacity(INIT_CAPACITY),
            table_schema: StringVectorBuilder::with_capacity(INIT_CAPACITY),
            table_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
            column_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
@@ -222,7 +237,8 @@ impl InformationSchemaKeyColumnUsageBuilder {
                            self.add_key_column_usage(
                                &predicates,
                                &schema_name,
-                                "TIME INDEX",
+                                TIME_INDEX_CONSTRAINT_NAME,
+                                &catalog_name,
                                &schema_name,
                                &table_name,
                                &column.name,
@@ -231,6 +247,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
                        }
                        if keys.contains(&idx) {
                            primary_constraints.push((
+                                catalog_name.clone(),
                                schema_name.clone(),
                                table_name.clone(),
                                column.name.clone(),
@@ -244,13 +261,14 @@ impl InformationSchemaKeyColumnUsageBuilder {
            }
        }

-        for (i, (schema_name, table_name, column_name)) in
+        for (i, (catalog_name, schema_name, table_name, column_name)) in
            primary_constraints.into_iter().enumerate()
        {
            self.add_key_column_usage(
                &predicates,
                &schema_name,
-                "PRIMARY",
+                PRI_CONSTRAINT_NAME,
+                &catalog_name,
                &schema_name,
                &table_name,
                &column_name,
@@ -269,6 +287,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
        predicates: &Predicates,
        constraint_schema: &str,
        constraint_name: &str,
+        table_catalog: &str,
        table_schema: &str,
        table_name: &str,
        column_name: &str,
@@ -277,6 +296,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
        let row = [
            (CONSTRAINT_SCHEMA, &Value::from(constraint_schema)),
            (CONSTRAINT_NAME, &Value::from(constraint_name)),
+            (REAL_TABLE_CATALOG, &Value::from(table_catalog)),
            (TABLE_SCHEMA, &Value::from(table_schema)),
            (TABLE_NAME, &Value::from(table_name)),
            (COLUMN_NAME, &Value::from(column_name)),
@@ -291,6 +311,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
        self.constraint_schema.push(Some(constraint_schema));
        self.constraint_name.push(Some(constraint_name));
        self.table_catalog.push(Some("def"));
+        self.real_table_catalog.push(Some(table_catalog));
        self.table_schema.push(Some(table_schema));
        self.table_name.push(Some(table_name));
        self.column_name.push(Some(column_name));
@@ -310,6 +331,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
            Arc::new(self.constraint_schema.finish()),
            Arc::new(self.constraint_name.finish()),
            Arc::new(self.table_catalog.finish()),
+            Arc::new(self.real_table_catalog.finish()),
            Arc::new(self.table_schema.finish()),
            Arc::new(self.table_name.finish()),
            Arc::new(self.column_name.finish()),
--- a/src/catalog/src/information_schema/memory_table/tables.rs
+++ b/src/catalog/src/information_schema/memory_table/tables.rs
@@ -14,13 +14,15 @@

 use std::sync::Arc;

-use common_catalog::consts::MITO_ENGINE;
+use common_catalog::consts::{METRIC_ENGINE, MITO_ENGINE};
 use datatypes::prelude::{ConcreteDataType, VectorRef};
 use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
 use datatypes::vectors::{Int64Vector, StringVector};

 use crate::information_schema::table_names::*;

+const NO_VALUE: &str = "NO";
+
 /// Find the schema and columns by the table_name, only valid for memory tables.
 /// Safety: the user MUST ensure the table schema exists, panic otherwise.
 pub fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>) {
@@ -59,14 +61,15 @@ pub fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>) {
                "SAVEPOINTS",
            ]),
            vec![
-                Arc::new(StringVector::from(vec![MITO_ENGINE])),
-                Arc::new(StringVector::from(vec!["DEFAULT"])),
+                Arc::new(StringVector::from(vec![MITO_ENGINE, METRIC_ENGINE])),
+                Arc::new(StringVector::from(vec!["DEFAULT", "YES"])),
                Arc::new(StringVector::from(vec![
                    "Storage engine for time-series data",
+                    "Storage engine for observability scenarios, which is adept at handling a large number of small tables, making it particularly suitable for cloud-native monitoring",
                ])),
-                Arc::new(StringVector::from(vec!["NO"])),
-                Arc::new(StringVector::from(vec!["NO"])),
-                Arc::new(StringVector::from(vec!["NO"])),
+                Arc::new(StringVector::from(vec![NO_VALUE, NO_VALUE])),
+                Arc::new(StringVector::from(vec![NO_VALUE, NO_VALUE])),
+                Arc::new(StringVector::from(vec![NO_VALUE, NO_VALUE])),
            ],
        ),

--- a/src/catalog/src/information_schema/predicate.rs
+++ b/src/catalog/src/information_schema/predicate.rs
@@ -109,11 +109,7 @@ impl Predicate {
                };
            }
            Predicate::Not(p) => {
-                let Some(b) = p.eval(row) else {
-                    return None;
-                };
-
-                return Some(!b);
+                return Some(!p.eval(row)?);
            }
        }

@@ -125,13 +121,7 @@ impl Predicate {
    fn from_expr(expr: DfExpr) -> Option<Predicate> {
        match expr {
            // NOT expr
-            DfExpr::Not(expr) => {
-                let Some(p) = Self::from_expr(*expr) else {
-                    return None;
-                };
-
-                Some(Predicate::Not(Box::new(p)))
-            }
+            DfExpr::Not(expr) => Some(Predicate::Not(Box::new(Self::from_expr(*expr)?))),
            // expr LIKE pattern
            DfExpr::Like(Like {
                negated,
@@ -178,25 +168,15 @@ impl Predicate {
                }
                // left AND right
                (left, Operator::And, right) => {
-                    let Some(left) = Self::from_expr(left) else {
-                        return None;
-                    };
-
-                    let Some(right) = Self::from_expr(right) else {
-                        return None;
-                    };
+                    let left = Self::from_expr(left)?;
+                    let right = Self::from_expr(right)?;

                    Some(Predicate::And(Box::new(left), Box::new(right)))
                }
                // left OR right
                (left, Operator::Or, right) => {
-                    let Some(left) = Self::from_expr(left) else {
-                        return None;
-                    };
-
-                    let Some(right) = Self::from_expr(right) else {
-                        return None;
-                    };
+                    let left = Self::from_expr(left)?;
+                    let right = Self::from_expr(right)?;

                    Some(Predicate::Or(Box::new(left), Box::new(right)))
                }
--- a/src/catalog/src/information_schema/table_constraints.rs
+++ b/src/catalog/src/information_schema/table_constraints.rs
@@ -0,0 +1,286 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::{Arc, Weak};
+
+use arrow_schema::SchemaRef as ArrowSchemaRef;
+use common_catalog::consts::INFORMATION_SCHEMA_TABLE_CONSTRAINTS_TABLE_ID;
+use common_error::ext::BoxedError;
+use common_query::physical_plan::TaskContext;
+use common_recordbatch::adapter::RecordBatchStreamAdapter;
+use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
+use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
+use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
+use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
+use datatypes::prelude::{ConcreteDataType, MutableVector};
+use datatypes::scalars::ScalarVectorBuilder;
+use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
+use datatypes::value::Value;
+use datatypes::vectors::{ConstantVector, StringVector, StringVectorBuilder, VectorRef};
+use futures::TryStreamExt;
+use snafu::{OptionExt, ResultExt};
+use store_api::storage::{ScanRequest, TableId};
+
+use super::{InformationTable, TABLE_CONSTRAINTS};
+use crate::error::{
+    CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
+};
+use crate::information_schema::key_column_usage::{
+    PRI_CONSTRAINT_NAME, TIME_INDEX_CONSTRAINT_NAME,
+};
+use crate::information_schema::Predicates;
+use crate::CatalogManager;
+
+/// The `TABLE_CONSTRAINTS` virtual table lists the time index and primary key constraints of tables.
+pub(super) struct InformationSchemaTableConstraints {
+    schema: SchemaRef, // Column layout of the virtual table, built once by `Self::schema()`.
+    catalog_name: String, // Catalog whose schemas and tables are scanned.
+    catalog_manager: Weak<dyn CatalogManager>, // Weak handle; the builder upgrades it per scan.
+}
+
+const CONSTRAINT_CATALOG: &str = "constraint_catalog"; // Column names, in output order.
+const CONSTRAINT_SCHEMA: &str = "constraint_schema";
+const CONSTRAINT_NAME: &str = "constraint_name";
+const TABLE_SCHEMA: &str = "table_schema";
+const TABLE_NAME: &str = "table_name";
+const CONSTRAINT_TYPE: &str = "constraint_type";
+const ENFORCED: &str = "enforced";
+
+const INIT_CAPACITY: usize = 42; // Initial capacity of each column builder.
+
+const TIME_INDEX_CONSTRAINT_TYPE: &str = "TIME INDEX"; // `constraint_type` of a time index row.
+const PRI_KEY_CONSTRAINT_TYPE: &str = "PRIMARY KEY"; // `constraint_type` of a primary key row.
+
+impl InformationSchemaTableConstraints {
+    pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
+        Self {
+            schema: Self::schema(), // The layout is fixed, so it is built once per instance.
+            catalog_name,
+            catalog_manager,
+        }
+    }
+
+    fn schema() -> SchemaRef { // Seven string columns, all declared non-nullable (`false`).
+        Arc::new(Schema::new(vec![
+            ColumnSchema::new(
+                CONSTRAINT_CATALOG,
+                ConcreteDataType::string_datatype(),
+                false,
+            ),
+            ColumnSchema::new(
+                CONSTRAINT_SCHEMA,
+                ConcreteDataType::string_datatype(),
+                false,
+            ),
+            ColumnSchema::new(CONSTRAINT_NAME, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(CONSTRAINT_TYPE, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(ENFORCED, ConcreteDataType::string_datatype(), false),
+        ]))
+    }
+
+    fn builder(&self) -> InformationSchemaTableConstraintsBuilder { // Fresh builder per scan.
+        InformationSchemaTableConstraintsBuilder::new(
+            self.schema.clone(),
+            self.catalog_name.clone(),
+            self.catalog_manager.clone(),
+        )
+    }
+}
+
+impl InformationTable for InformationSchemaTableConstraints {
+    fn table_id(&self) -> TableId {
+        INFORMATION_SCHEMA_TABLE_CONSTRAINTS_TABLE_ID
+    }
+
+    fn table_name(&self) -> &'static str {
+        TABLE_CONSTRAINTS
+    }
+
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+
+    fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
+        let schema = self.schema.arrow_schema().clone();
+        let mut builder = self.builder(); // Moved into the async block below.
+        let stream = Box::pin(DfRecordBatchStreamAdapter::new(
+            schema,
+            futures::stream::once(async move { // Single-item stream: the table is one batch.
+                builder
+                    .make_table_constraints(Some(request)) // Request is used to build predicates.
+                    .await
+                    .map(|x| x.into_df_record_batch())
+                    .map_err(Into::into)
+            }),
+        ));
+        Ok(Box::pin(
+            RecordBatchStreamAdapter::try_new(stream) // Wrap the DataFusion stream adapter.
+                .map_err(BoxedError::new)
+                .context(InternalSnafu)?,
+        ))
+    }
+}
+
+struct InformationSchemaTableConstraintsBuilder { // Accumulates the rows of one scan.
+    schema: SchemaRef,
+    catalog_name: String,
+    catalog_manager: Weak<dyn CatalogManager>,
+
+    constraint_schemas: StringVectorBuilder, // One builder per non-constant output column.
+    constraint_names: StringVectorBuilder,
+    table_schemas: StringVectorBuilder,
+    table_names: StringVectorBuilder,
+    constraint_types: StringVectorBuilder,
+}
+
+impl InformationSchemaTableConstraintsBuilder {
+    fn new(
+        schema: SchemaRef,
+        catalog_name: String,
+        catalog_manager: Weak<dyn CatalogManager>,
+    ) -> Self {
+        Self {
+            schema,
+            catalog_name,
+            catalog_manager,
+            constraint_schemas: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+            constraint_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+            table_schemas: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+            table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+            constraint_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+        }
+    }
+
+    /// Construct the `information_schema.table_constraints` virtual table as a single record batch
+    async fn make_table_constraints(
+        &mut self,
+        request: Option<ScanRequest>,
+    ) -> Result<RecordBatch> {
+        let catalog_name = self.catalog_name.clone();
+        let catalog_manager = self
+            .catalog_manager
+            .upgrade() // Fails with `UpgradeWeakCatalogManagerRef` if the manager was dropped.
+            .context(UpgradeWeakCatalogManagerRefSnafu)?;
+        let predicates = Predicates::from_scan_request(&request); // Row filter for this scan.
+
+        for schema_name in catalog_manager.schema_names(&catalog_name).await? {
+            let mut stream = catalog_manager.tables(&catalog_name, &schema_name).await;
+
+            while let Some(table) = stream.try_next().await? {
+                let keys = &table.table_info().meta.primary_key_indices;
+                let schema = table.schema();
+
+                if schema.timestamp_index().is_some() { // One TIME INDEX row per such table.
+                    self.add_table_constraint(
+                        &predicates,
+                        &schema_name, // constraint_schema
+                        TIME_INDEX_CONSTRAINT_NAME,
+                        &schema_name, // table_schema
+                        &table.table_info().name,
+                        TIME_INDEX_CONSTRAINT_TYPE,
+                    );
+                }
+
+                if !keys.is_empty() { // One PRIMARY row per table with primary key columns.
+                    self.add_table_constraint(
+                        &predicates,
+                        &schema_name, // constraint_schema
+                        PRI_CONSTRAINT_NAME,
+                        &schema_name, // table_schema
+                        &table.table_info().name,
+                        PRI_KEY_CONSTRAINT_TYPE,
+                    );
+                }
+            }
+        }
+
+        self.finish()
+    }
+
+    fn add_table_constraint(
+        &mut self,
+        predicates: &Predicates,
+        constraint_schema: &str,
+        constraint_name: &str,
+        table_schema: &str,
+        table_name: &str,
+        constraint_type: &str,
+    ) {
+        let row = [ // Column/value pairs the predicates are evaluated against.
+            (CONSTRAINT_SCHEMA, &Value::from(constraint_schema)),
+            (CONSTRAINT_NAME, &Value::from(constraint_name)),
+            (TABLE_SCHEMA, &Value::from(table_schema)),
+            (TABLE_NAME, &Value::from(table_name)),
+            (CONSTRAINT_TYPE, &Value::from(constraint_type)),
+        ]; // NOTE: the constant columns (constraint_catalog, enforced) are not part of the row.
+
+        if !predicates.eval(&row) { // Drop rows the predicates reject.
+            return;
+        }
+
+        self.constraint_schemas.push(Some(constraint_schema));
+        self.constraint_names.push(Some(constraint_name));
+        self.table_schemas.push(Some(table_schema));
+        self.table_names.push(Some(table_name));
+        self.constraint_types.push(Some(constraint_type));
+    }
+
+    fn finish(&mut self) -> Result<RecordBatch> {
+        let rows_num = self.constraint_names.len(); // Every builder gets one push per row.
+
+        let constraint_catalogs = Arc::new(ConstantVector::new( // "def" repeated for every row.
+            Arc::new(StringVector::from(vec!["def"])),
+            rows_num,
+        ));
+        let enforceds = Arc::new(ConstantVector::new( // "YES" repeated for every row.
+            Arc::new(StringVector::from(vec!["YES"])),
+            rows_num,
+        ));
+
+        let columns: Vec<VectorRef> = vec![ // Same order as the columns of the table schema.
+            constraint_catalogs,
+            Arc::new(self.constraint_schemas.finish()),
+            Arc::new(self.constraint_names.finish()),
+            Arc::new(self.table_schemas.finish()),
+            Arc::new(self.table_names.finish()),
+            Arc::new(self.constraint_types.finish()),
+            enforceds,
+        ];
+
+        RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
+    }
+}
+
+impl DfPartitionStream for InformationSchemaTableConstraints {
+    fn schema(&self) -> &ArrowSchemaRef {
+        self.schema.arrow_schema()
+    }
+
+    fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream { // Context is unused.
+        let schema = self.schema.arrow_schema().clone();
+        let mut builder = self.builder(); // Moved into the async block below.
+        Box::pin(DfRecordBatchStreamAdapter::new(
+            schema,
+            futures::stream::once(async move { // Single-item stream: the table is one batch.
+                builder
+                    .make_table_constraints(None) // No scan request is available on this path.
+                    .await
+                    .map(|x| x.into_df_record_batch())
+                    .map_err(Into::into)
+            }),
+        ))
+    }
+}
--- a/src/catalog/src/information_schema/table_names.rs
+++ b/src/catalog/src/information_schema/table_names.rs
@@ -41,3 +41,4 @@ pub const SESSION_STATUS: &str = "session_status";
 pub const RUNTIME_METRICS: &str = "runtime_metrics";
 pub const PARTITIONS: &str = "partitions";
 pub const REGION_PEERS: &str = "greptime_region_peers";
+pub const TABLE_CONSTRAINTS: &str = "table_constraints";
--- a/src/catalog/src/kvbackend/client.rs
+++ b/src/catalog/src/kvbackend/client.rs
@@ -17,7 +17,6 @@ use std::fmt::Debug;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::{Arc, Mutex};
 use std::time::Duration;
-use std::usize;

 use common_error::ext::BoxedError;
 use common_meta::cache_invalidator::KvCacheInvalidator;
@@ -364,6 +363,10 @@ impl KvBackend for MetaKvBackend {
        "MetaKvBackend"
    }

+    fn as_any(&self) -> &dyn Any { // Exposes this backend as `&dyn Any`.
+        self
+    }
+
    async fn range(&self, req: RangeRequest) -> Result<RangeResponse> {
        self.client
            .range(req)
@@ -372,27 +375,6 @@ impl KvBackend for MetaKvBackend {
            .context(ExternalSnafu)
    }

-    async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> {
-        let mut response = self
-            .client
-            .range(RangeRequest::new().with_key(key))
-            .await
-            .map_err(BoxedError::new)
-            .context(ExternalSnafu)?;
-        Ok(response.take_kvs().get_mut(0).map(|kv| KeyValue {
-            key: kv.take_key(),
-            value: kv.take_value(),
-        }))
-    }
-
-    async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
-        self.client
-            .batch_put(req)
-            .await
-            .map_err(BoxedError::new)
-            .context(ExternalSnafu)
-    }
-
    async fn put(&self, req: PutRequest) -> Result<PutResponse> {
        self.client
            .put(req)
@@ -401,17 +383,9 @@ impl KvBackend for MetaKvBackend {
            .context(ExternalSnafu)
    }

-    async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
+    async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
        self.client
-            .delete_range(req)
-            .await
-            .map_err(BoxedError::new)
-            .context(ExternalSnafu)
-    }
-
-    async fn batch_delete(&self, req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
-        self.client
-            .batch_delete(req)
+            .batch_put(req)
            .await
            .map_err(BoxedError::new)
            .context(ExternalSnafu)
@@ -436,8 +410,33 @@ impl KvBackend for MetaKvBackend {
            .context(ExternalSnafu)
    }

-    fn as_any(&self) -> &dyn Any {
-        self
+    async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> { // Forwards the request to the wrapped client unchanged.
+        self.client
+            .delete_range(req)
+            .await
+            .map_err(BoxedError::new) // box the client error ...
+            .context(ExternalSnafu) // ... and wrap it with ExternalSnafu
+    }
+
+    async fn batch_delete(&self, req: BatchDeleteRequest) -> Result<BatchDeleteResponse> { // Forwards the request to the wrapped client unchanged.
+        self.client
+            .batch_delete(req)
+            .await
+            .map_err(BoxedError::new) // box the client error ...
+            .context(ExternalSnafu) // ... and wrap it with ExternalSnafu
+    }
+
+    async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> { // Single-key lookup implemented on top of the client's range call.
+        let mut response = self
+            .client
+            .range(RangeRequest::new().with_key(key)) // range request built with only the key set
+            .await
+            .map_err(BoxedError::new)
+            .context(ExternalSnafu)?; // client errors are boxed and wrapped with ExternalSnafu
+        Ok(response.take_kvs().get_mut(0).map(|kv| KeyValue { // only the first returned pair is used; None when the response has no pairs
+            key: kv.take_key(), // key and value are taken out of the response entry
+            value: kv.take_value(),
+        }))
     }
 }

@@ -506,32 +505,32 @@ mod tests {
        }

        async fn range(&self, _req: RangeRequest) -> Result<RangeResponse, Self::Error> {
-            todo!()
+            unimplemented!()
        }

        async fn batch_put(&self, _req: BatchPutRequest) -> Result<BatchPutResponse, Self::Error> {
-            todo!()
+            unimplemented!()
        }

        async fn compare_and_put(
            &self,
            _req: CompareAndPutRequest,
        ) -> Result<CompareAndPutResponse, Self::Error> {
-            todo!()
+            unimplemented!()
        }

        async fn delete_range(
            &self,
            _req: DeleteRangeRequest,
        ) -> Result<DeleteRangeResponse, Self::Error> {
-            todo!()
+            unimplemented!()
        }

        async fn batch_delete(
            &self,
            _req: BatchDeleteRequest,
        ) -> Result<BatchDeleteResponse, Self::Error> {
-            todo!()
+            unimplemented!()
        }
    }

--- a/src/catalog/src/kvbackend/manager.rs
+++ b/src/catalog/src/kvbackend/manager.rs
@@ -23,15 +23,14 @@ use common_catalog::consts::{
 };
 use common_catalog::format_full_table_name;
 use common_error::ext::BoxedError;
-use common_meta::cache_invalidator::{CacheInvalidator, CacheInvalidatorRef, Context};
-use common_meta::error::Result as MetaResult;
+use common_meta::cache_invalidator::{CacheInvalidator, Context, MultiCacheInvalidator};
+use common_meta::instruction::CacheIdent;
 use common_meta::key::catalog_name::CatalogNameKey;
 use common_meta::key::schema_name::SchemaNameKey;
 use common_meta::key::table_info::TableInfoValue;
 use common_meta::key::table_name::TableNameKey;
 use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
 use common_meta::kv_backend::KvBackendRef;
-use common_meta::table_name::TableName;
 use futures_util::stream::BoxStream;
 use futures_util::{StreamExt, TryStreamExt};
 use moka::future::{Cache as AsyncCache, CacheBuilder};
@@ -39,14 +38,13 @@ use moka::sync::Cache;
 use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
 use snafu::prelude::*;
 use table::dist_table::DistTable;
-use table::metadata::TableId;
 use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
 use table::TableRef;

 use crate::error::Error::{GetTableCache, TableCacheNotGet};
 use crate::error::{
-    self as catalog_err, ListCatalogsSnafu, ListSchemasSnafu, ListTablesSnafu,
-    Result as CatalogResult, TableCacheNotGetSnafu, TableMetadataManagerSnafu,
+    InvalidTableInfoInCatalogSnafu, ListCatalogsSnafu, ListSchemasSnafu, ListTablesSnafu, Result,
+    TableCacheNotGetSnafu, TableMetadataManagerSnafu,
 };
 use crate::information_schema::InformationSchemaProvider;
 use crate::CatalogManager;
@@ -58,10 +56,6 @@ use crate::CatalogManager;
 /// comes from `SystemCatalog`, which is static and read-only.
 #[derive(Clone)]
 pub struct KvBackendCatalogManager {
-    // TODO(LFC): Maybe use a real implementation for Standalone mode.
-    // Now we use `NoopKvCacheInvalidator` for Standalone mode. In Standalone mode, the KV backend
-    // is implemented by RaftEngine. Maybe we need a cache for it?
-    cache_invalidator: CacheInvalidatorRef,
    partition_manager: PartitionRuleManagerRef,
    table_metadata_manager: TableMetadataManagerRef,
    /// A sub-CatalogManager that handles system tables
@@ -69,33 +63,33 @@ pub struct KvBackendCatalogManager {
    table_cache: AsyncCache<String, TableRef>,
 }

-fn make_table(table_info_value: TableInfoValue) -> CatalogResult<TableRef> {
-    let table_info = table_info_value
-        .table_info
-        .try_into()
-        .context(catalog_err::InvalidTableInfoInCatalogSnafu)?;
-    Ok(DistTable::table(Arc::new(table_info)))
+struct TableCacheInvalidator { // Holds a handle to the table cache so cached entries can be invalidated.
+    table_cache: AsyncCache<String, TableRef>, // String key -> cached TableRef
+}
+
+impl TableCacheInvalidator {
+    pub fn new(table_cache: AsyncCache<String, TableRef>) -> Self { // Wraps the given cache handle; performs no other work.
+        Self { table_cache }
+    }
 }

 #[async_trait::async_trait]
-impl CacheInvalidator for KvBackendCatalogManager {
-    async fn invalidate_table_id(&self, ctx: &Context, table_id: TableId) -> MetaResult<()> {
-        self.cache_invalidator
-            .invalidate_table_id(ctx, table_id)
-            .await
-    }
-
-    async fn invalidate_table_name(&self, ctx: &Context, table_name: TableName) -> MetaResult<()> {
-        let table_cache_key = format_full_table_name(
-            &table_name.catalog_name,
-            &table_name.schema_name,
-            &table_name.table_name,
-        );
-        self.cache_invalidator
-            .invalidate_table_name(ctx, table_name)
-            .await?;
-        self.table_cache.invalidate(&table_cache_key).await;
-
+impl CacheInvalidator for TableCacheInvalidator { // Evicts table-cache entries named by incoming cache identifiers.
+    async fn invalidate(
+        &self,
+        _ctx: &Context, // unused by this invalidator
+        caches: Vec<CacheIdent>,
+    ) -> common_meta::error::Result<()> {
+        for cache in caches {
+            if let CacheIdent::TableName(table_name) = cache { // every other CacheIdent variant is ignored here
+                let table_cache_key = format_full_table_name( // cache key is built from catalog, schema and table name
+                    &table_name.catalog_name,
+                    &table_name.schema_name,
+                    &table_name.table_name,
+                );
+                self.table_cache.invalidate(&table_cache_key).await; // evict that entry from the table cache
+            }
+        }
        Ok(())
    }
 }
@@ -106,11 +100,21 @@ const TABLE_CACHE_TTL: Duration = Duration::from_secs(10 * 60);
 const TABLE_CACHE_TTI: Duration = Duration::from_secs(5 * 60);

 impl KvBackendCatalogManager {
-    pub fn new(backend: KvBackendRef, cache_invalidator: CacheInvalidatorRef) -> Arc<Self> {
+    pub async fn new(
+        backend: KvBackendRef,
+        multi_cache_invalidator: Arc<MultiCacheInvalidator>,
+    ) -> Arc<Self> {
+        let table_cache: AsyncCache<String, TableRef> = CacheBuilder::new(TABLE_CACHE_MAX_CAPACITY)
+            .time_to_live(TABLE_CACHE_TTL)
+            .time_to_idle(TABLE_CACHE_TTI)
+            .build();
+        multi_cache_invalidator
+            .add_invalidator(Arc::new(TableCacheInvalidator::new(table_cache.clone())))
+            .await;
+
        Arc::new_cyclic(|me| Self {
            partition_manager: Arc::new(PartitionRuleManager::new(backend.clone())),
            table_metadata_manager: Arc::new(TableMetadataManager::new(backend)),
-            cache_invalidator,
            system_catalog: SystemCatalog {
                catalog_manager: me.clone(),
                catalog_cache: Cache::new(CATALOG_CACHE_MAX_CAPACITY),
@@ -119,10 +123,7 @@ impl KvBackendCatalogManager {
                    me.clone(),
                )),
            },
-            table_cache: CacheBuilder::new(TABLE_CACHE_MAX_CAPACITY)
-                .time_to_live(TABLE_CACHE_TTL)
-                .time_to_idle(TABLE_CACHE_TTI)
-                .build(),
+            table_cache,
        })
    }

@@ -141,12 +142,11 @@ impl CatalogManager for KvBackendCatalogManager {
        self
    }

-    async fn catalog_names(&self) -> CatalogResult<Vec<String>> {
+    async fn catalog_names(&self) -> Result<Vec<String>> {
        let stream = self
            .table_metadata_manager
            .catalog_manager()
-            .catalog_names()
-            .await;
+            .catalog_names();

        let keys = stream
            .try_collect::<Vec<_>>()
@@ -157,12 +157,11 @@ impl CatalogManager for KvBackendCatalogManager {
        Ok(keys)
    }

-    async fn schema_names(&self, catalog: &str) -> CatalogResult<Vec<String>> {
+    async fn schema_names(&self, catalog: &str) -> Result<Vec<String>> {
        let stream = self
            .table_metadata_manager
            .schema_manager()
-            .schema_names(catalog)
-            .await;
+            .schema_names(catalog);
        let mut keys = stream
            .try_collect::<BTreeSet<_>>()
            .await
@@ -174,12 +173,11 @@ impl CatalogManager for KvBackendCatalogManager {
        Ok(keys.into_iter().collect())
    }

-    async fn table_names(&self, catalog: &str, schema: &str) -> CatalogResult<Vec<String>> {
+    async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>> {
        let stream = self
            .table_metadata_manager
            .table_name_manager()
-            .tables(catalog, schema)
-            .await;
+            .tables(catalog, schema);
        let mut tables = stream
            .try_collect::<Vec<_>>()
            .await
@@ -193,7 +191,7 @@ impl CatalogManager for KvBackendCatalogManager {
        Ok(tables.into_iter().collect())
    }

-    async fn catalog_exists(&self, catalog: &str) -> CatalogResult<bool> {
+    async fn catalog_exists(&self, catalog: &str) -> Result<bool> {
        self.table_metadata_manager
            .catalog_manager()
            .exists(CatalogNameKey::new(catalog))
@@ -201,7 +199,7 @@ impl CatalogManager for KvBackendCatalogManager {
            .context(TableMetadataManagerSnafu)
    }

-    async fn schema_exists(&self, catalog: &str, schema: &str) -> CatalogResult<bool> {
+    async fn schema_exists(&self, catalog: &str, schema: &str) -> Result<bool> {
        if self.system_catalog.schema_exist(schema) {
            return Ok(true);
        }
@@ -213,7 +211,7 @@ impl CatalogManager for KvBackendCatalogManager {
            .context(TableMetadataManagerSnafu)
    }

-    async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> CatalogResult<bool> {
+    async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
        if self.system_catalog.table_exist(schema, table) {
            return Ok(true);
        }
@@ -232,7 +230,7 @@ impl CatalogManager for KvBackendCatalogManager {
        catalog: &str,
        schema: &str,
        table_name: &str,
-    ) -> CatalogResult<Option<TableRef>> {
+    ) -> Result<Option<TableRef>> {
        if let Some(table) = self.system_catalog.table(catalog, schema, table_name) {
            return Ok(Some(table));
        }
@@ -266,7 +264,7 @@ impl CatalogManager for KvBackendCatalogManager {
                }
                .fail();
            };
-            make_table(table_info_value)
+            build_table(table_info_value)
        };

        match self
@@ -289,7 +287,7 @@ impl CatalogManager for KvBackendCatalogManager {
        &'a self,
        catalog: &'a str,
        schema: &'a str,
-    ) -> BoxStream<'a, CatalogResult<TableRef>> {
+    ) -> BoxStream<'a, Result<TableRef>> {
        let sys_tables = try_stream!({
            // System tables
            let sys_table_names = self.system_catalog.table_names(schema);
@@ -304,7 +302,6 @@ impl CatalogManager for KvBackendCatalogManager {
            .table_metadata_manager
            .table_name_manager()
            .tables(catalog, schema)
-            .await
            .map_ok(|(_, v)| v.table_id());
        const BATCH_SIZE: usize = 128;
        let user_tables = try_stream!({
@@ -314,7 +311,7 @@ impl CatalogManager for KvBackendCatalogManager {
            while let Some(table_ids) = table_id_chunks.next().await {
                let table_ids = table_ids
                    .into_iter()
-                    .collect::<Result<Vec<_>, _>>()
+                    .collect::<std::result::Result<Vec<_>, _>>()
                    .map_err(BoxedError::new)
                    .context(ListTablesSnafu { catalog, schema })?;

@@ -326,7 +323,7 @@ impl CatalogManager for KvBackendCatalogManager {
                    .context(TableMetadataManagerSnafu)?;

                for table_info_value in table_info_values.into_values() {
-                    yield make_table(table_info_value)?;
+                    yield build_table(table_info_value)?;
                }
            }
        });
@@ -335,6 +332,14 @@ impl CatalogManager for KvBackendCatalogManager {
    }
 }

+fn build_table(table_info_value: TableInfoValue) -> Result<TableRef> { // Turns a stored TableInfoValue into a TableRef.
+    let table_info = table_info_value
+        .table_info
+        .try_into() // fallible conversion of the stored table info
+        .context(InvalidTableInfoInCatalogSnafu)?; // a conversion failure is reported via InvalidTableInfoInCatalogSnafu
+    Ok(DistTable::table(Arc::new(table_info))) // the table handle is produced by DistTable::table
+}
+
 // TODO: This struct can hold a static map of all system tables when
 // the upper layer (e.g., procedure) can inform the catalog manager
 // a new catalog is created.
--- a/src/catalog/src/lib.rs
+++ b/src/catalog/src/lib.rs
@@ -19,10 +19,10 @@ use std::any::Any;
 use std::fmt::{Debug, Formatter};
 use std::sync::Arc;

+use api::v1::CreateTableExpr;
 use futures::future::BoxFuture;
 use futures_util::stream::BoxStream;
 use table::metadata::TableId;
-use table::requests::CreateTableRequest;
 use table::TableRef;

 use crate::error::Result;
@@ -75,9 +75,9 @@ pub type OpenSystemTableHook =
 /// Register system table request:
 /// - When system table is already created and registered, the hook will be called
 ///     with table ref after opening the system table
-/// - When system table is not exists, create and register the table by create_table_request and calls open_hook with the created table.
+/// - When the system table does not exist, creates and registers the table by `create_table_expr` and calls `open_hook` with the created table.
 pub struct RegisterSystemTableRequest {
-    pub create_table_request: CreateTableRequest,
+    pub create_table_expr: CreateTableExpr,
    pub open_hook: Option<OpenSystemTableHook>,
 }

--- a/src/catalog/src/table_source.rs
+++ b/src/catalog/src/table_source.rs
@@ -49,10 +49,7 @@ impl DfTableSourceProvider {
        }
    }

-    pub fn resolve_table_ref<'a>(
-        &'a self,
-        table_ref: TableReference<'a>,
-    ) -> Result<ResolvedTableReference<'a>> {
+    pub fn resolve_table_ref(&self, table_ref: TableReference) -> Result<ResolvedTableReference> {
        if self.disallow_cross_catalog_query {
            match &table_ref {
                TableReference::Bare { .. } => (),
@@ -76,7 +73,7 @@ impl DfTableSourceProvider {

    pub async fn resolve_table(
        &mut self,
-        table_ref: TableReference<'_>,
+        table_ref: TableReference,
    ) -> Result<Arc<dyn TableSource>> {
        let table_ref = self.resolve_table_ref(table_ref)?;

@@ -106,8 +103,6 @@ impl DfTableSourceProvider {

 #[cfg(test)]
 mod tests {
-    use std::borrow::Cow;
-
    use session::context::QueryContext;

    use super::*;
@@ -120,68 +115,37 @@ mod tests {
        let table_provider =
            DfTableSourceProvider::new(MemoryCatalogManager::with_default_setup(), true, query_ctx);

-        let table_ref = TableReference::Bare {
-            table: Cow::Borrowed("table_name"),
-        };
+        let table_ref = TableReference::bare("table_name");
        let result = table_provider.resolve_table_ref(table_ref);
        assert!(result.is_ok());

-        let table_ref = TableReference::Partial {
-            schema: Cow::Borrowed("public"),
-            table: Cow::Borrowed("table_name"),
-        };
+        let table_ref = TableReference::partial("public", "table_name");
        let result = table_provider.resolve_table_ref(table_ref);
        assert!(result.is_ok());

-        let table_ref = TableReference::Partial {
-            schema: Cow::Borrowed("wrong_schema"),
-            table: Cow::Borrowed("table_name"),
-        };
+        let table_ref = TableReference::partial("wrong_schema", "table_name");
        let result = table_provider.resolve_table_ref(table_ref);
        assert!(result.is_ok());

-        let table_ref = TableReference::Full {
-            catalog: Cow::Borrowed("greptime"),
-            schema: Cow::Borrowed("public"),
-            table: Cow::Borrowed("table_name"),
-        };
+        let table_ref = TableReference::full("greptime", "public", "table_name");
        let result = table_provider.resolve_table_ref(table_ref);
        assert!(result.is_ok());

-        let table_ref = TableReference::Full {
-            catalog: Cow::Borrowed("wrong_catalog"),
-            schema: Cow::Borrowed("public"),
-            table: Cow::Borrowed("table_name"),
-        };
+        let table_ref = TableReference::full("wrong_catalog", "public", "table_name");
        let result = table_provider.resolve_table_ref(table_ref);
        assert!(result.is_err());

-        let table_ref = TableReference::Partial {
-            schema: Cow::Borrowed("information_schema"),
-            table: Cow::Borrowed("columns"),
-        };
+        let table_ref = TableReference::partial("information_schema", "columns");
        let result = table_provider.resolve_table_ref(table_ref);
        assert!(result.is_ok());

-        let table_ref = TableReference::Full {
-            catalog: Cow::Borrowed("greptime"),
-            schema: Cow::Borrowed("information_schema"),
-            table: Cow::Borrowed("columns"),
-        };
+        let table_ref = TableReference::full("greptime", "information_schema", "columns");
        assert!(table_provider.resolve_table_ref(table_ref).is_ok());

-        let table_ref = TableReference::Full {
-            catalog: Cow::Borrowed("dummy"),
-            schema: Cow::Borrowed("information_schema"),
-            table: Cow::Borrowed("columns"),
-        };
+        let table_ref = TableReference::full("dummy", "information_schema", "columns");
        assert!(table_provider.resolve_table_ref(table_ref).is_err());

-        let table_ref = TableReference::Full {
-            catalog: Cow::Borrowed("greptime"),
-            schema: Cow::Borrowed("greptime_private"),
-            table: Cow::Borrowed("columns"),
-        };
+        let table_ref = TableReference::full("greptime", "greptime_private", "columns");
        assert!(table_provider.resolve_table_ref(table_ref).is_ok());
    }
 }
--- a/src/client/Cargo.toml
+++ b/src/client/Cargo.toml
@@ -16,7 +16,6 @@ arc-swap = "1.6"
 arrow-flight.workspace = true
 async-stream.workspace = true
 async-trait.workspace = true
-common-base.workspace = true
 common-catalog.workspace = true
 common-error.workspace = true
 common-grpc.workspace = true
@@ -25,10 +24,6 @@ common-meta.workspace = true
 common-query.workspace = true
 common-recordbatch.workspace = true
 common-telemetry.workspace = true
-common-time.workspace = true
-datafusion.workspace = true
-datatypes.workspace = true
-derive_builder.workspace = true
 enum_dispatch = "0.3"
 futures-util.workspace = true
 lazy_static.workspace = true
@@ -37,9 +32,7 @@ parking_lot = "0.12"
 prometheus.workspace = true
 prost.workspace = true
 rand.workspace = true
-serde.workspace = true
 serde_json.workspace = true
-session.workspace = true
 snafu.workspace = true
 tokio.workspace = true
 tokio-stream = { workspace = true, features = ["net"] }
--- a/src/client/src/database.rs
+++ b/src/client/src/database.rs
@@ -37,6 +37,8 @@ use snafu::{ensure, ResultExt};
 use crate::error::{ConvertFlightDataSnafu, Error, IllegalFlightMessagesSnafu, ServerSnafu};
 use crate::{error, from_grpc_response, metrics, Client, Result, StreamInserter};

+pub const DEFAULT_LOOKBACK_STRING: &str = "5m"; // value sent as the request's `lookback` field alongside start/end/step
+
 #[derive(Clone, Debug, Default)]
 pub struct Database {
    // The "catalog" and "schema" to be used in processing the requests at the server side.
@@ -215,6 +217,7 @@ impl Database {
                start: start.to_string(),
                end: end.to_string(),
                step: step.to_string(),
+                lookback: DEFAULT_LOOKBACK_STRING.to_string(),
            })),
        }))
        .await
--- a/src/client/src/region.rs
+++ b/src/client/src/region.rs
@@ -14,7 +14,8 @@

 use std::sync::Arc;

-use api::v1::region::{QueryRequest, RegionRequest, RegionResponse};
+use api::region::RegionResponse;
+use api::v1::region::{QueryRequest, RegionRequest};
 use api::v1::ResponseHeader;
 use arc_swap::ArcSwapOption;
 use arrow_flight::Ticket;
@@ -23,7 +24,7 @@ use async_trait::async_trait;
 use common_error::ext::{BoxedError, ErrorExt};
 use common_error::status_code::StatusCode;
 use common_grpc::flight::{FlightDecoder, FlightMessage};
-use common_meta::datanode_manager::{AffectedRows, Datanode};
+use common_meta::datanode_manager::Datanode;
 use common_meta::error::{self as meta_error, Result as MetaResult};
 use common_recordbatch::error::ExternalSnafu;
 use common_recordbatch::{RecordBatchStreamWrapper, SendableRecordBatchStream};
@@ -46,7 +47,7 @@ pub struct RegionRequester {

 #[async_trait]
 impl Datanode for RegionRequester {
-    async fn handle(&self, request: RegionRequest) -> MetaResult<AffectedRows> {
+    async fn handle(&self, request: RegionRequest) -> MetaResult<RegionResponse> {
        self.handle_inner(request).await.map_err(|err| {
            if err.should_retry() {
                meta_error::Error::RetryLater {
@@ -165,7 +166,7 @@ impl RegionRequester {
        Ok(Box::pin(record_batch_stream))
    }

-    async fn handle_inner(&self, request: RegionRequest) -> Result<AffectedRows> {
+    async fn handle_inner(&self, request: RegionRequest) -> Result<RegionResponse> {
        let request_type = request
            .body
            .as_ref()
@@ -178,10 +179,7 @@ impl RegionRequester {

        let mut client = self.client.raw_region_client()?;

-        let RegionResponse {
-            header,
-            affected_rows,
-        } = client
+        let response = client
            .handle(request)
            .await
            .map_err(|e| {
@@ -195,19 +193,20 @@ impl RegionRequester {
            })?
            .into_inner();

-        check_response_header(header)?;
+        check_response_header(&response.header)?;

-        Ok(affected_rows as _)
+        Ok(RegionResponse::from_region_response(response))
    }

-    pub async fn handle(&self, request: RegionRequest) -> Result<AffectedRows> {
+    pub async fn handle(&self, request: RegionRequest) -> Result<RegionResponse> {
        self.handle_inner(request).await
    }
 }

-pub fn check_response_header(header: Option<ResponseHeader>) -> Result<()> {
+pub fn check_response_header(header: &Option<ResponseHeader>) -> Result<()> {
    let status = header
-        .and_then(|header| header.status)
+        .as_ref()
+        .and_then(|header| header.status.as_ref())
        .context(IllegalDatabaseResponseSnafu {
            err_msg: "either response header or status is missing",
        })?;
@@ -221,7 +220,7 @@ pub fn check_response_header(header: Option<ResponseHeader>) -> Result<()> {
            })?;
        ServerSnafu {
            code,
-            msg: status.err_msg,
+            msg: status.err_msg.clone(),
        }
        .fail()
    }
@@ -236,19 +235,19 @@ mod test {

    #[test]
    fn test_check_response_header() {
-        let result = check_response_header(None);
+        let result = check_response_header(&None);
        assert!(matches!(
            result.unwrap_err(),
            IllegalDatabaseResponse { .. }
        ));

-        let result = check_response_header(Some(ResponseHeader { status: None }));
+        let result = check_response_header(&Some(ResponseHeader { status: None }));
        assert!(matches!(
            result.unwrap_err(),
            IllegalDatabaseResponse { .. }
        ));

-        let result = check_response_header(Some(ResponseHeader {
+        let result = check_response_header(&Some(ResponseHeader {
            status: Some(PbStatus {
                status_code: StatusCode::Success as u32,
                err_msg: String::default(),
@@ -256,7 +255,7 @@ mod test {
        }));
        assert!(result.is_ok());

-        let result = check_response_header(Some(ResponseHeader {
+        let result = check_response_header(&Some(ResponseHeader {
            status: Some(PbStatus {
                status_code: u32::MAX,
                err_msg: String::default(),
@@ -267,7 +266,7 @@ mod test {
            IllegalDatabaseResponse { .. }
        ));

-        let result = check_response_header(Some(ResponseHeader {
+        let result = check_response_header(&Some(ResponseHeader {
            status: Some(PbStatus {
                status_code: StatusCode::Internal as u32,
                err_msg: "blabla".to_string(),
--- a/src/cmd/Cargo.toml
+++ b/src/cmd/Cargo.toml
@@ -16,7 +16,6 @@ tokio-console = ["common-telemetry/tokio-console"]
 workspace = true

 [dependencies]
-anymap = "1.0.0-beta.2"
 async-trait.workspace = true
 auth.workspace = true
 catalog.workspace = true
@@ -52,7 +51,6 @@ meta-client.workspace = true
 meta-srv.workspace = true
 mito2.workspace = true
 nu-ansi-term = "0.46"
-partition.workspace = true
 plugins.workspace = true
 prometheus.workspace = true
 prost.workspace = true
@@ -78,6 +76,7 @@ tikv-jemallocator = "0.5"
 common-test-util.workspace = true
 serde.workspace = true
 temp-env = "0.3"
+tempfile.workspace = true

 [target.'cfg(not(windows))'.dev-dependencies]
 rexpect = "0.5"
--- a/src/cmd/build.rs
+++ b/src/cmd/build.rs
@@ -13,5 +13,8 @@
 // limitations under the License.

 fn main() {
+    // Trigger this script if the git branch/commit changes
+    println!("cargo:rerun-if-changed=.git/refs/heads"); // NOTE(review): Cargo resolves this path relative to the package root (this crate's directory), not the repository root — confirm it reaches the intended .git directory
+
    common_version::setup_build_info();
 }
--- a/src/cmd/src/cli.rs
+++ b/src/cmd/src/cli.rs
@@ -84,10 +84,10 @@ impl Command {
        let mut logging_opts = LoggingOptions::default();

        if let Some(dir) = &cli_options.log_dir {
-            logging_opts.dir = dir.clone();
+            logging_opts.dir.clone_from(dir);
        }

-        logging_opts.level = cli_options.log_level.clone();
+        logging_opts.level.clone_from(&cli_options.log_level);

        Ok(Options::Cli(Box::new(logging_opts)))
    }
--- a/src/cmd/src/cli/bench/metadata.rs
+++ b/src/cmd/src/cli/bench/metadata.rs
@@ -106,9 +106,12 @@ impl TableMetadataBencher {
                    .await
                    .unwrap();
                let start = Instant::now();
+                let table_info = table_info.unwrap();
+                let table_route = table_route.unwrap();
+                let table_id = table_info.table_info.ident.table_id;
                let _ = self
                    .table_metadata_manager
-                    .delete_table_metadata(&table_info.unwrap(), &table_route.unwrap())
+                    .delete_table_metadata(table_id, &table_info.table_name(), &table_route)
                    .await;
                start.elapsed()
            },
@@ -134,7 +137,7 @@ impl TableMetadataBencher {
                let start = Instant::now();
                let _ = self
                    .table_metadata_manager
-                    .rename_table(table_info.unwrap(), new_table_name)
+                    .rename_table(&table_info.unwrap(), new_table_name)
                    .await;

                start.elapsed()
--- a/src/cmd/src/cli/export.rs
+++ b/src/cmd/src/cli/export.rs
@@ -226,7 +226,10 @@ impl Export {
    }

    async fn show_create_table(&self, catalog: &str, schema: &str, table: &str) -> Result<String> {
-        let sql = format!("show create table {}.{}.{}", catalog, schema, table);
+        let sql = format!(
+            r#"show create table "{}"."{}"."{}""#,
+            catalog, schema, table
+        );
        let mut client = self.client.clone();
        client.set_catalog(catalog);
        client.set_schema(schema);
@@ -273,7 +276,7 @@ impl Export {
                for (c, s, t) in table_list {
                    match self.show_create_table(&c, &s, &t).await {
                        Err(e) => {
-                            error!(e; "Failed to export table {}.{}.{}", c, s, t)
+                            error!(e; r#"Failed to export table "{}"."{}"."{}""#, c, s, t)
                        }
                        Ok(create_table) => {
                            file.write_all(create_table.as_bytes())
@@ -417,3 +420,84 @@ fn split_database(database: &str) -> Result<(String, Option<String>)> {
        Ok((catalog.to_string(), Some(schema.to_string())))
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use clap::Parser;
+    use client::{Client, Database};
+    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
+
+    use crate::error::Result;
+    use crate::options::{CliOptions, Options};
+    use crate::{cli, standalone, App};
+
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_export_create_table_with_quoted_names() -> Result<()> {
+        let output_dir = tempfile::tempdir().unwrap();
+
+        let standalone = standalone::Command::parse_from([
+            "standalone",
+            "start",
+            "--data-home",
+            &*output_dir.path().to_string_lossy(),
+        ]);
+        let Options::Standalone(standalone_opts) =
+            standalone.load_options(&CliOptions::default())?
+        else {
+            unreachable!()
+        };
+        let mut instance = standalone.build(*standalone_opts).await?;
+        instance.start().await?;
+
+        let client = Client::with_urls(["127.0.0.1:4001"]);
+        let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
+        database
+            .sql(r#"CREATE DATABASE "cli.export.create_table";"#)
+            .await
+            .unwrap();
+        database
+            .sql(
+                r#"CREATE TABLE "cli.export.create_table"."a.b.c"(
+                        ts TIMESTAMP,
+                        TIME INDEX (ts)
+                    ) engine=mito;
+                "#,
+            )
+            .await
+            .unwrap();
+
+        let output_dir = tempfile::tempdir().unwrap();
+        let cli = cli::Command::parse_from([
+            "cli",
+            "export",
+            "--addr",
+            "127.0.0.1:4001",
+            "--output-dir",
+            &*output_dir.path().to_string_lossy(),
+            "--target",
+            "create-table",
+        ]);
+        let mut cli_app = cli.build().await?;
+        cli_app.start().await?;
+
+        instance.stop().await?;
+
+        let output_file = output_dir
+            .path()
+            .join("greptime-cli.export.create_table.sql");
+        let res = std::fs::read_to_string(output_file).unwrap();
+        let expect = r#"CREATE TABLE IF NOT EXISTS "a.b.c" (
+  "ts" TIMESTAMP(3) NOT NULL,
+  TIME INDEX ("ts")
+)
+
+ENGINE=mito
+WITH(
+  regions = 1
+);
+"#;
+        assert_eq!(res.trim(), expect.trim());
+
+        Ok(())
+    }
+}
--- a/src/cmd/src/cli/repl.rs
+++ b/src/cmd/src/cli/repl.rs
@@ -22,6 +22,7 @@ use catalog::kvbackend::{
 use client::{Client, Database, OutputData, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
 use common_base::Plugins;
 use common_error::ext::ErrorExt;
+use common_meta::cache_invalidator::MultiCacheInvalidator;
 use common_query::Output;
 use common_recordbatch::RecordBatches;
 use common_telemetry::logging;
@@ -252,9 +253,11 @@ async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {

    let cached_meta_backend =
        Arc::new(CachedMetaKvBackendBuilder::new(meta_client.clone()).build());
-
+    let multi_cache_invalidator = Arc::new(MultiCacheInvalidator::with_invalidators(vec![
+        cached_meta_backend.clone(),
+    ]));
    let catalog_list =
-        KvBackendCatalogManager::new(cached_meta_backend.clone(), cached_meta_backend);
+        KvBackendCatalogManager::new(cached_meta_backend.clone(), multi_cache_invalidator).await;
    let plugins: Plugins = Default::default();
    let state = Arc::new(QueryEngineState::new(
        catalog_list,
--- a/src/cmd/src/datanode.rs
+++ b/src/cmd/src/datanode.rs
@@ -139,19 +139,19 @@ impl StartCommand {
        )?;

        if let Some(dir) = &cli_options.log_dir {
-            opts.logging.dir = dir.clone();
+            opts.logging.dir.clone_from(dir);
        }

        if cli_options.log_level.is_some() {
-            opts.logging.level = cli_options.log_level.clone();
+            opts.logging.level.clone_from(&cli_options.log_level);
        }

        if let Some(addr) = &self.rpc_addr {
-            opts.rpc_addr = addr.clone();
+            opts.rpc_addr.clone_from(addr);
        }

        if self.rpc_hostname.is_some() {
-            opts.rpc_hostname = self.rpc_hostname.clone();
+            opts.rpc_hostname.clone_from(&self.rpc_hostname);
        }

        if let Some(node_id) = self.node_id {
@@ -161,7 +161,8 @@ impl StartCommand {
        if let Some(metasrv_addrs) = &self.metasrv_addr {
            opts.meta_client
                .get_or_insert_with(MetaClientOptions::default)
-                .metasrv_addrs = metasrv_addrs.clone();
+                .metasrv_addrs
+                .clone_from(metasrv_addrs);
            opts.mode = Mode::Distributed;
        }

@@ -173,7 +174,7 @@ impl StartCommand {
        }

        if let Some(data_home) = &self.data_home {
-            opts.storage.data_home = data_home.clone();
+            opts.storage.data_home.clone_from(data_home);
        }

        // `wal_dir` only affects raft-engine config.
@@ -191,7 +192,7 @@ impl StartCommand {
        }

        if let Some(http_addr) = &self.http_addr {
-            opts.http.addr = http_addr.clone();
+            opts.http.addr.clone_from(http_addr);
        }

        if let Some(http_timeout) = self.http_timeout {
--- a/src/cmd/src/frontend.rs
+++ b/src/cmd/src/frontend.rs
@@ -16,9 +16,10 @@ use std::sync::Arc;
 use std::time::Duration;

 use async_trait::async_trait;
-use catalog::kvbackend::CachedMetaKvBackendBuilder;
+use catalog::kvbackend::{CachedMetaKvBackendBuilder, KvBackendCatalogManager};
 use clap::Parser;
 use client::client_manager::DatanodeClients;
+use common_meta::cache_invalidator::MultiCacheInvalidator;
 use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
 use common_meta::heartbeat::handler::HandlerGroupExecutor;
 use common_telemetry::logging;
@@ -156,11 +157,11 @@ impl StartCommand {
        )?;

        if let Some(dir) = &cli_options.log_dir {
-            opts.logging.dir = dir.clone();
+            opts.logging.dir.clone_from(dir);
        }

        if cli_options.log_level.is_some() {
-            opts.logging.level = cli_options.log_level.clone();
+            opts.logging.level.clone_from(&cli_options.log_level);
        }

        let tls_opts = TlsOption::new(
@@ -170,7 +171,7 @@ impl StartCommand {
        );

        if let Some(addr) = &self.http_addr {
-            opts.http.addr = addr.clone()
+            opts.http.addr.clone_from(addr);
        }

        if let Some(http_timeout) = self.http_timeout {
@@ -182,24 +183,24 @@ impl StartCommand {
        }

        if let Some(addr) = &self.rpc_addr {
-            opts.grpc.addr = addr.clone()
+            opts.grpc.addr.clone_from(addr);
        }

        if let Some(addr) = &self.mysql_addr {
            opts.mysql.enable = true;
-            opts.mysql.addr = addr.clone();
+            opts.mysql.addr.clone_from(addr);
            opts.mysql.tls = tls_opts.clone();
        }

        if let Some(addr) = &self.postgres_addr {
            opts.postgres.enable = true;
-            opts.postgres.addr = addr.clone();
+            opts.postgres.addr.clone_from(addr);
            opts.postgres.tls = tls_opts;
        }

        if let Some(addr) = &self.opentsdb_addr {
            opts.opentsdb.enable = true;
-            opts.opentsdb.addr = addr.clone();
+            opts.opentsdb.addr.clone_from(addr);
        }

        if let Some(enable) = self.influxdb_enable {
@@ -209,11 +210,12 @@ impl StartCommand {
        if let Some(metasrv_addrs) = &self.metasrv_addr {
            opts.meta_client
                .get_or_insert_with(MetaClientOptions::default)
-                .metasrv_addrs = metasrv_addrs.clone();
+                .metasrv_addrs
+                .clone_from(metasrv_addrs);
            opts.mode = Mode::Distributed;
        }

-        opts.user_provider = self.user_provider.clone();
+        opts.user_provider.clone_from(&self.user_provider);

        Ok(Options::Frontend(Box::new(opts)))
    }
@@ -247,11 +249,19 @@ impl StartCommand {
            .cache_tti(cache_tti)
            .build();
        let cached_meta_backend = Arc::new(cached_meta_backend);
+        let multi_cache_invalidator = Arc::new(MultiCacheInvalidator::with_invalidators(vec![
+            cached_meta_backend.clone(),
+        ]));
+        let catalog_manager = KvBackendCatalogManager::new(
+            cached_meta_backend.clone(),
+            multi_cache_invalidator.clone(),
+        )
+        .await;

        let executor = HandlerGroupExecutor::new(vec![
            Arc::new(ParseMailboxMessageHandler),
            Arc::new(InvalidateTableCacheHandler::new(
-                cached_meta_backend.clone(),
+                multi_cache_invalidator.clone(),
            )),
        ]);

@@ -263,11 +273,12 @@ impl StartCommand {

        let mut instance = FrontendBuilder::new(
            cached_meta_backend.clone(),
+            catalog_manager,
            Arc::new(DatanodeClients::default()),
            meta_client,
        )
-        .with_cache_invalidator(cached_meta_backend)
        .with_plugin(plugins.clone())
+        .with_cache_invalidator(multi_cache_invalidator)
        .with_heartbeat_task(heartbeat_task)
        .try_build()
        .await
--- a/src/cmd/src/metasrv.rs
+++ b/src/cmd/src/metasrv.rs
@@ -17,8 +17,8 @@ use std::time::Duration;
 use async_trait::async_trait;
 use clap::Parser;
 use common_telemetry::logging;
-use meta_srv::bootstrap::MetaSrvInstance;
-use meta_srv::metasrv::MetaSrvOptions;
+use meta_srv::bootstrap::MetasrvInstance;
+use meta_srv::metasrv::MetasrvOptions;
 use snafu::ResultExt;

 use crate::error::{self, Result, StartMetaServerSnafu};
@@ -26,11 +26,11 @@ use crate::options::{CliOptions, Options};
 use crate::App;

 pub struct Instance {
-    instance: MetaSrvInstance,
+    instance: MetasrvInstance,
 }

 impl Instance {
-    fn new(instance: MetaSrvInstance) -> Self {
+    fn new(instance: MetasrvInstance) -> Self {
        Self { instance }
    }
 }
@@ -42,7 +42,7 @@ impl App for Instance {
    }

    async fn start(&mut self) -> Result<()> {
-        plugins::start_meta_srv_plugins(self.instance.plugins())
+        plugins::start_metasrv_plugins(self.instance.plugins())
            .await
            .context(StartMetaServerSnafu)?;

@@ -64,7 +64,7 @@ pub struct Command {
 }

 impl Command {
-    pub async fn build(self, opts: MetaSrvOptions) -> Result<Instance> {
+    pub async fn build(self, opts: MetasrvOptions) -> Result<Instance> {
        self.subcmd.build(opts).await
    }

@@ -79,7 +79,7 @@ enum SubCommand {
 }

 impl SubCommand {
-    async fn build(self, opts: MetaSrvOptions) -> Result<Instance> {
+    async fn build(self, opts: MetasrvOptions) -> Result<Instance> {
        match self {
            SubCommand::Start(cmd) => cmd.build(opts).await,
        }
@@ -127,30 +127,30 @@ struct StartCommand {

 impl StartCommand {
    fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
-        let mut opts: MetaSrvOptions = Options::load_layered_options(
+        let mut opts: MetasrvOptions = Options::load_layered_options(
            self.config_file.as_deref(),
            self.env_prefix.as_ref(),
-            MetaSrvOptions::env_list_keys(),
+            MetasrvOptions::env_list_keys(),
        )?;

        if let Some(dir) = &cli_options.log_dir {
-            opts.logging.dir = dir.clone();
+            opts.logging.dir.clone_from(dir);
        }

        if cli_options.log_level.is_some() {
-            opts.logging.level = cli_options.log_level.clone();
+            opts.logging.level.clone_from(&cli_options.log_level);
        }

        if let Some(addr) = &self.bind_addr {
-            opts.bind_addr = addr.clone();
+            opts.bind_addr.clone_from(addr);
        }

        if let Some(addr) = &self.server_addr {
-            opts.server_addr = addr.clone();
+            opts.server_addr.clone_from(addr);
        }

        if let Some(addr) = &self.store_addr {
-            opts.store_addr = addr.clone();
+            opts.store_addr.clone_from(addr);
        }

        if let Some(selector_type) = &self.selector {
@@ -168,7 +168,7 @@ impl StartCommand {
        }

        if let Some(http_addr) = &self.http_addr {
-            opts.http.addr = http_addr.clone();
+            opts.http.addr.clone_from(http_addr);
        }

        if let Some(http_timeout) = self.http_timeout {
@@ -176,11 +176,11 @@ impl StartCommand {
        }

        if let Some(data_home) = &self.data_home {
-            opts.data_home = data_home.clone();
+            opts.data_home.clone_from(data_home);
        }

        if !self.store_key_prefix.is_empty() {
-            opts.store_key_prefix = self.store_key_prefix.clone()
+            opts.store_key_prefix.clone_from(&self.store_key_prefix)
        }

        if let Some(max_txn_ops) = self.max_txn_ops {
@@ -193,20 +193,20 @@ impl StartCommand {
        Ok(Options::Metasrv(Box::new(opts)))
    }

-    async fn build(self, mut opts: MetaSrvOptions) -> Result<Instance> {
-        let plugins = plugins::setup_meta_srv_plugins(&mut opts)
+    async fn build(self, mut opts: MetasrvOptions) -> Result<Instance> {
+        let plugins = plugins::setup_metasrv_plugins(&mut opts)
            .await
            .context(StartMetaServerSnafu)?;

-        logging::info!("MetaSrv start command: {:#?}", self);
-        logging::info!("MetaSrv options: {:#?}", opts);
+        logging::info!("Metasrv start command: {:#?}", self);
+        logging::info!("Metasrv options: {:#?}", opts);

        let builder = meta_srv::bootstrap::metasrv_builder(&opts, plugins.clone(), None)
            .await
            .context(error::BuildMetaServerSnafu)?;
        let metasrv = builder.build().await.context(error::BuildMetaServerSnafu)?;

-        let instance = MetaSrvInstance::new(opts, plugins, metasrv)
+        let instance = MetasrvInstance::new(opts, plugins, metasrv)
            .await
            .context(error::BuildMetaServerSnafu)?;

@@ -218,6 +218,7 @@ impl StartCommand {
 mod tests {
    use std::io::Write;

+    use common_base::readable_size::ReadableSize;
    use common_test_util::temp_dir::create_named_temp_file;
    use meta_srv::selector::SelectorType;

@@ -297,6 +298,10 @@ mod tests {
                .first_heartbeat_estimate
                .as_millis()
        );
+        assert_eq!(
+            options.procedure.max_metadata_value_size,
+            Some(ReadableSize::kb(1500))
+        );
    }

    #[test]
--- a/src/cmd/src/options.rs
+++ b/src/cmd/src/options.rs
@@ -15,12 +15,12 @@
 use clap::ArgMatches;
 use common_config::KvBackendConfig;
 use common_telemetry::logging::{LoggingOptions, TracingOptions};
-use common_wal::config::MetaSrvWalConfig;
+use common_wal::config::MetasrvWalConfig;
 use config::{Config, Environment, File, FileFormat};
 use datanode::config::{DatanodeOptions, ProcedureConfig};
 use frontend::error::{Result as FeResult, TomlFormatSnafu};
 use frontend::frontend::{FrontendOptions, TomlSerializable};
-use meta_srv::metasrv::MetaSrvOptions;
+use meta_srv::metasrv::MetasrvOptions;
 use serde::{Deserialize, Serialize};
 use snafu::ResultExt;

@@ -38,7 +38,7 @@ pub struct MixOptions {
    pub frontend: FrontendOptions,
    pub datanode: DatanodeOptions,
    pub logging: LoggingOptions,
-    pub wal_meta: MetaSrvWalConfig,
+    pub wal_meta: MetasrvWalConfig,
 }

 impl From<MixOptions> for FrontendOptions {
@@ -56,7 +56,7 @@ impl TomlSerializable for MixOptions {
 pub enum Options {
    Datanode(Box<DatanodeOptions>),
    Frontend(Box<FrontendOptions>),
-    Metasrv(Box<MetaSrvOptions>),
+    Metasrv(Box<MetasrvOptions>),
    Standalone(Box<MixOptions>),
    Cli(Box<LoggingOptions>),
 }
--- a/src/cmd/src/standalone.rs
+++ b/src/cmd/src/standalone.rs
@@ -16,10 +16,11 @@ use std::sync::Arc;
 use std::{fs, path};

 use async_trait::async_trait;
+use catalog::kvbackend::KvBackendCatalogManager;
 use clap::Parser;
 use common_catalog::consts::MIN_USER_TABLE_ID;
 use common_config::{metadata_store_dir, KvBackendConfig};
-use common_meta::cache_invalidator::DummyCacheInvalidator;
+use common_meta::cache_invalidator::{CacheInvalidatorRef, MultiCacheInvalidator};
 use common_meta::datanode_manager::DatanodeManagerRef;
 use common_meta::ddl::table_meta::{TableMetadataAllocator, TableMetadataAllocatorRef};
 use common_meta::ddl::ProcedureExecutorRef;
@@ -292,11 +293,11 @@ impl StartCommand {
        opts.mode = Mode::Standalone;

        if let Some(dir) = &cli_options.log_dir {
-            opts.logging.dir = dir.clone();
+            opts.logging.dir.clone_from(dir);
        }

        if cli_options.log_level.is_some() {
-            opts.logging.level = cli_options.log_level.clone();
+            opts.logging.level.clone_from(&cli_options.log_level);
        }

        let tls_opts = TlsOption::new(
@@ -306,11 +307,11 @@ impl StartCommand {
        );

        if let Some(addr) = &self.http_addr {
-            opts.http.addr = addr.clone()
+            opts.http.addr.clone_from(addr);
        }

        if let Some(data_home) = &self.data_home {
-            opts.storage.data_home = data_home.clone();
+            opts.storage.data_home.clone_from(data_home);
        }

        if let Some(addr) = &self.rpc_addr {
@@ -324,31 +325,31 @@ impl StartCommand {
                }
                .fail();
            }
-            opts.grpc.addr = addr.clone()
+            opts.grpc.addr.clone_from(addr)
        }

        if let Some(addr) = &self.mysql_addr {
            opts.mysql.enable = true;
-            opts.mysql.addr = addr.clone();
+            opts.mysql.addr.clone_from(addr);
            opts.mysql.tls = tls_opts.clone();
        }

        if let Some(addr) = &self.postgres_addr {
            opts.postgres.enable = true;
-            opts.postgres.addr = addr.clone();
+            opts.postgres.addr.clone_from(addr);
            opts.postgres.tls = tls_opts;
        }

        if let Some(addr) = &self.opentsdb_addr {
            opts.opentsdb.enable = true;
-            opts.opentsdb.addr = addr.clone();
+            opts.opentsdb.addr.clone_from(addr);
        }

        if self.influxdb_enable {
            opts.influxdb.enable = self.influxdb_enable;
        }

-        opts.user_provider = self.user_provider.clone();
+        opts.user_provider.clone_from(&self.user_provider);

        let metadata_store = opts.metadata_store.clone();
        let procedure = opts.procedure.clone();
@@ -399,6 +400,10 @@ impl StartCommand {
        .await
        .context(StartFrontendSnafu)?;

+        let multi_cache_invalidator = Arc::new(MultiCacheInvalidator::default());
+        let catalog_manager =
+            KvBackendCatalogManager::new(kv_backend.clone(), multi_cache_invalidator.clone()).await;
+
        let builder =
            DatanodeBuilder::new(dn_opts, fe_plugins.clone()).with_kv_backend(kv_backend.clone());
        let datanode = builder.build().await.context(StartDatanodeSnafu)?;
@@ -422,22 +427,27 @@ impl StartCommand {
        let table_meta_allocator = Arc::new(TableMetadataAllocator::new(
            table_id_sequence,
            wal_options_allocator.clone(),
-            table_metadata_manager.table_name_manager().clone(),
        ));

        let ddl_task_executor = Self::create_ddl_task_executor(
            table_metadata_manager,
            procedure_manager.clone(),
            datanode_manager.clone(),
+            multi_cache_invalidator,
            table_meta_allocator,
        )
        .await?;

-        let mut frontend = FrontendBuilder::new(kv_backend, datanode_manager, ddl_task_executor)
-            .with_plugin(fe_plugins.clone())
-            .try_build()
-            .await
-            .context(StartFrontendSnafu)?;
+        let mut frontend = FrontendBuilder::new(
+            kv_backend,
+            catalog_manager,
+            datanode_manager,
+            ddl_task_executor,
+        )
+        .with_plugin(fe_plugins.clone())
+        .try_build()
+        .await
+        .context(StartFrontendSnafu)?;

        let servers = Services::new(fe_opts.clone(), Arc::new(frontend.clone()), fe_plugins)
            .build()
@@ -459,16 +469,18 @@ impl StartCommand {
        table_metadata_manager: TableMetadataManagerRef,
        procedure_manager: ProcedureManagerRef,
        datanode_manager: DatanodeManagerRef,
+        cache_invalidator: CacheInvalidatorRef,
        table_meta_allocator: TableMetadataAllocatorRef,
    ) -> Result<ProcedureExecutorRef> {
        let procedure_executor: ProcedureExecutorRef = Arc::new(
            DdlManager::try_new(
                procedure_manager,
                datanode_manager,
-                Arc::new(DummyCacheInvalidator),
+                cache_invalidator,
                table_metadata_manager,
                table_meta_allocator,
                Arc::new(MemoryRegionKeeper::default()),
+                true,
            )
            .context(InitDdlManagerSnafu)?,
        );
--- a/src/common/base/src/readable_size.rs
+++ b/src/common/base/src/readable_size.rs
@@ -1,20 +1,6 @@
 // Copyright (c) 2017-present, PingCAP, Inc. Licensed under Apache-2.0.

-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// This file is copied from https://github.com/tikv/raft-engine/blob/8dd2a39f359ff16f5295f35343f626e0c10132fa/src/util.rs
+// This file is copied from https://github.com/tikv/raft-engine/blob/0.3.0/src/util.rs

 use std::fmt::{self, Debug, Display, Write};
 use std::ops::{Div, Mul};
--- a/src/common/catalog/src/consts.rs
+++ b/src/common/catalog/src/consts.rs
@@ -86,6 +86,8 @@ pub const INFORMATION_SCHEMA_RUNTIME_METRICS_TABLE_ID: u32 = 27;
 pub const INFORMATION_SCHEMA_PARTITIONS_TABLE_ID: u32 = 28;
 /// id for information_schema.REGION_PEERS
 pub const INFORMATION_SCHEMA_REGION_PEERS_TABLE_ID: u32 = 29;
+/// id for information_schema.TABLE_CONSTRAINTS
+pub const INFORMATION_SCHEMA_TABLE_CONSTRAINTS_TABLE_ID: u32 = 30;
 /// ----- End of information_schema tables -----

 pub const MITO_ENGINE: &str = "mito";
--- a/src/common/catalog/src/lib.rs
+++ b/src/common/catalog/src/lib.rs
@@ -55,10 +55,10 @@ pub fn build_db_string(catalog: &str, schema: &str) -> String {
 /// schema name
 /// - if `[<catalog>-]` is provided, we split database name with `-` and use
 /// `<catalog>` and `<schema>`.
-pub fn parse_catalog_and_schema_from_db_string(db: &str) -> (&str, &str) {
+pub fn parse_catalog_and_schema_from_db_string(db: &str) -> (String, String) {
    match parse_optional_catalog_and_schema_from_db_string(db) {
        (Some(catalog), schema) => (catalog, schema),
-        (None, schema) => (DEFAULT_CATALOG_NAME, schema),
+        (None, schema) => (DEFAULT_CATALOG_NAME.to_string(), schema),
    }
 }

@@ -66,12 +66,12 @@ pub fn parse_catalog_and_schema_from_db_string(db: &str) -> (&str, &str) {
 ///
 /// Similar to [`parse_catalog_and_schema_from_db_string`] but returns an optional
 /// catalog if it's not provided in the database name.
-pub fn parse_optional_catalog_and_schema_from_db_string(db: &str) -> (Option<&str>, &str) {
+pub fn parse_optional_catalog_and_schema_from_db_string(db: &str) -> (Option<String>, String) {
    let parts = db.splitn(2, '-').collect::<Vec<&str>>();
    if parts.len() == 2 {
-        (Some(parts[0]), parts[1])
+        (Some(parts[0].to_lowercase()), parts[1].to_lowercase())
    } else {
-        (None, db)
+        (None, db.to_lowercase())
    }
 }

@@ -88,32 +88,37 @@ mod tests {
    #[test]
    fn test_parse_catalog_and_schema() {
        assert_eq!(
-            (DEFAULT_CATALOG_NAME, "fullschema"),
+            (DEFAULT_CATALOG_NAME.to_string(), "fullschema".to_string()),
            parse_catalog_and_schema_from_db_string("fullschema")
        );

        assert_eq!(
-            ("catalog", "schema"),
+            ("catalog".to_string(), "schema".to_string()),
            parse_catalog_and_schema_from_db_string("catalog-schema")
        );

        assert_eq!(
-            ("catalog", "schema1-schema2"),
+            ("catalog".to_string(), "schema1-schema2".to_string()),
            parse_catalog_and_schema_from_db_string("catalog-schema1-schema2")
        );

        assert_eq!(
-            (None, "fullschema"),
+            (None, "fullschema".to_string()),
            parse_optional_catalog_and_schema_from_db_string("fullschema")
        );

        assert_eq!(
-            (Some("catalog"), "schema"),
+            (Some("catalog".to_string()), "schema".to_string()),
            parse_optional_catalog_and_schema_from_db_string("catalog-schema")
        );

        assert_eq!(
-            (Some("catalog"), "schema1-schema2"),
+            (Some("catalog".to_string()), "schema".to_string()),
+            parse_optional_catalog_and_schema_from_db_string("CATALOG-SCHEMA")
+        );
+
+        assert_eq!(
+            (Some("catalog".to_string()), "schema1-schema2".to_string()),
            parse_optional_catalog_and_schema_from_db_string("catalog-schema1-schema2")
        );
    }
--- a/src/common/config/Cargo.toml
+++ b/src/common/config/Cargo.toml
@@ -9,7 +9,6 @@ workspace = true

 [dependencies]
 common-base.workspace = true
-humantime-serde.workspace = true
 num_cpus.workspace = true
 serde.workspace = true
 sysinfo.workspace = true
--- a/src/common/datasource/Cargo.toml
+++ b/src/common/datasource/Cargo.toml
@@ -30,7 +30,7 @@ derive_builder.workspace = true
 futures.workspace = true
 lazy_static.workspace = true
 object-store.workspace = true
-orc-rust = "0.2"
+orc-rust = { git = "https://github.com/MichaelScofield/orc-rs.git", rev = "17347f5f084ac937863317df882218055c4ea8c1" }
 parquet.workspace = true
 paste = "1.0"
 regex = "1.7"
--- a/src/common/datasource/src/buffered_writer.rs
+++ b/src/common/datasource/src/buffered_writer.rs
@@ -60,12 +60,6 @@ impl<
            .context(error::BufferedWriterClosedSnafu)?;
        let metadata = encoder.close().await?;

-        // Use `rows_written` to keep a track of if any rows have been written.
-        // If no row's been written, then we can simply close the underlying
-        // writer without flush so that no file will be actually created.
-        if self.rows_written != 0 {
-            self.bytes_written += self.try_flush(true).await?;
-        }
        // It's important to shut down! flushes all pending writes
        self.close_inner_writer().await?;
        Ok((metadata, self.bytes_written))
@@ -79,8 +73,15 @@ impl<
        Fut: Future<Output = Result<T>>,
    > LazyBufferedWriter<T, U, F>
 {
-    /// Closes the writer without flushing the buffer data.
+    /// Closes the writer and flushes the buffer data.
    pub async fn close_inner_writer(&mut self) -> Result<()> {
+        // Use `rows_written` to keep track of whether any rows have been written.
+        // If no rows have been written, we can simply close the underlying
+        // writer without flushing, so that no file is actually created.
+        if self.rows_written != 0 {
+            self.bytes_written += self.try_flush(true).await?;
+        }
+
        if let Some(writer) = &mut self.writer {
            writer.shutdown().await.context(error::AsyncWriteSnafu)?;
        }
@@ -117,7 +118,7 @@ impl<
        Ok(())
    }

-    pub async fn try_flush(&mut self, all: bool) -> Result<u64> {
+    async fn try_flush(&mut self, all: bool) -> Result<u64> {
        let mut bytes_written: u64 = 0;

        // Once buffered data size reaches threshold, split the data in chunks (typically 4MB)
--- a/src/common/datasource/src/file_format.rs
+++ b/src/common/datasource/src/file_format.rs
@@ -213,10 +213,6 @@ pub async fn stream_to_file<T: DfRecordBatchEncoder, U: Fn(SharedBuffer) -> T>(
        writer.write(&batch).await?;
        rows += batch.num_rows();
    }
-
-    // Flushes all pending writes
-    let _ = writer.try_flush(true).await?;
    writer.close_inner_writer().await?;
-
    Ok(rows)
 }
--- a/src/common/datasource/src/file_format/csv.rs
+++ b/src/common/datasource/src/file_format/csv.rs
@@ -117,7 +117,7 @@ impl CsvConfig {
        let mut builder = csv::ReaderBuilder::new(self.file_schema.clone())
            .with_delimiter(self.delimiter)
            .with_batch_size(self.batch_size)
-            .has_header(self.has_header);
+            .with_header(self.has_header);

        if let Some(proj) = &self.file_projection {
            builder = builder.with_projection(proj.clone());
--- a/src/common/datasource/src/file_format/parquet.rs
+++ b/src/common/datasource/src/file_format/parquet.rs
@@ -215,10 +215,7 @@ impl BufferedWriter {

    /// Write a record batch to stream writer.
    pub async fn write(&mut self, arrow_batch: &RecordBatch) -> error::Result<()> {
-        self.inner.write(arrow_batch).await?;
-        self.inner.try_flush(false).await?;
-
-        Ok(())
+        self.inner.write(arrow_batch).await
    }

    /// Close parquet writer.
--- a/src/common/datasource/src/file_format/tests.rs
+++ b/src/common/datasource/src/file_format/tests.rs
@@ -19,6 +19,7 @@ use std::vec;

 use common_test_util::find_workspace_path;
 use datafusion::assert_batches_eq;
+use datafusion::config::TableParquetOptions;
 use datafusion::datasource::physical_plan::{FileOpener, FileScanConfig, FileStream, ParquetExec};
 use datafusion::execution::context::TaskContext;
 use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
@@ -166,7 +167,7 @@ async fn test_parquet_exec() {
        .to_string();
    let base_config = scan_config(schema.clone(), None, path);

-    let exec = ParquetExec::new(base_config, None, None)
+    let exec = ParquetExec::new(base_config, None, None, TableParquetOptions::default())
        .with_parquet_file_reader_factory(Arc::new(DefaultParquetFileReaderFactory::new(store)));

    let ctx = SessionContext::new();
--- a/src/common/datasource/src/test_util.rs
+++ b/src/common/datasource/src/test_util.rs
@@ -16,6 +16,7 @@ use std::sync::Arc;

 use arrow_schema::{DataType, Field, Schema, SchemaRef};
 use common_test_util::temp_dir::{create_temp_dir, TempDir};
+use datafusion::common::Statistics;
 use datafusion::datasource::listing::PartitionedFile;
 use datafusion::datasource::object_store::ObjectStoreUrl;
 use datafusion::datasource::physical_plan::{FileScanConfig, FileStream};
@@ -72,17 +73,16 @@ pub fn test_basic_schema() -> SchemaRef {
 pub fn scan_config(file_schema: SchemaRef, limit: Option<usize>, filename: &str) -> FileScanConfig {
    // object_store only recognize the Unix style path, so make it happy.
    let filename = &filename.replace('\\', "/");
-
+    let statistics = Statistics::new_unknown(file_schema.as_ref());
    FileScanConfig {
        object_store_url: ObjectStoreUrl::parse("empty://").unwrap(), // won't be used
        file_schema,
        file_groups: vec![vec![PartitionedFile::new(filename.to_string(), 10)]],
-        statistics: Default::default(),
+        statistics,
        projection: None,
        limit,
        table_partition_cols: vec![],
        output_ordering: vec![],
-        infinite_source: false,
    }
 }

--- a/src/common/decimal/Cargo.toml
+++ b/src/common/decimal/Cargo.toml
@@ -8,7 +8,6 @@ license.workspace = true
 workspace = true

 [dependencies]
-arrow.workspace = true
 bigdecimal.workspace = true
 common-error.workspace = true
 common-macro.workspace = true
--- a/src/common/function/Cargo.toml
+++ b/src/common/function/Cargo.toml
@@ -11,7 +11,6 @@ workspace = true
 api.workspace = true
 arc-swap = "1.0"
 async-trait.workspace = true
-chrono-tz = "0.6"
 common-base.workspace = true
 common-catalog.workspace = true
 common-error.workspace = true
@@ -24,7 +23,6 @@ common-time.workspace = true
 common-version.workspace = true
 datafusion.workspace = true
 datatypes.workspace = true
-libc = "0.2"
 num = "0.4"
 num-traits = "0.2"
 once_cell.workspace = true
--- a/src/common/function/src/handlers.rs
+++ b/src/common/function/src/handlers.rs
@@ -18,6 +18,7 @@ use async_trait::async_trait;
 use common_base::AffectedRows;
 use common_meta::rpc::procedure::{MigrateRegionRequest, ProcedureStateResponse};
 use common_query::error::Result;
+use common_query::Output;
 use session::context::QueryContextRef;
 use store_api::storage::RegionId;
 use table::requests::{CompactTableRequest, DeleteRequest, FlushTableRequest, InsertRequest};
@@ -26,7 +27,7 @@ use table::requests::{CompactTableRequest, DeleteRequest, FlushTableRequest, Ins
 #[async_trait]
 pub trait TableMutationHandler: Send + Sync {
    /// Inserts rows into the table.
-    async fn insert(&self, request: InsertRequest, ctx: QueryContextRef) -> Result<AffectedRows>;
+    async fn insert(&self, request: InsertRequest, ctx: QueryContextRef) -> Result<Output>;

    /// Delete rows from the table.
    async fn delete(&self, request: DeleteRequest, ctx: QueryContextRef) -> Result<AffectedRows>;
--- a/src/common/function/src/scalars/aggregate/diff.rs
+++ b/src/common/function/src/scalars/aggregate/diff.rs
@@ -56,7 +56,7 @@ where
            .map(|&n| n.into())
            .collect::<Vec<Value>>();
        Ok(vec![Value::List(ListValue::new(
-            Some(Box::new(nums)),
+            nums,
            I::LogicalType::build_data_type(),
        ))])
    }
@@ -120,10 +120,7 @@ where
                O::from_native(native).into()
            })
            .collect::<Vec<Value>>();
-        let diff = Value::List(ListValue::new(
-            Some(Box::new(diff)),
-            O::LogicalType::build_data_type(),
-        ));
+        let diff = Value::List(ListValue::new(diff, O::LogicalType::build_data_type()));
        Ok(diff)
    }
 }
@@ -218,10 +215,7 @@ mod test {
        let values = vec![Value::from(2_i64), Value::from(1_i64)];
        diff.update_batch(&v).unwrap();
        assert_eq!(
-            Value::List(ListValue::new(
-                Some(Box::new(values)),
-                ConcreteDataType::int64_datatype()
-            )),
+            Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
            diff.evaluate().unwrap()
        );

@@ -236,10 +230,7 @@ mod test {
        let values = vec![Value::from(5_i64), Value::from(1_i64)];
        diff.update_batch(&v).unwrap();
        assert_eq!(
-            Value::List(ListValue::new(
-                Some(Box::new(values)),
-                ConcreteDataType::int64_datatype()
-            )),
+            Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
            diff.evaluate().unwrap()
        );

@@ -252,10 +243,7 @@ mod test {
        let values = vec![Value::from(0_i64), Value::from(0_i64), Value::from(0_i64)];
        diff.update_batch(&v).unwrap();
        assert_eq!(
-            Value::List(ListValue::new(
-                Some(Box::new(values)),
-                ConcreteDataType::int64_datatype()
-            )),
+            Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
            diff.evaluate().unwrap()
        );
    }
--- a/src/common/function/src/scalars/aggregate/percentile.rs
+++ b/src/common/function/src/scalars/aggregate/percentile.rs
@@ -104,10 +104,7 @@ where
            .map(|&n| n.into())
            .collect::<Vec<Value>>();
        Ok(vec![
-            Value::List(ListValue::new(
-                Some(Box::new(nums)),
-                T::LogicalType::build_data_type(),
-            )),
+            Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
            self.p.into(),
        ])
    }
--- a/src/common/function/src/scalars/aggregate/polyval.rs
+++ b/src/common/function/src/scalars/aggregate/polyval.rs
@@ -72,10 +72,7 @@ where
            .map(|&n| n.into())
            .collect::<Vec<Value>>();
        Ok(vec![
-            Value::List(ListValue::new(
-                Some(Box::new(nums)),
-                T::LogicalType::build_data_type(),
-            )),
+            Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
            self.x.into(),
        ])
    }
--- a/src/common/function/src/scalars/aggregate/scipy_stats_norm_cdf.rs
+++ b/src/common/function/src/scalars/aggregate/scipy_stats_norm_cdf.rs
@@ -56,10 +56,7 @@ where
            .map(|&x| x.into())
            .collect::<Vec<Value>>();
        Ok(vec![
-            Value::List(ListValue::new(
-                Some(Box::new(nums)),
-                T::LogicalType::build_data_type(),
-            )),
+            Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
            self.x.into(),
        ])
    }
--- a/src/common/function/src/scalars/aggregate/scipy_stats_norm_pdf.rs
+++ b/src/common/function/src/scalars/aggregate/scipy_stats_norm_pdf.rs
@@ -56,10 +56,7 @@ where
            .map(|&x| x.into())
            .collect::<Vec<Value>>();
        Ok(vec![
-            Value::List(ListValue::new(
-                Some(Box::new(nums)),
-                T::LogicalType::build_data_type(),
-            )),
+            Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
            self.x.into(),
        ])
    }
--- a/src/common/function/src/scalars/math.rs
+++ b/src/common/function/src/scalars/math.rs
@@ -77,7 +77,7 @@ impl Function for RangeFunction {
    /// `range_fn` will never been used. As long as a legal signature is returned, the specific content of the signature does not matter.
    /// In fact, the arguments loaded by `range_fn` are very complicated, and it is difficult to use `Signature` to describe
    fn signature(&self) -> Signature {
-        Signature::any(0, Volatility::Immutable)
+        Signature::variadic_any(Volatility::Immutable)
    }

    fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
--- a/src/common/function/src/scalars/timestamp/to_timezone.rs
+++ b/src/common/function/src/scalars/timestamp/to_timezone.rs
@@ -23,7 +23,7 @@ use datatypes::prelude::VectorRef;
 use datatypes::types::TimestampType;
 use datatypes::value::Value;
 use datatypes::vectors::{
-    StringVector, TimestampMicrosecondVector, TimestampMillisecondVector,
+    Int64Vector, StringVector, TimestampMicrosecondVector, TimestampMillisecondVector,
    TimestampNanosecondVector, TimestampSecondVector, Vector,
 };
 use snafu::{ensure, OptionExt};
@@ -43,6 +43,7 @@ fn convert_to_timezone(arg: &str) -> Option<Timezone> {
 fn convert_to_timestamp(arg: &Value) -> Option<Timestamp> {
    match arg {
        Value::Timestamp(ts) => Some(*ts),
+        Value::Int64(i) => Some(Timestamp::new_millisecond(*i)),
        _ => None,
    }
 }
@@ -66,6 +67,8 @@ impl Function for ToTimezoneFunction {
    fn signature(&self) -> Signature {
        helper::one_of_sigs2(
            vec![
+                ConcreteDataType::int32_datatype(),
+                ConcreteDataType::int64_datatype(),
                ConcreteDataType::timestamp_second_datatype(),
                ConcreteDataType::timestamp_millisecond_datatype(),
                ConcreteDataType::timestamp_microsecond_datatype(),
@@ -86,39 +89,45 @@ impl Function for ToTimezoneFunction {
            }
        );

-        // TODO: maybe support epoch timestamp? https://github.com/GreptimeTeam/greptimedb/issues/3477
-        let ts = columns[0].data_type().as_timestamp().with_context(|| {
-            UnsupportedInputDataTypeSnafu {
+        let array = columns[0].to_arrow_array();
+        let times = match columns[0].data_type() {
+            ConcreteDataType::Int64(_) | ConcreteDataType::Int32(_) => {
+                let vector = Int64Vector::try_from_arrow_array(array).unwrap();
+                (0..vector.len())
+                    .map(|i| convert_to_timestamp(&vector.get(i)))
+                    .collect::<Vec<_>>()
+            }
+            ConcreteDataType::Timestamp(ts) => match ts {
+                TimestampType::Second(_) => {
+                    let vector = TimestampSecondVector::try_from_arrow_array(array).unwrap();
+                    (0..vector.len())
+                        .map(|i| convert_to_timestamp(&vector.get(i)))
+                        .collect::<Vec<_>>()
+                }
+                TimestampType::Millisecond(_) => {
+                    let vector = TimestampMillisecondVector::try_from_arrow_array(array).unwrap();
+                    (0..vector.len())
+                        .map(|i| convert_to_timestamp(&vector.get(i)))
+                        .collect::<Vec<_>>()
+                }
+                TimestampType::Microsecond(_) => {
+                    let vector = TimestampMicrosecondVector::try_from_arrow_array(array).unwrap();
+                    (0..vector.len())
+                        .map(|i| convert_to_timestamp(&vector.get(i)))
+                        .collect::<Vec<_>>()
+                }
+                TimestampType::Nanosecond(_) => {
+                    let vector = TimestampNanosecondVector::try_from_arrow_array(array).unwrap();
+                    (0..vector.len())
+                        .map(|i| convert_to_timestamp(&vector.get(i)))
+                        .collect::<Vec<_>>()
+                }
+            },
+            _ => UnsupportedInputDataTypeSnafu {
                function: NAME,
                datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
            }
-        })?;
-        let array = columns[0].to_arrow_array();
-        let times = match ts {
-            TimestampType::Second(_) => {
-                let vector = TimestampSecondVector::try_from_arrow_array(array).unwrap();
-                (0..vector.len())
-                    .map(|i| convert_to_timestamp(&vector.get(i)))
-                    .collect::<Vec<_>>()
-            }
-            TimestampType::Millisecond(_) => {
-                let vector = TimestampMillisecondVector::try_from_arrow_array(array).unwrap();
-                (0..vector.len())
-                    .map(|i| convert_to_timestamp(&vector.get(i)))
-                    .collect::<Vec<_>>()
-            }
-            TimestampType::Microsecond(_) => {
-                let vector = TimestampMicrosecondVector::try_from_arrow_array(array).unwrap();
-                (0..vector.len())
-                    .map(|i| convert_to_timestamp(&vector.get(i)))
-                    .collect::<Vec<_>>()
-            }
-            TimestampType::Nanosecond(_) => {
-                let vector = TimestampNanosecondVector::try_from_arrow_array(array).unwrap();
-                (0..vector.len())
-                    .map(|i| convert_to_timestamp(&vector.get(i)))
-                    .collect::<Vec<_>>()
-            }
+            .fail()?,
        };

        let tzs = {
@@ -153,7 +162,7 @@ mod tests {
    use datatypes::timestamp::{
        TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond,
    };
-    use datatypes::vectors::StringVector;
+    use datatypes::vectors::{Int64Vector, StringVector};

    use super::*;

@@ -257,4 +266,48 @@ mod tests {
        let expect_times: VectorRef = Arc::new(StringVector::from(results));
        assert_eq!(expect_times, vector);
    }
+
+    #[test]
+    fn test_numerical_to_timezone() {
+        let f = ToTimezoneFunction;
+        let results = vec![
+            Some("1969-12-31 19:00:00.001"),
+            None,
+            Some("1970-01-01 03:00:00.001"),
+            None,
+            Some("2024-03-26 23:01:50"),
+            None,
+            Some("2024-03-27 06:02:00"),
+            None,
+        ];
+        let times: Vec<Option<i64>> = vec![
+            Some(1),
+            None,
+            Some(1),
+            None,
+            Some(1711508510000),
+            None,
+            Some(1711508520000),
+            None,
+        ];
+        let ts_vector: Int64Vector = Int64Vector::from_owned_iterator(times.into_iter());
+        let tzs = vec![
+            Some("America/New_York"),
+            None,
+            Some("Europe/Moscow"),
+            None,
+            Some("America/New_York"),
+            None,
+            Some("Europe/Moscow"),
+            None,
+        ];
+        let args: Vec<VectorRef> = vec![
+            Arc::new(ts_vector),
+            Arc::new(StringVector::from(tzs.clone())),
+        ];
+        let vector = f.eval(FunctionContext::default(), &args).unwrap();
+        assert_eq!(8, vector.len());
+        let expect_times: VectorRef = Arc::new(StringVector::from(results));
+        assert_eq!(expect_times, vector);
+    }
 }
--- a/src/common/function/src/state.rs
+++ b/src/common/function/src/state.rs
@@ -35,6 +35,7 @@ impl FunctionState {
        use common_base::AffectedRows;
        use common_meta::rpc::procedure::{MigrateRegionRequest, ProcedureStateResponse};
        use common_query::error::Result;
+        use common_query::Output;
        use session::context::QueryContextRef;
        use store_api::storage::RegionId;
        use table::requests::{
@@ -70,8 +71,8 @@ impl FunctionState {
                &self,
                _request: InsertRequest,
                _ctx: QueryContextRef,
-            ) -> Result<AffectedRows> {
-                Ok(ROWS)
+            ) -> Result<Output> {
+                Ok(Output::new_with_affected_rows(ROWS))
            }

            async fn delete(
--- a/src/common/greptimedb-telemetry/Cargo.toml
+++ b/src/common/greptimedb-telemetry/Cargo.toml
@@ -9,12 +9,10 @@ workspace = true

 [dependencies]
 async-trait.workspace = true
-common-error.workspace = true
 common-runtime.workspace = true
 common-telemetry.workspace = true
 reqwest.workspace = true
 serde.workspace = true
-serde_json.workspace = true
 tokio.workspace = true
 uuid.workspace = true

--- a/src/common/grpc-expr/Cargo.toml
+++ b/src/common/grpc-expr/Cargo.toml
@@ -9,13 +9,11 @@ workspace = true

 [dependencies]
 api.workspace = true
-async-trait.workspace = true
 common-base.workspace = true
 common-catalog.workspace = true
 common-error.workspace = true
 common-macro.workspace = true
 common-query.workspace = true
-common-telemetry.workspace = true
 common-time.workspace = true
 datatypes.workspace = true
 snafu.workspace = true
--- a/src/common/grpc/Cargo.toml
+++ b/src/common/grpc/Cargo.toml
@@ -10,8 +10,6 @@ workspace = true
 [dependencies]
 api.workspace = true
 arrow-flight.workspace = true
-async-trait = "0.1"
-backtrace = "0.3"
 common-base.workspace = true
 common-error.workspace = true
 common-macro.workspace = true
@@ -20,10 +18,8 @@ common-runtime.workspace = true
 common-telemetry.workspace = true
 common-time.workspace = true
 dashmap.workspace = true
-datafusion.workspace = true
 datatypes.workspace = true
 flatbuffers = "23.1"
-futures = "0.3"
 lazy_static.workspace = true
 prost.workspace = true
 snafu.workspace = true
--- a/src/common/macro/src/range_fn.rs
+++ b/src/common/macro/src/range_fn.rs
@@ -56,6 +56,18 @@ pub(crate) fn process_range_fn(args: TokenStream, input: TokenStream) -> TokenSt
    } = &sig;
    let arg_types = ok!(extract_input_types(inputs));

+    // with format like Float64Array
+    let array_types = arg_types
+        .iter()
+        .map(|ty| {
+            if let Type::Reference(TypeReference { elem, .. }) = ty {
+                elem.as_ref().clone()
+            } else {
+                ty.clone()
+            }
+        })
+        .collect::<Vec<_>>();
+
    // build the struct and its impl block
    // only do this when `display_name` is specified
    if let Ok(display_name) = get_ident(&arg_map, "display_name", arg_span) {
@@ -64,6 +76,8 @@ pub(crate) fn process_range_fn(args: TokenStream, input: TokenStream) -> TokenSt
            vis,
            ok!(get_ident(&arg_map, "name", arg_span)),
            display_name,
+            array_types,
+            ok!(get_ident(&arg_map, "ret", arg_span)),
        );
        result.extend(struct_code);
    }
@@ -90,6 +104,8 @@ fn build_struct(
    vis: Visibility,
    name: Ident,
    display_name_ident: Ident,
+    array_types: Vec<Type>,
+    return_array_type: Ident,
 ) -> TokenStream {
    let display_name = display_name_ident.to_string();
    quote! {
@@ -103,29 +119,25 @@ fn build_struct(
            }

            pub fn scalar_udf() -> ScalarUDF {
-                ScalarUDF {
-                    name: Self::name().to_string(),
-                    signature: Signature::new(
+                // TODO(LFC): Use the new Datafusion UDF impl.
+                #[allow(deprecated)]
+                ScalarUDF::new(
+                    Self::name(),
+                    &Signature::new(
                        TypeSignature::Exact(Self::input_type()),
                        Volatility::Immutable,
                    ),
-                    return_type: Arc::new(|_| Ok(Arc::new(Self::return_type()))),
-                    fun: Arc::new(Self::calc),
-                }
+                    &(Arc::new(|_: &_| Ok(Arc::new(Self::return_type()))) as _),
+                    &(Arc::new(Self::calc) as _),
+                )
            }

-            // TODO(ruihang): this should be parameterized
-            // time index column and value column
            fn input_type() -> Vec<DataType> {
-                vec![
-                    RangeArray::convert_data_type(DataType::Timestamp(TimeUnit::Millisecond, None)),
-                    RangeArray::convert_data_type(DataType::Float64),
-                ]
+                vec![#( RangeArray::convert_data_type(#array_types::new_null(0).data_type().clone()), )*]
            }

-            // TODO(ruihang): this should be parameterized
            fn return_type() -> DataType {
-                DataType::Float64
+                #return_array_type::new_null(0).data_type().clone()
            }
        }
    }
@@ -160,6 +172,7 @@ fn build_calc_fn(
        .map(|name| Ident::new(&format!("{}_range_array", name), name.span()))
        .collect::<Vec<_>>();
    let first_range_array_name = range_array_names.first().unwrap().clone();
+    let first_param_name = param_names.first().unwrap().clone();

    quote! {
        impl #name {
@@ -168,13 +181,29 @@ fn build_calc_fn(

                #( let #range_array_names = RangeArray::try_new(extract_array(&input[#param_numbers])?.to_data().into())?; )*

-                // TODO(ruihang): add ensure!()
+                // check arrays len
+                {
+                    let len_first = #first_range_array_name.len();
+                    #(
+                        if len_first != #range_array_names.len() {
+                            return Err(DataFusionError::Execution(format!("RangeArray have different lengths in PromQL function {}: array1={}, array2={}", #name::name(), len_first, #range_array_names.len())));
+                        }
+                    )*
+                }

                let mut result_array = Vec::new();
                for index in 0..#first_range_array_name.len(){
                    #( let #param_names = #range_array_names.get(index).unwrap().as_any().downcast_ref::<#unref_param_types>().unwrap().clone(); )*

-                    // TODO(ruihang): add ensure!() to check length
+                    // check element len
+                    {
+                        let len_first = #first_param_name.len();
+                        #(
+                            if len_first != #param_names.len() {
+                                return Err(DataFusionError::Execution(format!("RangeArray's element {} have different lengths in PromQL function {}: array1={}, array2={}", index, #name::name(), len_first, #param_names.len())));
+                            }
+                        )*
+                    }

                    let result = #fn_name(#( &#param_names, )*);
                    result_array.push(result);
--- a/Show More
+++ b/Show More