chore: pick #6415 to release/v0.15 (#6686 )

ci: add check-version script to check whether push the latast image (#6415) Signed-off-by: liyang <daviderli614@gmail.com> Co-authored-by: liyang <daviderli614@gmail.com>
feat: pick automated metadata recovery feature (#6676 )
2025-12-25 15:40:02 +00:00 · 2025-08-07 12:57:09 +00:00 · 2025-08-07 20:16:48 +08:00 · 2025-08-07 10:15:46 +08:00 · 2025-08-04 22:19:40 +08:00 · 2025-08-04 22:19:40 +08:00
433 changed files with 22162 additions and 10647 deletions
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -339,7 +339,7 @@ jobs:
      build-windows-artifacts,
      release-images-to-dockerhub,
    ]
-    runs-on: ubuntu-latest-16-cores
+    runs-on: ubuntu-latest
    # When we push to ACR, it's easy to fail due to some unknown network issues.
    # However, we don't want to fail the whole workflow because of this.
    # The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
--- a/.github/workflows/semantic-pull-request.yml
+++ b/.github/workflows/semantic-pull-request.yml
@@ -11,17 +11,17 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

-permissions:
-  issues: write
-  contents: write
-  pull-requests: write
-
 jobs:
  check:
    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write  # Add permissions to modify PRs
+      issues: write
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: ./.github/actions/setup-cyborg
      - name: Check Pull Request
        working-directory: cyborg
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,7 +30,6 @@ members = [
    "src/common/recordbatch",
    "src/common/runtime",
    "src/common/session",
-    "src/common/sql",
    "src/common/stat",
    "src/common/substrait",
    "src/common/telemetry",
@@ -72,13 +71,11 @@ members = [
 resolver = "2"

 [workspace.package]
-version = "0.16.0"
+version = "0.15.5"
 edition = "2021"
 license = "Apache-2.0"

 [workspace.lints]
-clippy.print_stdout = "warn"
-clippy.print_stderr = "warn"
 clippy.dbg_macro = "warn"
 clippy.implicit_clone = "warn"
 clippy.result_large_err = "allow"
@@ -124,7 +121,6 @@ datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "
 datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
 datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
 datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
-datafusion-functions-aggregate-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
 datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
 datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
 datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
@@ -134,12 +130,11 @@ deadpool = "0.12"
 deadpool-postgres = "0.14"
 derive_builder = "0.20"
 dotenv = "0.15"
-either = "1.15"
 etcd-client = "0.14"
 fst = "0.4.7"
 futures = "0.3"
 futures-util = "0.3"
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "ceb1af4fa9309ce65bda0367db7b384df2bb4d4f" }
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "f3103a8c9b8ce162457d0a3e3ca00d53d1a8bd06" }
 hex = "0.4"
 http = "1"
 humantime = "2.1"
@@ -167,13 +162,14 @@ opentelemetry-proto = { version = "0.27", features = [
    "with-serde",
    "logs",
 ] }
-ordered-float = { version = "4.3", features = ["serde"] }
 parking_lot = "0.12"
 parquet = { version = "54.2", default-features = false, features = ["arrow", "async", "object_store"] }
 paste = "1.0"
 pin-project = "1.0"
 prometheus = { version = "0.13.3", features = ["process"] }
-promql-parser = { version = "0.6", features = ["ser"] }
+promql-parser = { git = "https://github.com/GreptimeTeam/promql-parser.git", rev = "0410e8b459dda7cb222ce9596f8bf3971bd07bd2", features = [
+    "ser",
+] }
 prost = { version = "0.13", features = ["no-recursion-limit"] }
 raft-engine = { version = "0.4.1", default-features = false }
 rand = "0.9"
@@ -224,12 +220,12 @@ tokio-util = { version = "0.7", features = ["io-util", "compat"] }
 toml = "0.8.8"
 tonic = { version = "0.12", features = ["tls", "gzip", "zstd"] }
 tower = "0.5"
+tower-http = "0.6"
 tracing = "0.1"
 tracing-appender = "0.2"
 tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] }
 typetag = "0.2"
 uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
-vrl = "0.25"
 zstd = "0.13"
 # DO_NOT_REMOVE_THIS: END_OF_EXTERNAL_DEPENDENCIES

@@ -264,7 +260,6 @@ common-query = { path = "src/common/query" }
 common-recordbatch = { path = "src/common/recordbatch" }
 common-runtime = { path = "src/common/runtime" }
 common-session = { path = "src/common/session" }
-common-sql = { path = "src/common/sql" }
 common-telemetry = { path = "src/common/telemetry" }
 common-test-util = { path = "src/common/test-util" }
 common-time = { path = "src/common/time" }
--- a/README.md
+++ b/README.md
@@ -75,9 +75,9 @@
 | --------- | ----------- |
 | [Unified Observability Data](https://docs.greptime.com/user-guide/concepts/why-greptimedb) | Store metrics, logs, and traces as timestamped, contextual wide events. Query via [SQL](https://docs.greptime.com/user-guide/query-data/sql), [PromQL](https://docs.greptime.com/user-guide/query-data/promql), and [streaming](https://docs.greptime.com/user-guide/flow-computation/overview). |
 | [High Performance & Cost Effective](https://docs.greptime.com/user-guide/manage-data/data-index) | Written in Rust, with a distributed query engine, [rich indexing](https://docs.greptime.com/user-guide/manage-data/data-index), and optimized columnar storage, delivering sub-second responses at PB scale. |
-| [Cloud-Native Architecture](https://docs.greptime.com/user-guide/concepts/architecture) | Designed for [Kubernetes](https://docs.greptime.com/user-guide/deployments-administration/deploy-on-kubernetes/greptimedb-operator-management), with compute/storage separation, native object storage (AWS S3, Azure Blob, etc.) and seamless cross-cloud access. |
+| [Cloud-Native Architecture](https://docs.greptime.com/user-guide/concepts/architecture) | Designed for [Kubernetes](https://docs.greptime.com/user-guide/deployments/deploy-on-kubernetes/greptimedb-operator-management), with compute/storage separation, native object storage (AWS S3, Azure Blob, etc.) and seamless cross-cloud access. |
 | [Developer-Friendly](https://docs.greptime.com/user-guide/protocols/overview) | Access via SQL/PromQL interfaces, REST API, MySQL/PostgreSQL protocols, and popular ingestion [protocols](https://docs.greptime.com/user-guide/protocols/overview). |
-| [Flexible Deployment](https://docs.greptime.com/user-guide/deployments-administration/overview) | Deploy anywhere: edge (including ARM/[Android](https://docs.greptime.com/user-guide/deployments-administration/run-on-android)) or cloud, with unified APIs and efficient data sync. |
+| [Flexible Deployment](https://docs.greptime.com/user-guide/deployments/overview) | Deploy anywhere: edge (including ARM/[Android](https://docs.greptime.com/user-guide/deployments/run-on-android)) or cloud, with unified APIs and efficient data sync. |

 Learn more in [Why GreptimeDB](https://docs.greptime.com/user-guide/concepts/why-greptimedb) and [Observability 2.0 and the Database for It](https://greptime.com/blogs/2025-04-25-greptimedb-observability2-new-database).

--- a/config/config.md
+++ b/config/config.md
@@ -147,6 +147,7 @@
 | `region_engine.mito.write_cache_ttl` | String | Unset | TTL for write cache. |
 | `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
 | `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
+| `region_engine.mito.max_concurrent_scan_files` | Integer | `128` | Maximum number of SST files to scan concurrently. |
 | `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
 | `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
 | `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
@@ -185,11 +186,10 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
-| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `slow_query` | -- | -- | The slow query log options. |
@@ -289,11 +289,10 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
-| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `slow_query` | -- | -- | The slow query log options. |
@@ -325,7 +324,6 @@
 | `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
 | `use_memory_store` | Bool | `false` | Store data in memory. |
 | `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
-| `region_failure_detector_initialization_delay` | String | `10m` | The delay before starting region failure detection.<br/>This delay helps prevent Metasrv from triggering unnecessary region failovers before all Datanodes are fully started.<br/>Especially useful when the cluster is not deployed with GreptimeDB Operator and maintenance mode is not enabled. |
 | `allow_region_failover_on_local_wal` | Bool | `false` | Whether to allow region failover on local WAL.<br/>**This option is not recommended to be set to true, because it may lead to data loss during failover.** |
 | `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
 | `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
@@ -373,11 +371,10 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
-| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `export_metrics` | -- | -- | The metasrv can export its metrics and send to Prometheus compatible service (e.g. `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
@@ -436,8 +433,8 @@
 | `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
 | `wal.dir` | String | Unset | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
 | `wal.file_size` | String | `128MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
-| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a purge.<br/>**It's only used when the provider is `raft_engine`**. |
-| `wal.purge_interval` | String | `1m` | The interval to trigger a purge.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.purge_interval` | String | `1m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
 | `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
 | `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
 | `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
@@ -500,6 +497,7 @@
 | `region_engine.mito.write_cache_ttl` | String | Unset | TTL for write cache. |
 | `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
 | `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
+| `region_engine.mito.max_concurrent_scan_files` | Integer | `128` | Maximum number of SST files to scan concurrently. |
 | `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
 | `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
 | `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
@@ -538,11 +536,10 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
-| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
@@ -589,14 +586,11 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
-| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
 | `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
-| `query` | -- | -- | -- |
-| `query.parallelism` | Integer | `1` | Parallelism of the query engine for query sent by flownode.<br/>Default to 1, so it won't use too much cpu or memory |
--- a/config/datanode.example.toml
+++ b/config/datanode.example.toml
@@ -129,11 +129,11 @@ dir = "./greptimedb_data/wal"
 ## **It's only used when the provider is `raft_engine`**.
 file_size = "128MB"

-## The threshold of the WAL size to trigger a purge.
+## The threshold of the WAL size to trigger a flush.
 ## **It's only used when the provider is `raft_engine`**.
 purge_threshold = "1GB"

-## The interval to trigger a purge.
+## The interval to trigger a flush.
 ## **It's only used when the provider is `raft_engine`**.
 purge_interval = "1m"

@@ -474,6 +474,9 @@ sst_write_buffer_size = "8MB"
 ## Capacity of the channel to send data from parallel scan tasks to the main task.
 parallel_scan_channel_size = 32

+## Maximum number of SST files to scan concurrently.
+max_concurrent_scan_files = 128
+
 ## Whether to allow stale WAL entries read during replay.
 allow_stale_entries = false

@@ -629,7 +632,7 @@ level = "info"
 enable_otlp_tracing = false

 ## The OTLP tracing endpoint.
-otlp_endpoint = "http://localhost:4318"
+otlp_endpoint = "http://localhost:4317"

 ## Whether to append logs to stdout.
 append_stdout = true
@@ -640,9 +643,6 @@ log_format = "text"
 ## The maximum amount of log files.
 max_log_files = 720

-## The OTLP tracing export protocol. Can be `grpc`/`http`.
-otlp_export_protocol = "http"
-
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
--- a/config/flownode.example.toml
+++ b/config/flownode.example.toml
@@ -83,7 +83,7 @@ level = "info"
 enable_otlp_tracing = false

 ## The OTLP tracing endpoint.
-otlp_endpoint = "http://localhost:4318"
+otlp_endpoint = "http://localhost:4317"

 ## Whether to append logs to stdout.
 append_stdout = true
@@ -94,9 +94,6 @@ log_format = "text"
 ## The maximum amount of log files.
 max_log_files = 720

-## The OTLP tracing export protocol. Can be `grpc`/`http`.
-otlp_export_protocol = "http"
-
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
@@ -108,8 +105,3 @@ default_ratio = 1.0
 ## The tokio console address.
 ## @toml2docs:none-default
 #+ tokio_console_addr = "127.0.0.1"
-
-[query]
-## Parallelism of the query engine for query sent by flownode.
-## Default to 1, so it won't use too much cpu or memory
-parallelism = 1
--- a/config/frontend.example.toml
+++ b/config/frontend.example.toml
@@ -218,7 +218,7 @@ level = "info"
 enable_otlp_tracing = false

 ## The OTLP tracing endpoint.
-otlp_endpoint = "http://localhost:4318"
+otlp_endpoint = "http://localhost:4317"

 ## Whether to append logs to stdout.
 append_stdout = true
@@ -229,9 +229,6 @@ log_format = "text"
 ## The maximum amount of log files.
 max_log_files = 720

-## The OTLP tracing export protocol. Can be `grpc`/`http`.
-otlp_export_protocol = "http"
-
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
--- a/config/metasrv.example.toml
+++ b/config/metasrv.example.toml
@@ -43,11 +43,6 @@ use_memory_store = false
 ## - Using shared storage (e.g., s3).
 enable_region_failover = false

-## The delay before starting region failure detection.
-## This delay helps prevent Metasrv from triggering unnecessary region failovers before all Datanodes are fully started.
-## Especially useful when the cluster is not deployed with GreptimeDB Operator and maintenance mode is not enabled.
-region_failure_detector_initialization_delay = '10m'
-
 ## Whether to allow region failover on local WAL.
 ## **This option is not recommended to be set to true, because it may lead to data loss during failover.**
 allow_region_failover_on_local_wal = false
@@ -225,7 +220,7 @@ level = "info"
 enable_otlp_tracing = false

 ## The OTLP tracing endpoint.
-otlp_endpoint = "http://localhost:4318"
+otlp_endpoint = "http://localhost:4317"

 ## Whether to append logs to stdout.
 append_stdout = true
@@ -236,9 +231,6 @@ log_format = "text"
 ## The maximum amount of log files.
 max_log_files = 720

-## The OTLP tracing export protocol. Can be `grpc`/`http`.
-otlp_export_protocol = "http"
-
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
--- a/config/standalone.example.toml
+++ b/config/standalone.example.toml
@@ -565,6 +565,9 @@ sst_write_buffer_size = "8MB"
 ## Capacity of the channel to send data from parallel scan tasks to the main task.
 parallel_scan_channel_size = 32

+## Maximum number of SST files to scan concurrently.
+max_concurrent_scan_files = 128
+
 ## Whether to allow stale WAL entries read during replay.
 allow_stale_entries = false

@@ -720,7 +723,7 @@ level = "info"
 enable_otlp_tracing = false

 ## The OTLP tracing endpoint.
-otlp_endpoint = "http://localhost:4318"
+otlp_endpoint = "http://localhost:4317"

 ## Whether to append logs to stdout.
 append_stdout = true
@@ -731,9 +734,6 @@ log_format = "text"
 ## The maximum amount of log files.
 max_log_files = 720

-## The OTLP tracing export protocol. Can be `grpc`/`http`.
-otlp_export_protocol = "http"
-
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
--- a/cyborg/bin/follow-up-docs-issue.ts
+++ b/cyborg/bin/follow-up-docs-issue.ts
@@ -55,25 +55,12 @@ async function main() {
        await client.rest.issues.addLabels({
            owner, repo, issue_number: number, labels: [labelDocsRequired],
        })
-
-        // Get available assignees for the docs repo
-        const assigneesResponse = await docsClient.rest.issues.listAssignees({
-            owner: 'GreptimeTeam',
-            repo: 'docs',
-        })
-        const validAssignees = assigneesResponse.data.map(assignee => assignee.login)
-        core.info(`Available assignees: ${validAssignees.join(', ')}`)
-
-        // Check if the actor is a valid assignee, otherwise fallback to fengjiachun
-        const assignee = validAssignees.includes(actor) ? actor : 'fengjiachun'
-        core.info(`Assigning issue to: ${assignee}`)
-
        await docsClient.rest.issues.create({
            owner: 'GreptimeTeam',
            repo: 'docs',
            title: `Update docs for ${title}`,
            body: `A document change request is generated from ${html_url}`,
-            assignee: assignee,
+            assignee: actor,
        }).then((res) => {
            core.info(`Created issue ${res.data}`)
        })
--- a/docs/benchmarks/log/README.md
+++ b/docs/benchmarks/log/README.md
@@ -48,4 +48,4 @@ Please refer to [SQL query](./query.sql) for GreptimeDB and Clickhouse, and [que

 ## Addition
 - You can tune GreptimeDB's configuration to get better performance.
- You can setup GreptimeDB to use S3 as storage, see [here](https://docs.greptime.com/user-guide/deployments-administration/configuration#storage-options).
+- You can setup GreptimeDB to use S3 as storage, see [here](https://docs.greptime.com/user-guide/deployments/configuration#storage-options).
--- a/grafana/README.md
+++ b/grafana/README.md
@@ -83,7 +83,7 @@ If you use the [Helm Chart](https://github.com/GreptimeTeam/helm-charts) to depl
 - `monitoring.enabled=true`: Deploys a standalone GreptimeDB instance dedicated to monitoring the cluster;
 - `grafana.enabled=true`: Deploys Grafana and automatically imports the monitoring dashboard;

-The standalone GreptimeDB instance will collect metrics from your cluster, and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/user-guide/deployments-administration-administration/deploy-on-kubernetes/getting-started).
+The standalone GreptimeDB instance will collect metrics from your cluster, and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/user-guide/deployments-administration/deploy-on-kubernetes/getting-started).

 ### Self-host Prometheus and import dashboards manually

--- a/licenserc.toml
+++ b/licenserc.toml
@@ -31,10 +31,8 @@ excludes = [
    "src/operator/src/expr_helper/trigger.rs",
    "src/sql/src/statements/create/trigger.rs",
    "src/sql/src/statements/show/trigger.rs",
-    "src/sql/src/statements/drop/trigger.rs",
    "src/sql/src/parsers/create_parser/trigger.rs",
    "src/sql/src/parsers/show_parser/trigger.rs",
-    "src/mito2/src/extension.rs",
 ]

 [properties]
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -53,54 +53,6 @@ get_arch_type() {
  esac
 }

-# Verify SHA256 checksum
-verify_sha256() {
-  file="$1"
-  expected_sha256="$2"
-
-  if command -v sha256sum >/dev/null 2>&1; then
-    actual_sha256=$(sha256sum "$file" | cut -d' ' -f1)
-  elif command -v shasum >/dev/null 2>&1; then
-    actual_sha256=$(shasum -a 256 "$file" | cut -d' ' -f1)
-  else
-    echo "Warning: No SHA256 verification tool found (sha256sum or shasum). Skipping checksum verification."
-    return 0
-  fi
-
-  if [ "$actual_sha256" = "$expected_sha256" ]; then
-    echo "SHA256 checksum verified successfully."
-    return 0
-  else
-    echo "Error: SHA256 checksum verification failed!"
-    echo "Expected: $expected_sha256"
-    echo "Actual: $actual_sha256"
-    return 1
-  fi
-}
-
-# Prompt for user confirmation (compatible with different shells)
-prompt_confirmation() {
-  message="$1"
-  printf "%s (y/N): " "$message"
-
-  # Try to read user input, fallback if read fails
-  answer=""
-  if read answer </dev/tty 2>/dev/null; then
-    case "$answer" in
-      [Yy]|[Yy][Ee][Ss])
-        return 0
-        ;;
-      *)
-        return 1
-        ;;
-    esac
-  else
-    echo ""
-    echo "Cannot read user input. Defaulting to No."
-    return 1
-  fi
-}
-
 download_artifact() {
  if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
    # Use the latest stable released version.
@@ -119,104 +71,17 @@ download_artifact() {
    fi

    echo "Downloading ${BIN}, OS: ${OS_TYPE}, Arch: ${ARCH_TYPE}, Version: ${VERSION}"
-    PKG_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}"
-    PACKAGE_NAME="${PKG_NAME}.tar.gz"
-    SHA256_FILE="${PKG_NAME}.sha256sum"
+    PACKAGE_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}.tar.gz"

    if [ -n "${PACKAGE_NAME}" ]; then
-      # Check if files already exist and prompt for override
-      if [ -f "${PACKAGE_NAME}" ]; then
-        echo "File ${PACKAGE_NAME} already exists."
-        if prompt_confirmation "Do you want to override it?"; then
-          echo "Overriding existing file..."
-          rm -f "${PACKAGE_NAME}"
-        else
-          echo "Skipping download. Using existing file."
-        fi
-      fi
-
-      if [ -f "${BIN}" ]; then
-        echo "Binary ${BIN} already exists."
-        if prompt_confirmation "Do you want to override it?"; then
-          echo "Will override existing binary..."
-          rm -f "${BIN}"
-        else
-          echo "Installation cancelled."
-          exit 0
-        fi
-      fi
-
-      # Download package if not exists
-      if [ ! -f "${PACKAGE_NAME}" ]; then
-        echo "Downloading ${PACKAGE_NAME}..."
-        # Use curl instead of wget for better compatibility
-        if command -v curl >/dev/null 2>&1; then
-          if ! curl -L -o "${PACKAGE_NAME}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"; then
-            echo "Error: Failed to download ${PACKAGE_NAME}"
-            exit 1
-          fi
-        elif command -v wget >/dev/null 2>&1; then
-          if ! wget -O "${PACKAGE_NAME}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"; then
-            echo "Error: Failed to download ${PACKAGE_NAME}"
-            exit 1
-          fi
-        else
-          echo "Error: Neither curl nor wget is available for downloading."
-          exit 1
-        fi
-      fi
-
-      # Download and verify SHA256 checksum
-      echo "Downloading SHA256 checksum..."
-      sha256_download_success=0
-      if command -v curl >/dev/null 2>&1; then
-        if curl -L -s -o "${SHA256_FILE}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${SHA256_FILE}" 2>/dev/null; then
-          sha256_download_success=1
-        fi
-      elif command -v wget >/dev/null 2>&1; then
-        if wget -q -O "${SHA256_FILE}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${SHA256_FILE}" 2>/dev/null; then
-          sha256_download_success=1
-        fi
-      fi
-
-      if [ $sha256_download_success -eq 1 ] && [ -f "${SHA256_FILE}" ]; then
-        expected_sha256=$(cat "${SHA256_FILE}" | cut -d' ' -f1)
-        if [ -n "$expected_sha256" ]; then
-          if ! verify_sha256 "${PACKAGE_NAME}" "${expected_sha256}"; then
-            echo "SHA256 verification failed. Removing downloaded file."
-            rm -f "${PACKAGE_NAME}" "${SHA256_FILE}"
-            exit 1
-          fi
-        else
-          echo "Warning: Could not parse SHA256 checksum from file."
-        fi
-        rm -f "${SHA256_FILE}"
-      else
-        echo "Warning: Could not download SHA256 checksum file. Skipping verification."
-      fi
+      wget "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"

      # Extract the binary and clean the rest.
-      echo "Extracting ${PACKAGE_NAME}..."
-      if ! tar xf "${PACKAGE_NAME}"; then
-        echo "Error: Failed to extract ${PACKAGE_NAME}"
-        exit 1
-      fi
-
-      # Find the binary in the extracted directory
-      extracted_dir="${PACKAGE_NAME%.tar.gz}"
-      if [ -f "${extracted_dir}/${BIN}" ]; then
-        mv "${extracted_dir}/${BIN}" "${PWD}/"
-        rm -f "${PACKAGE_NAME}"
-        rm -rf "${extracted_dir}"
-        chmod +x "${BIN}"
-        echo "Installation completed successfully!"
-        echo "Run './${BIN} --help' to get started"
-      else
-        echo "Error: Binary ${BIN} not found in extracted archive"
-        rm -f "${PACKAGE_NAME}"
-        rm -rf "${extracted_dir}"
-        exit 1
-      fi
+      tar xvf "${PACKAGE_NAME}" && \
+      mv "${PACKAGE_NAME%.tar.gz}/${BIN}" "${PWD}" && \
+      rm -r "${PACKAGE_NAME}" && \
+      rm -r "${PACKAGE_NAME%.tar.gz}" && \
+      echo "Run './${BIN} --help' to get started"
    fi
  fi
 }
--- a/src/api/src/v1/column_def.rs
+++ b/src/api/src/v1/column_def.rs
@@ -24,7 +24,7 @@ use greptime_proto::v1::{
 };
 use snafu::ResultExt;

-use crate::error::{self, Result};
+use crate::error::{self, ConvertColumnDefaultConstraintSnafu, Result};
 use crate::helper::ColumnDataTypeWrapper;
 use crate::v1::{ColumnDef, ColumnOptions, SemanticType};

@@ -77,6 +77,48 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
        })
 }

+/// Tries to construct a `ColumnDef` from the given `ColumnSchema`.
+///
+/// TODO(weny): Add tests for this function.
+pub fn try_as_column_def(column_schema: &ColumnSchema, is_primary_key: bool) -> Result<ColumnDef> {
+    let column_datatype =
+        ColumnDataTypeWrapper::try_from(column_schema.data_type.clone()).map(|w| w.to_parts())?;
+
+    let semantic_type = if column_schema.is_time_index() {
+        SemanticType::Timestamp
+    } else if is_primary_key {
+        SemanticType::Tag
+    } else {
+        SemanticType::Field
+    } as i32;
+    let comment = column_schema
+        .metadata()
+        .get(COMMENT_KEY)
+        .cloned()
+        .unwrap_or_default();
+
+    let default_constraint = match column_schema.default_constraint() {
+        None => vec![],
+        Some(v) => v
+            .clone()
+            .try_into()
+            .context(ConvertColumnDefaultConstraintSnafu {
+                column: &column_schema.name,
+            })?,
+    };
+    let options = options_from_column_schema(column_schema);
+    Ok(ColumnDef {
+        name: column_schema.name.clone(),
+        data_type: column_datatype.0 as i32,
+        is_nullable: column_schema.is_nullable(),
+        default_constraint,
+        semantic_type,
+        comment,
+        datatype_extension: column_datatype.1,
+        options,
+    })
+}
+
 /// Constructs a `ColumnOptions` from the given `ColumnSchema`.
 pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option<ColumnOptions> {
    let mut options = ColumnOptions::default();
--- a/src/catalog/Cargo.toml
+++ b/src/catalog/Cargo.toml
@@ -5,7 +5,6 @@ edition.workspace = true
 license.workspace = true

 [features]
-enterprise = []
 testing = []

 [lints]
@@ -44,6 +43,8 @@ moka = { workspace = true, features = ["future", "sync"] }
 partition.workspace = true
 paste.workspace = true
 prometheus.workspace = true
+promql-parser.workspace = true
+rand.workspace = true
 rustc-hash.workspace = true
 serde_json.workspace = true
 session.workspace = true
--- a/src/catalog/src/information_extension.rs
+++ b/src/catalog/src/information_extension.rs
@@ -16,8 +16,8 @@ use api::v1::meta::ProcedureStatus;
 use common_error::ext::BoxedError;
 use common_meta::cluster::{ClusterInfo, NodeInfo};
 use common_meta::datanode::RegionStat;
-use common_meta::ddl::{ExecutorContext, ProcedureExecutor};
 use common_meta::key::flow::flow_state::FlowStat;
+use common_meta::procedure_executor::{ExecutorContext, ProcedureExecutor};
 use common_meta::rpc::procedure;
 use common_procedure::{ProcedureInfo, ProcedureState};
 use meta_client::MetaClientRef;
--- a/src/catalog/src/kvbackend.rs
+++ b/src/catalog/src/kvbackend.rs
@@ -14,11 +14,9 @@

 pub use client::{CachedKvBackend, CachedKvBackendBuilder, MetaKvBackend};

-mod builder;
 mod client;
 mod manager;
 mod table_cache;

-pub use builder::KvBackendCatalogManagerBuilder;
 pub use manager::KvBackendCatalogManager;
 pub use table_cache::{new_table_cache, TableCache, TableCacheRef};
--- a/src/catalog/src/kvbackend/builder.rs
+++ b/src/catalog/src/kvbackend/builder.rs
@@ -1,131 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::sync::Arc;
-
-use common_catalog::consts::DEFAULT_CATALOG_NAME;
-use common_meta::cache::LayeredCacheRegistryRef;
-use common_meta::key::flow::FlowMetadataManager;
-use common_meta::key::TableMetadataManager;
-use common_meta::kv_backend::KvBackendRef;
-use common_procedure::ProcedureManagerRef;
-use moka::sync::Cache;
-use partition::manager::PartitionRuleManager;
-
-#[cfg(feature = "enterprise")]
-use crate::information_schema::InformationSchemaTableFactoryRef;
-use crate::information_schema::{InformationExtensionRef, InformationSchemaProvider};
-use crate::kvbackend::manager::{SystemCatalog, CATALOG_CACHE_MAX_CAPACITY};
-use crate::kvbackend::KvBackendCatalogManager;
-use crate::process_manager::ProcessManagerRef;
-use crate::system_schema::pg_catalog::PGCatalogProvider;
-
-pub struct KvBackendCatalogManagerBuilder {
-    information_extension: InformationExtensionRef,
-    backend: KvBackendRef,
-    cache_registry: LayeredCacheRegistryRef,
-    procedure_manager: Option<ProcedureManagerRef>,
-    process_manager: Option<ProcessManagerRef>,
-    #[cfg(feature = "enterprise")]
-    extra_information_table_factories:
-        std::collections::HashMap<String, InformationSchemaTableFactoryRef>,
-}
-
-impl KvBackendCatalogManagerBuilder {
-    pub fn new(
-        information_extension: InformationExtensionRef,
-        backend: KvBackendRef,
-        cache_registry: LayeredCacheRegistryRef,
-    ) -> Self {
-        Self {
-            information_extension,
-            backend,
-            cache_registry,
-            procedure_manager: None,
-            process_manager: None,
-            #[cfg(feature = "enterprise")]
-            extra_information_table_factories: std::collections::HashMap::new(),
-        }
-    }
-
-    pub fn with_procedure_manager(mut self, procedure_manager: ProcedureManagerRef) -> Self {
-        self.procedure_manager = Some(procedure_manager);
-        self
-    }
-
-    pub fn with_process_manager(mut self, process_manager: ProcessManagerRef) -> Self {
-        self.process_manager = Some(process_manager);
-        self
-    }
-
-    /// Sets the extra information tables.
-    #[cfg(feature = "enterprise")]
-    pub fn with_extra_information_table_factories(
-        mut self,
-        factories: std::collections::HashMap<String, InformationSchemaTableFactoryRef>,
-    ) -> Self {
-        self.extra_information_table_factories = factories;
-        self
-    }
-
-    pub fn build(self) -> Arc<KvBackendCatalogManager> {
-        let Self {
-            information_extension,
-            backend,
-            cache_registry,
-            procedure_manager,
-            process_manager,
-            #[cfg(feature = "enterprise")]
-            extra_information_table_factories,
-        } = self;
-        Arc::new_cyclic(|me| KvBackendCatalogManager {
-            information_extension,
-            partition_manager: Arc::new(PartitionRuleManager::new(
-                backend.clone(),
-                cache_registry
-                    .get()
-                    .expect("Failed to get table_route_cache"),
-            )),
-            table_metadata_manager: Arc::new(TableMetadataManager::new(backend.clone())),
-            system_catalog: SystemCatalog {
-                catalog_manager: me.clone(),
-                catalog_cache: Cache::new(CATALOG_CACHE_MAX_CAPACITY),
-                pg_catalog_cache: Cache::new(CATALOG_CACHE_MAX_CAPACITY),
-                information_schema_provider: {
-                    let provider = InformationSchemaProvider::new(
-                        DEFAULT_CATALOG_NAME.to_string(),
-                        me.clone(),
-                        Arc::new(FlowMetadataManager::new(backend.clone())),
-                        process_manager.clone(),
-                        backend.clone(),
-                    );
-                    #[cfg(feature = "enterprise")]
-                    let provider = provider
-                        .with_extra_table_factories(extra_information_table_factories.clone());
-                    Arc::new(provider)
-                },
-                pg_catalog_provider: Arc::new(PGCatalogProvider::new(
-                    DEFAULT_CATALOG_NAME.to_string(),
-                    me.clone(),
-                )),
-                backend,
-                process_manager,
-                #[cfg(feature = "enterprise")]
-                extra_information_table_factories,
-            },
-            cache_registry,
-            procedure_manager,
-        })
-    }
-}
--- a/src/catalog/src/kvbackend/manager.rs
+++ b/src/catalog/src/kvbackend/manager.rs
@@ -30,13 +30,13 @@ use common_meta::key::flow::FlowMetadataManager;
 use common_meta::key::schema_name::SchemaNameKey;
 use common_meta::key::table_info::{TableInfoManager, TableInfoValue};
 use common_meta::key::table_name::TableNameKey;
-use common_meta::key::TableMetadataManagerRef;
+use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
 use common_meta::kv_backend::KvBackendRef;
 use common_procedure::ProcedureManagerRef;
 use futures_util::stream::BoxStream;
 use futures_util::{StreamExt, TryStreamExt};
 use moka::sync::Cache;
-use partition::manager::PartitionRuleManagerRef;
+use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
 use session::context::{Channel, QueryContext};
 use snafu::prelude::*;
 use store_api::metric_engine_consts::METRIC_ENGINE_NAME;
@@ -52,8 +52,6 @@ use crate::error::{
    CacheNotFoundSnafu, GetTableCacheSnafu, InvalidTableInfoInCatalogSnafu, ListCatalogsSnafu,
    ListSchemasSnafu, ListTablesSnafu, Result, TableMetadataManagerSnafu,
 };
-#[cfg(feature = "enterprise")]
-use crate::information_schema::InformationSchemaTableFactoryRef;
 use crate::information_schema::{InformationExtensionRef, InformationSchemaProvider};
 use crate::kvbackend::TableCacheRef;
 use crate::process_manager::ProcessManagerRef;
@@ -69,22 +67,60 @@ use crate::CatalogManager;
 #[derive(Clone)]
 pub struct KvBackendCatalogManager {
    /// Provides the extension methods for the `information_schema` tables
-    pub(super) information_extension: InformationExtensionRef,
+    information_extension: InformationExtensionRef,
    /// Manages partition rules.
-    pub(super) partition_manager: PartitionRuleManagerRef,
+    partition_manager: PartitionRuleManagerRef,
    /// Manages table metadata.
-    pub(super) table_metadata_manager: TableMetadataManagerRef,
+    table_metadata_manager: TableMetadataManagerRef,
    /// A sub-CatalogManager that handles system tables
-    pub(super) system_catalog: SystemCatalog,
+    system_catalog: SystemCatalog,
    /// Cache registry for all caches.
-    pub(super) cache_registry: LayeredCacheRegistryRef,
+    cache_registry: LayeredCacheRegistryRef,
    /// Only available in `Standalone` mode.
-    pub(super) procedure_manager: Option<ProcedureManagerRef>,
+    procedure_manager: Option<ProcedureManagerRef>,
 }

-pub(super) const CATALOG_CACHE_MAX_CAPACITY: u64 = 128;
+const CATALOG_CACHE_MAX_CAPACITY: u64 = 128;

 impl KvBackendCatalogManager {
+    pub fn new(
+        information_extension: InformationExtensionRef,
+        backend: KvBackendRef,
+        cache_registry: LayeredCacheRegistryRef,
+        procedure_manager: Option<ProcedureManagerRef>,
+        process_manager: Option<ProcessManagerRef>,
+    ) -> Arc<Self> {
+        Arc::new_cyclic(|me| Self {
+            information_extension,
+            partition_manager: Arc::new(PartitionRuleManager::new(
+                backend.clone(),
+                cache_registry
+                    .get()
+                    .expect("Failed to get table_route_cache"),
+            )),
+            table_metadata_manager: Arc::new(TableMetadataManager::new(backend.clone())),
+            system_catalog: SystemCatalog {
+                catalog_manager: me.clone(),
+                catalog_cache: Cache::new(CATALOG_CACHE_MAX_CAPACITY),
+                pg_catalog_cache: Cache::new(CATALOG_CACHE_MAX_CAPACITY),
+                information_schema_provider: Arc::new(InformationSchemaProvider::new(
+                    DEFAULT_CATALOG_NAME.to_string(),
+                    me.clone(),
+                    Arc::new(FlowMetadataManager::new(backend.clone())),
+                    process_manager.clone(),
+                )),
+                pg_catalog_provider: Arc::new(PGCatalogProvider::new(
+                    DEFAULT_CATALOG_NAME.to_string(),
+                    me.clone(),
+                )),
+                backend,
+                process_manager,
+            },
+            cache_registry,
+            procedure_manager,
+        })
+    }
+
    pub fn view_info_cache(&self) -> Result<ViewInfoCacheRef> {
        self.cache_registry.get().context(CacheNotFoundSnafu {
            name: "view_info_cache",
@@ -471,19 +507,16 @@ fn build_table(table_info_value: TableInfoValue) -> Result<TableRef> {
 /// - information_schema.{tables}
 /// - pg_catalog.{tables}
 #[derive(Clone)]
-pub(super) struct SystemCatalog {
-    pub(super) catalog_manager: Weak<KvBackendCatalogManager>,
-    pub(super) catalog_cache: Cache<String, Arc<InformationSchemaProvider>>,
-    pub(super) pg_catalog_cache: Cache<String, Arc<PGCatalogProvider>>,
+struct SystemCatalog {
+    catalog_manager: Weak<KvBackendCatalogManager>,
+    catalog_cache: Cache<String, Arc<InformationSchemaProvider>>,
+    pg_catalog_cache: Cache<String, Arc<PGCatalogProvider>>,

    // system_schema_provider for default catalog
-    pub(super) information_schema_provider: Arc<InformationSchemaProvider>,
-    pub(super) pg_catalog_provider: Arc<PGCatalogProvider>,
-    pub(super) backend: KvBackendRef,
-    pub(super) process_manager: Option<ProcessManagerRef>,
-    #[cfg(feature = "enterprise")]
-    pub(super) extra_information_table_factories:
-        std::collections::HashMap<String, InformationSchemaTableFactoryRef>,
+    information_schema_provider: Arc<InformationSchemaProvider>,
+    pg_catalog_provider: Arc<PGCatalogProvider>,
+    backend: KvBackendRef,
+    process_manager: Option<ProcessManagerRef>,
 }

 impl SystemCatalog {
@@ -547,17 +580,12 @@ impl SystemCatalog {
        if schema == INFORMATION_SCHEMA_NAME {
            let information_schema_provider =
                self.catalog_cache.get_with_by_ref(catalog, move || {
-                    let provider = InformationSchemaProvider::new(
+                    Arc::new(InformationSchemaProvider::new(
                        catalog.to_string(),
                        self.catalog_manager.clone(),
                        Arc::new(FlowMetadataManager::new(self.backend.clone())),
                        self.process_manager.clone(),
-                        self.backend.clone(),
-                    );
-                    #[cfg(feature = "enterprise")]
-                    let provider = provider
-                        .with_extra_table_factories(self.extra_information_table_factories.clone());
-                    Arc::new(provider)
+                    ))
                });
            information_schema_provider.table(table_name)
        } else if schema == PG_CATALOG_NAME && channel == Channel::Postgres {
--- a/src/catalog/src/memory/manager.rs
+++ b/src/catalog/src/memory/manager.rs
@@ -352,13 +352,11 @@ impl MemoryCatalogManager {
    }

    fn create_catalog_entry(self: &Arc<Self>, catalog: String) -> SchemaEntries {
-        let backend = Arc::new(MemoryKvBackend::new());
        let information_schema_provider = InformationSchemaProvider::new(
            catalog,
            Arc::downgrade(self) as Weak<dyn CatalogManager>,
-            Arc::new(FlowMetadataManager::new(backend.clone())),
+            Arc::new(FlowMetadataManager::new(Arc::new(MemoryKvBackend::new()))),
            None, // we don't need ProcessManager on regions server.
-            backend,
        );
        let information_schema = information_schema_provider.tables().clone();

--- a/src/catalog/src/process_manager.rs
+++ b/src/catalog/src/process_manager.rs
@@ -14,17 +14,24 @@

 use std::collections::hash_map::Entry;
 use std::collections::HashMap;
-use std::fmt::{Debug, Formatter};
+use std::fmt::{Debug, Display, Formatter};
 use std::sync::atomic::{AtomicU32, Ordering};
 use std::sync::{Arc, RwLock};
+use std::time::{Duration, Instant, UNIX_EPOCH};

 use api::v1::frontend::{KillProcessRequest, ListProcessRequest, ProcessInfo};
 use common_base::cancellation::CancellationHandle;
 use common_frontend::selector::{FrontendSelector, MetaClientSelector};
-use common_telemetry::{debug, info, warn};
+use common_frontend::slow_query_event::SlowQueryEvent;
+use common_telemetry::{debug, error, info, warn};
 use common_time::util::current_time_millis;
 use meta_client::MetaClientRef;
+use promql_parser::parser::EvalStmt;
+use rand::random;
+use session::context::QueryContextRef;
 use snafu::{ensure, OptionExt, ResultExt};
+use sql::statements::statement::Statement;
+use tokio::sync::mpsc::Sender;

 use crate::error;
 use crate::metrics::{PROCESS_KILL_COUNT, PROCESS_LIST_COUNT};
@@ -44,6 +51,23 @@ pub struct ProcessManager {
    frontend_selector: Option<MetaClientSelector>,
 }

+/// Represents a parsed query statement, functionally equivalent to [query::parser::QueryStatement].
+/// This enum is defined here to avoid cyclic dependencies with the query parser module.
+#[derive(Debug, Clone)]
+pub enum QueryStatement {
+    Sql(Statement),
+    Promql(EvalStmt),
+}
+
+impl Display for QueryStatement {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            QueryStatement::Sql(stmt) => write!(f, "{}", stmt),
+            QueryStatement::Promql(eval_stmt) => write!(f, "{}", eval_stmt),
+        }
+    }
+}
+
 impl ProcessManager {
    /// Create a [ProcessManager] instance with server address and kv client.
    pub fn new(server_addr: String, meta_client: Option<MetaClientRef>) -> Self {
@@ -67,6 +91,7 @@ impl ProcessManager {
        query: String,
        client: String,
        query_id: Option<ProcessId>,
+        _slow_query_timer: Option<SlowQueryTimer>,
    ) -> Ticket {
        let id = query_id.unwrap_or_else(|| self.next_id.fetch_add(1, Ordering::Relaxed));
        let process = ProcessInfo {
@@ -93,6 +118,7 @@ impl ProcessManager {
            manager: self.clone(),
            id,
            cancellation_handle,
+            _slow_query_timer,
        }
    }

@@ -223,6 +249,7 @@ pub struct Ticket {
    pub(crate) manager: ProcessManagerRef,
    pub(crate) id: ProcessId,
    pub cancellation_handle: Arc<CancellationHandle>,
+    _slow_query_timer: Option<SlowQueryTimer>,
 }

 impl Drop for Ticket {
@@ -263,6 +290,107 @@ impl Debug for CancellableProcess {
    }
 }

+/// SlowQueryTimer is used to log slow query when it's dropped.
+/// In drop(), it will check if the query is slow and send the slow query event to the handler.
+pub struct SlowQueryTimer {
+    start: Instant,
+    stmt: QueryStatement,
+    query_ctx: QueryContextRef,
+    threshold: Option<Duration>,
+    sample_ratio: Option<f64>,
+    tx: Sender<SlowQueryEvent>,
+}
+
+impl SlowQueryTimer {
+    pub fn new(
+        stmt: QueryStatement,
+        query_ctx: QueryContextRef,
+        threshold: Option<Duration>,
+        sample_ratio: Option<f64>,
+        tx: Sender<SlowQueryEvent>,
+    ) -> Self {
+        Self {
+            start: Instant::now(),
+            stmt,
+            query_ctx,
+            threshold,
+            sample_ratio,
+            tx,
+        }
+    }
+}
+
+impl SlowQueryTimer {
+    fn send_slow_query_event(&self, elapsed: Duration, threshold: Duration) {
+        let mut slow_query_event = SlowQueryEvent {
+            cost: elapsed.as_millis() as u64,
+            threshold: threshold.as_millis() as u64,
+            query: "".to_string(),
+            query_ctx: self.query_ctx.clone(),
+
+            // The following fields are only used for PromQL queries.
+            is_promql: false,
+            promql_range: None,
+            promql_step: None,
+            promql_start: None,
+            promql_end: None,
+        };
+
+        match &self.stmt {
+            QueryStatement::Promql(stmt) => {
+                slow_query_event.is_promql = true;
+                slow_query_event.query = stmt.expr.to_string();
+                slow_query_event.promql_step = Some(stmt.interval.as_millis() as u64);
+
+                let start = stmt
+                    .start
+                    .duration_since(UNIX_EPOCH)
+                    .unwrap_or_default()
+                    .as_millis() as i64;
+
+                let end = stmt
+                    .end
+                    .duration_since(UNIX_EPOCH)
+                    .unwrap_or_default()
+                    .as_millis() as i64;
+
+                slow_query_event.promql_range = Some((end - start) as u64);
+                slow_query_event.promql_start = Some(start);
+                slow_query_event.promql_end = Some(end);
+            }
+            QueryStatement::Sql(stmt) => {
+                slow_query_event.query = stmt.to_string();
+            }
+        }
+
+        // Send SlowQueryEvent to the handler.
+        if let Err(e) = self.tx.try_send(slow_query_event) {
+            error!(e; "Failed to send slow query event");
+        }
+    }
+}
+
+impl Drop for SlowQueryTimer {
+    fn drop(&mut self) {
+        if let Some(threshold) = self.threshold {
+            // Calculate the elaspsed duration since the timer is created.
+            let elapsed = self.start.elapsed();
+            if elapsed > threshold {
+                if let Some(ratio) = self.sample_ratio {
+                    // Only capture a portion of slow queries based on sample_ratio.
+                    // Generate a random number in [0, 1) and compare it with sample_ratio.
+                    if ratio >= 1.0 || random::<f64>() <= ratio {
+                        self.send_slow_query_event(elapsed, threshold);
+                    }
+                } else {
+                    // Captures all slow queries if sample_ratio is not set.
+                    self.send_slow_query_event(elapsed, threshold);
+                }
+            }
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use std::sync::Arc;
@@ -278,6 +406,7 @@ mod tests {
            "SELECT * FROM table".to_string(),
            "".to_string(),
            None,
+            None,
        );

        let running_processes = process_manager.local_processes(None).unwrap();
@@ -301,6 +430,7 @@ mod tests {
            "SELECT * FROM table".to_string(),
            "client1".to_string(),
            Some(custom_id),
+            None,
        );

        assert_eq!(ticket.id, custom_id);
@@ -321,6 +451,7 @@ mod tests {
            "SELECT * FROM table1".to_string(),
            "client1".to_string(),
            None,
+            None,
        );

        let ticket2 = process_manager.clone().register_query(
@@ -329,6 +460,7 @@ mod tests {
            "SELECT * FROM table2".to_string(),
            "client2".to_string(),
            None,
+            None,
        );

        let running_processes = process_manager.local_processes(Some("public")).unwrap();
@@ -350,6 +482,7 @@ mod tests {
            "SELECT * FROM table1".to_string(),
            "client1".to_string(),
            None,
+            None,
        );

        let _ticket2 = process_manager.clone().register_query(
@@ -358,6 +491,7 @@ mod tests {
            "SELECT * FROM table2".to_string(),
            "client2".to_string(),
            None,
+            None,
        );

        // Test listing processes for specific catalog
@@ -384,6 +518,7 @@ mod tests {
            "SELECT * FROM table".to_string(),
            "client1".to_string(),
            None,
+            None,
        );
        assert_eq!(process_manager.local_processes(None).unwrap().len(), 1);
        process_manager.deregister_query("public".to_string(), ticket.id);
@@ -400,6 +535,7 @@ mod tests {
            "SELECT * FROM table".to_string(),
            "client1".to_string(),
            None,
+            None,
        );

        assert!(!ticket.cancellation_handle.is_cancelled());
@@ -417,6 +553,7 @@ mod tests {
            "SELECT * FROM table".to_string(),
            "client1".to_string(),
            None,
+            None,
        );
        assert!(!ticket.cancellation_handle.is_cancelled());
        let killed = process_manager
@@ -462,6 +599,7 @@ mod tests {
            "SELECT COUNT(*) FROM users WHERE age > 18".to_string(),
            "test_client".to_string(),
            Some(42),
+            None,
        );

        let processes = process_manager.local_processes(None).unwrap();
@@ -488,6 +626,7 @@ mod tests {
                "SELECT * FROM table".to_string(),
                "client1".to_string(),
                None,
+                None,
            );

            // Process should be registered
--- a/src/catalog/src/system_schema.rs
+++ b/src/catalog/src/system_schema.rs
@@ -15,7 +15,7 @@
 pub mod information_schema;
 mod memory_table;
 pub mod pg_catalog;
-pub mod predicate;
+mod predicate;
 mod utils;

 use std::collections::HashMap;
@@ -96,7 +96,7 @@ trait SystemSchemaProviderInner {
    }
 }

-pub trait SystemTable {
+pub(crate) trait SystemTable {
    fn table_id(&self) -> TableId;

    fn table_name(&self) -> &'static str;
@@ -110,7 +110,7 @@ pub trait SystemTable {
    }
 }

-pub type SystemTableRef = Arc<dyn SystemTable + Send + Sync>;
+pub(crate) type SystemTableRef = Arc<dyn SystemTable + Send + Sync>;

 struct SystemTableDataSource {
    table: SystemTableRef,
--- a/src/catalog/src/system_schema/information_schema.rs
+++ b/src/catalog/src/system_schema/information_schema.rs
@@ -19,7 +19,7 @@ mod information_memory_table;
 pub mod key_column_usage;
 mod partitions;
 mod procedure_info;
-pub mod process_list;
+mod process_list;
 pub mod region_peers;
 mod region_statistics;
 mod runtime_metrics;
@@ -38,7 +38,6 @@ use common_meta::cluster::NodeInfo;
 use common_meta::datanode::RegionStat;
 use common_meta::key::flow::flow_state::FlowStat;
 use common_meta::key::flow::FlowMetadataManager;
-use common_meta::kv_backend::KvBackendRef;
 use common_procedure::ProcedureInfo;
 use common_recordbatch::SendableRecordBatchStream;
 use datatypes::schema::SchemaRef;
@@ -113,25 +112,6 @@ macro_rules! setup_memory_table {
    };
 }

-#[cfg(feature = "enterprise")]
-pub struct MakeInformationTableRequest {
-    pub catalog_name: String,
-    pub catalog_manager: Weak<dyn CatalogManager>,
-    pub kv_backend: KvBackendRef,
-}
-
-/// A factory trait for making information schema tables.
-///
-/// This trait allows for extensibility of the information schema by providing
-/// a way to dynamically create custom information schema tables.
-#[cfg(feature = "enterprise")]
-pub trait InformationSchemaTableFactory {
-    fn make_information_table(&self, req: MakeInformationTableRequest) -> SystemTableRef;
-}
-
-#[cfg(feature = "enterprise")]
-pub type InformationSchemaTableFactoryRef = Arc<dyn InformationSchemaTableFactory + Send + Sync>;
-
 /// The `information_schema` tables info provider.
 pub struct InformationSchemaProvider {
    catalog_name: String,
@@ -139,10 +119,6 @@ pub struct InformationSchemaProvider {
    process_manager: Option<ProcessManagerRef>,
    flow_metadata_manager: Arc<FlowMetadataManager>,
    tables: HashMap<String, TableRef>,
-    #[allow(dead_code)]
-    kv_backend: KvBackendRef,
-    #[cfg(feature = "enterprise")]
-    extra_table_factories: HashMap<String, InformationSchemaTableFactoryRef>,
 }

 impl SystemSchemaProvider for InformationSchemaProvider {
@@ -152,7 +128,6 @@ impl SystemSchemaProvider for InformationSchemaProvider {
        &self.tables
    }
 }
-
 impl SystemSchemaProviderInner for InformationSchemaProvider {
    fn catalog_name(&self) -> &str {
        &self.catalog_name
@@ -240,22 +215,7 @@ impl SystemSchemaProviderInner for InformationSchemaProvider {
                .process_manager
                .as_ref()
                .map(|p| Arc::new(InformationSchemaProcessList::new(p.clone())) as _),
-            table_name => {
-                #[cfg(feature = "enterprise")]
-                return self.extra_table_factories.get(table_name).map(|factory| {
-                    let req = MakeInformationTableRequest {
-                        catalog_name: self.catalog_name.clone(),
-                        catalog_manager: self.catalog_manager.clone(),
-                        kv_backend: self.kv_backend.clone(),
-                    };
-                    factory.make_information_table(req)
-                });
-                #[cfg(not(feature = "enterprise"))]
-                {
-                    let _ = table_name;
-                    None
-                }
-            }
+            _ => None,
        }
    }
 }
@@ -266,7 +226,6 @@ impl InformationSchemaProvider {
        catalog_manager: Weak<dyn CatalogManager>,
        flow_metadata_manager: Arc<FlowMetadataManager>,
        process_manager: Option<ProcessManagerRef>,
-        kv_backend: KvBackendRef,
    ) -> Self {
        let mut provider = Self {
            catalog_name,
@@ -274,9 +233,6 @@ impl InformationSchemaProvider {
            flow_metadata_manager,
            process_manager,
            tables: HashMap::new(),
-            kv_backend,
-            #[cfg(feature = "enterprise")]
-            extra_table_factories: HashMap::new(),
        };

        provider.build_tables();
@@ -284,16 +240,6 @@ impl InformationSchemaProvider {
        provider
    }

-    #[cfg(feature = "enterprise")]
-    pub(crate) fn with_extra_table_factories(
-        mut self,
-        factories: HashMap<String, InformationSchemaTableFactoryRef>,
-    ) -> Self {
-        self.extra_table_factories = factories;
-        self.build_tables();
-        self
-    }
-
    fn build_tables(&mut self) {
        let mut tables = HashMap::new();

@@ -344,19 +290,16 @@ impl InformationSchemaProvider {
        if let Some(process_list) = self.build_table(PROCESS_LIST) {
            tables.insert(PROCESS_LIST.to_string(), process_list);
        }
-        #[cfg(feature = "enterprise")]
-        for name in self.extra_table_factories.keys() {
-            tables.insert(name.to_string(), self.build_table(name).expect(name));
-        }
        // Add memory tables
        for name in MEMORY_TABLES.iter() {
            tables.insert((*name).to_string(), self.build_table(name).expect(name));
        }
+
        self.tables = tables;
    }
 }

-pub trait InformationTable {
+trait InformationTable {
    fn table_id(&self) -> TableId;

    fn table_name(&self) -> &'static str;
--- a/src/catalog/src/system_schema/information_schema/process_list.rs
+++ b/src/catalog/src/system_schema/information_schema/process_list.rs
@@ -39,14 +39,14 @@ use crate::process_manager::ProcessManagerRef;
 use crate::system_schema::information_schema::InformationTable;

 /// Column names of `information_schema.process_list`
-pub const ID: &str = "id";
-pub const CATALOG: &str = "catalog";
-pub const SCHEMAS: &str = "schemas";
-pub const QUERY: &str = "query";
-pub const CLIENT: &str = "client";
-pub const FRONTEND: &str = "frontend";
-pub const START_TIMESTAMP: &str = "start_timestamp";
-pub const ELAPSED_TIME: &str = "elapsed_time";
+const ID: &str = "id";
+const CATALOG: &str = "catalog";
+const SCHEMAS: &str = "schemas";
+const QUERY: &str = "query";
+const CLIENT: &str = "client";
+const FRONTEND: &str = "frontend";
+const START_TIMESTAMP: &str = "start_timestamp";
+const ELAPSED_TIME: &str = "elapsed_time";

 /// `information_schema.process_list` table implementation that tracks running
 /// queries in current cluster.
--- a/src/catalog/src/system_schema/information_schema/table_names.rs
+++ b/src/catalog/src/system_schema/information_schema/table_names.rs
@@ -48,4 +48,3 @@ pub const FLOWS: &str = "flows";
 pub const PROCEDURE_INFO: &str = "procedure_info";
 pub const REGION_STATISTICS: &str = "region_statistics";
 pub const PROCESS_LIST: &str = "process_list";
-pub const TRIGGER_LIST: &str = "trigger_list";
--- a/src/catalog/src/system_schema/pg_catalog/pg_class.rs
+++ b/src/catalog/src/system_schema/pg_catalog/pg_class.rs
@@ -169,7 +169,7 @@ impl DfPartitionStream for PGClass {
 }

 /// Builds the `pg_catalog.pg_class` table row by row
-/// TODO(J0HN50N133): `relowner` is always the [`DUMMY_OWNER_ID`] because we don't have users.
+/// TODO(J0HN50N133): `relowner` is always the [`DUMMY_OWNER_ID`] cuz we don't have user.
 /// Once we have user system, make it the actual owner of the table.
 struct PGClassBuilder {
    schema: SchemaRef,
--- a/src/catalog/src/table_source.rs
+++ b/src/catalog/src/table_source.rs
@@ -207,7 +207,6 @@ mod tests {
    use session::context::QueryContext;

    use super::*;
-    use crate::kvbackend::KvBackendCatalogManagerBuilder;
    use crate::memory::MemoryCatalogManager;

    #[test]
@@ -324,13 +323,13 @@ mod tests {
            .build(),
        );

-        let catalog_manager = KvBackendCatalogManagerBuilder::new(
+        let catalog_manager = KvBackendCatalogManager::new(
            Arc::new(NoopInformationExtension),
            backend.clone(),
            layered_cache_registry,
-        )
-        .build();
-
+            None,
+            None,
+        );
        let table_metadata_manager = TableMetadataManager::new(backend);
        let mut view_info = common_meta::key::test_utils::new_test_table_info(1024, vec![]);
        view_info.table_type = TableType::View;
--- a/src/cli/Cargo.toml
+++ b/src/cli/Cargo.toml
@@ -43,6 +43,7 @@ common-time.workspace = true
 common-version.workspace = true
 common-wal.workspace = true
 datatypes.workspace = true
+either = "1.8"
 etcd-client.workspace = true
 futures.workspace = true
 humantime.workspace = true
--- a/src/cli/src/lib.rs
+++ b/src/cli/src/lib.rs
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#![allow(clippy::print_stdout)]
 mod bench;
 mod data;
 mod database;
--- a/src/cli/src/metadata/snapshot.rs
+++ b/src/cli/src/metadata/snapshot.rs
@@ -301,6 +301,7 @@ struct MetaInfoTool {

 #[async_trait]
 impl Tool for MetaInfoTool {
+    #[allow(clippy::print_stdout)]
    async fn do_work(&self) -> std::result::Result<(), BoxedError> {
        let result = MetadataSnapshotManager::info(
            &self.inner,
--- a/src/client/src/database.rs
+++ b/src/client/src/database.rs
@@ -23,7 +23,7 @@ use api::v1::greptime_request::Request;
 use api::v1::query_request::Query;
 use api::v1::{
    AlterTableExpr, AuthHeader, Basic, CreateTableExpr, DdlRequest, GreptimeRequest,
-    InsertRequests, QueryRequest, RequestHeader, RowInsertRequests,
+    InsertRequests, QueryRequest, RequestHeader,
 };
 use arrow_flight::{FlightData, Ticket};
 use async_stream::stream;
@@ -31,7 +31,7 @@ use base64::prelude::BASE64_STANDARD;
 use base64::Engine;
 use common_catalog::build_db_string;
 use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
-use common_error::ext::BoxedError;
+use common_error::ext::{BoxedError, ErrorExt};
 use common_grpc::flight::do_put::DoPutResponse;
 use common_grpc::flight::{FlightDecoder, FlightMessage};
 use common_query::Output;
@@ -48,7 +48,7 @@ use tonic::transport::Channel;

 use crate::error::{
    ConvertFlightDataSnafu, Error, FlightGetSnafu, IllegalFlightMessagesSnafu,
-    InvalidTonicMetadataValueSnafu,
+    InvalidTonicMetadataValueSnafu, ServerSnafu,
 };
 use crate::{error, from_grpc_response, Client, Result};

@@ -118,7 +118,6 @@ impl Database {
        }
    }

-    /// Set the catalog for the database client.
    pub fn set_catalog(&mut self, catalog: impl Into<String>) {
        self.catalog = catalog.into();
    }
@@ -131,7 +130,6 @@ impl Database {
        }
    }

-    /// Set the schema for the database client.
    pub fn set_schema(&mut self, schema: impl Into<String>) {
        self.schema = schema.into();
    }
@@ -144,24 +142,20 @@ impl Database {
        }
    }

-    /// Set the timezone for the database client.
    pub fn set_timezone(&mut self, timezone: impl Into<String>) {
        self.timezone = timezone.into();
    }

-    /// Set the auth scheme for the database client.
    pub fn set_auth(&mut self, auth: AuthScheme) {
        self.ctx.auth_header = Some(AuthHeader {
            auth_scheme: Some(auth),
        });
    }

-    /// Make an InsertRequests request to the database.
    pub async fn insert(&self, requests: InsertRequests) -> Result<u32> {
        self.handle(Request::Inserts(requests)).await
    }

-    /// Make an InsertRequests request to the database with hints.
    pub async fn insert_with_hints(
        &self,
        requests: InsertRequests,
@@ -178,28 +172,6 @@ impl Database {
        from_grpc_response(response)
    }

-    /// Make a RowInsertRequests request to the database.
-    pub async fn row_inserts(&self, requests: RowInsertRequests) -> Result<u32> {
-        self.handle(Request::RowInserts(requests)).await
-    }
-
-    /// Make a RowInsertRequests request to the database with hints.
-    pub async fn row_inserts_with_hints(
-        &self,
-        requests: RowInsertRequests,
-        hints: &[(&str, &str)],
-    ) -> Result<u32> {
-        let mut client = make_database_client(&self.client)?.inner;
-        let request = self.to_rpc_request(Request::RowInserts(requests));
-
-        let mut request = tonic::Request::new(request);
-        let metadata = request.metadata_mut();
-        Self::put_hints(metadata, hints)?;
-
-        let response = client.handle(request).await?.into_inner();
-        from_grpc_response(response)
-    }
-
    fn put_hints(metadata: &mut MetadataMap, hints: &[(&str, &str)]) -> Result<()> {
        let Some(value) = hints
            .iter()
@@ -215,7 +187,6 @@ impl Database {
        Ok(())
    }

-    /// Make a request to the database.
    pub async fn handle(&self, request: Request) -> Result<u32> {
        let mut client = make_database_client(&self.client)?.inner;
        let request = self.to_rpc_request(request);
@@ -225,22 +196,12 @@ impl Database {

    /// Retry if connection fails, max_retries is the max number of retries, so the total wait time
    /// is `max_retries * GRPC_CONN_TIMEOUT`
-    pub async fn handle_with_retry(
-        &self,
-        request: Request,
-        max_retries: u32,
-        hints: &[(&str, &str)],
-    ) -> Result<u32> {
+    pub async fn handle_with_retry(&self, request: Request, max_retries: u32) -> Result<u32> {
        let mut client = make_database_client(&self.client)?.inner;
        let mut retries = 0;
-
        let request = self.to_rpc_request(request);
-
        loop {
-            let mut tonic_request = tonic::Request::new(request.clone());
-            let metadata = tonic_request.metadata_mut();
-            Self::put_hints(metadata, hints)?;
-            let raw_response = client.handle(tonic_request).await;
+            let raw_response = client.handle(request.clone()).await;
            match (raw_response, retries < max_retries) {
                (Ok(resp), _) => return from_grpc_response(resp.into_inner()),
                (Err(err), true) => {
@@ -250,12 +211,18 @@ impl Database {
                        retries += 1;
                        warn!("Retrying {} times with error = {:?}", retries, err);
                        continue;
+                    } else {
+                        error!(
+                            err; "Failed to send request to grpc handle, retries = {}, not retryable error, aborting",
+                            retries
+                        );
+                        return Err(err.into());
                    }
                }
                (Err(err), false) => {
                    error!(
-                        "Failed to send request to grpc handle after {} retries, error = {:?}",
-                        retries, err
+                        err; "Failed to send request to grpc handle after {} retries",
+                        retries,
                    );
                    return Err(err.into());
                }
@@ -279,7 +246,6 @@ impl Database {
        }
    }

-    /// Executes a SQL query without any hints.
    pub async fn sql<S>(&self, sql: S) -> Result<Output>
    where
        S: AsRef<str>,
@@ -287,7 +253,6 @@ impl Database {
        self.sql_with_hint(sql, &[]).await
    }

-    /// Executes a SQL query with optional hints for query optimization.
    pub async fn sql_with_hint<S>(&self, sql: S, hints: &[(&str, &str)]) -> Result<Output>
    where
        S: AsRef<str>,
@@ -298,7 +263,6 @@ impl Database {
        self.do_get(request, hints).await
    }

-    /// Executes a logical plan directly without SQL parsing.
    pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<Output> {
        let request = Request::Query(QueryRequest {
            query: Some(Query::LogicalPlan(logical_plan)),
@@ -306,7 +270,6 @@ impl Database {
        self.do_get(request, &[]).await
    }

-    /// Creates a new table using the provided table expression.
    pub async fn create(&self, expr: CreateTableExpr) -> Result<Output> {
        let request = Request::Ddl(DdlRequest {
            expr: Some(DdlExpr::CreateTable(expr)),
@@ -314,7 +277,6 @@ impl Database {
        self.do_get(request, &[]).await
    }

-    /// Alters an existing table using the provided alter expression.
    pub async fn alter(&self, expr: AlterTableExpr) -> Result<Output> {
        let request = Request::Ddl(DdlRequest {
            expr: Some(DdlExpr::AlterTable(expr)),
@@ -336,16 +298,21 @@ impl Database {
        let response = client.mut_inner().do_get(request).await.or_else(|e| {
            let tonic_code = e.code();
            let e: Error = e.into();
+            let code = e.status_code();
+            let msg = e.to_string();
+            let error =
+                Err(BoxedError::new(ServerSnafu { code, msg }.build())).with_context(|_| {
+                    FlightGetSnafu {
+                        addr: client.addr().to_string(),
+                        tonic_code,
+                    }
+                });
            error!(
                "Failed to do Flight get, addr: {}, code: {}, source: {:?}",
                client.addr(),
                tonic_code,
-                e
+                error
            );
-            let error = Err(BoxedError::new(e)).with_context(|_| FlightGetSnafu {
-                addr: client.addr().to_string(),
-                tonic_code,
-            });
            error
        })?;

@@ -475,11 +442,8 @@ mod tests {

    use api::v1::auth_header::AuthScheme;
    use api::v1::{AuthHeader, Basic};
-    use common_error::status_code::StatusCode;
-    use tonic::{Code, Status};

    use super::*;
-    use crate::error::TonicSnafu;

    #[test]
    fn test_flight_ctx() {
@@ -502,19 +466,4 @@ mod tests {
            })
        )
    }
-
-    #[test]
-    fn test_from_tonic_status() {
-        let expected = TonicSnafu {
-            code: StatusCode::Internal,
-            msg: "blabla".to_string(),
-            tonic_code: Code::Internal,
-        }
-        .build();
-
-        let status = Status::new(Code::Internal, "blabla");
-        let actual: Error = status.into();
-
-        assert_eq!(expected.to_string(), actual.to_string());
-    }
 }
--- a/src/client/src/error.rs
+++ b/src/client/src/error.rs
@@ -14,13 +14,13 @@

 use std::any::Any;

-use common_error::define_from_tonic_status;
 use common_error::ext::{BoxedError, ErrorExt};
-use common_error::status_code::StatusCode;
+use common_error::status_code::{convert_tonic_code_to_status_code, StatusCode};
+use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG};
 use common_macro::stack_trace_debug;
 use snafu::{location, Location, Snafu};
 use tonic::metadata::errors::InvalidMetadataValue;
-use tonic::Code;
+use tonic::{Code, Status};

 #[derive(Snafu)]
 #[snafu(visibility(pub))]
@@ -124,15 +124,6 @@ pub enum Error {
        location: Location,
        source: datatypes::error::Error,
    },
-
-    #[snafu(display("{}", msg))]
-    Tonic {
-        code: StatusCode,
-        msg: String,
-        tonic_code: Code,
-        #[snafu(implicit)]
-        location: Location,
-    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -144,7 +135,7 @@ impl ErrorExt for Error {
            | Error::MissingField { .. }
            | Error::IllegalDatabaseResponse { .. } => StatusCode::Internal,

-            Error::Server { code, .. } | Error::Tonic { code, .. } => *code,
+            Error::Server { code, .. } => *code,
            Error::FlightGet { source, .. }
            | Error::RegionServer { source, .. }
            | Error::FlowServer { source, .. } => source.status_code(),
@@ -162,7 +153,34 @@ impl ErrorExt for Error {
    }
 }

-define_from_tonic_status!(Error, Tonic);
+impl From<Status> for Error {
+    fn from(e: Status) -> Self {
+        fn get_metadata_value(e: &Status, key: &str) -> Option<String> {
+            e.metadata()
+                .get(key)
+                .and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
+        }
+
+        let code = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_CODE).and_then(|s| {
+            if let Ok(code) = s.parse::<u32>() {
+                StatusCode::from_u32(code)
+            } else {
+                None
+            }
+        });
+        let tonic_code = e.code();
+        let code = code.unwrap_or_else(|| convert_tonic_code_to_status_code(tonic_code));
+
+        let msg = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_MSG)
+            .unwrap_or_else(|| e.message().to_string());
+
+        Self::Server {
+            code,
+            msg,
+            location: location!(),
+        }
+    }
+}

 impl Error {
    pub fn should_retry(&self) -> bool {
--- a/src/client/src/region.rs
+++ b/src/client/src/region.rs
@@ -21,7 +21,7 @@ use arc_swap::ArcSwapOption;
 use arrow_flight::Ticket;
 use async_stream::stream;
 use async_trait::async_trait;
-use common_error::ext::BoxedError;
+use common_error::ext::{BoxedError, ErrorExt};
 use common_error::status_code::StatusCode;
 use common_grpc::flight::{FlightDecoder, FlightMessage};
 use common_meta::error::{self as meta_error, Result as MetaResult};
@@ -107,18 +107,24 @@ impl RegionRequester {
            .mut_inner()
            .do_get(ticket)
            .await
-            .or_else(|e| {
+            .map_err(|e| {
                let tonic_code = e.code();
                let e: error::Error = e.into();
+                let code = e.status_code();
+                let msg = e.to_string();
+                let error = ServerSnafu { code, msg }
+                    .fail::<()>()
+                    .map_err(BoxedError::new)
+                    .with_context(|_| FlightGetSnafu {
+                        tonic_code,
+                        addr: flight_client.addr().to_string(),
+                    })
+                    .unwrap_err();
                error!(
                    e; "Failed to do Flight get, addr: {}, code: {}",
                    flight_client.addr(),
                    tonic_code
                );
-                let error = Err(BoxedError::new(e)).with_context(|_| FlightGetSnafu {
-                    addr: flight_client.addr().to_string(),
-                    tonic_code,
-                });
                error
            })?;

@@ -157,19 +163,70 @@ impl RegionRequester {
            let _span = tracing_context.attach(common_telemetry::tracing::info_span!(
                "poll_flight_data_stream"
            ));
-            while let Some(flight_message) = flight_message_stream.next().await {
-                let flight_message = flight_message
-                    .map_err(BoxedError::new)
-                    .context(ExternalSnafu)?;
+
+            let mut buffered_message: Option<FlightMessage> = None;
+            let mut stream_ended = false;
+
+            while !stream_ended {
+                // get the next message from the buffered message or read from the flight message stream
+                let flight_message_item = if let Some(msg) = buffered_message.take() {
+                    Some(Ok(msg))
+                } else {
+                    flight_message_stream.next().await
+                };
+
+                let flight_message = match flight_message_item {
+                    Some(Ok(message)) => message,
+                    Some(Err(e)) => {
+                        yield Err(BoxedError::new(e)).context(ExternalSnafu);
+                        break;
+                    }
+                    None => break,
+                };

                match flight_message {
                    FlightMessage::RecordBatch(record_batch) => {
-                        yield RecordBatch::try_from_df_record_batch(
+                        let result_to_yield = RecordBatch::try_from_df_record_batch(
                            schema_cloned.clone(),
                            record_batch,
-                        )
+                        );
+
+                        // get the next message from the stream. normally it should be a metrics message.
+                        if let Some(next_flight_message_result) = flight_message_stream.next().await
+                        {
+                            match next_flight_message_result {
+                                Ok(FlightMessage::Metrics(s)) => {
+                                    let m = serde_json::from_str(&s).ok().map(Arc::new);
+                                    metrics_ref.swap(m);
+                                }
+                                Ok(FlightMessage::RecordBatch(rb)) => {
+                                    // for some reason it's not a metrics message, so we need to buffer this record batch
+                                    // and yield it in the next iteration.
+                                    buffered_message = Some(FlightMessage::RecordBatch(rb));
+                                }
+                                Ok(_) => {
+                                    yield IllegalFlightMessagesSnafu {
+                                        reason: "A RecordBatch message can only be succeeded by a Metrics message or another RecordBatch message"
+                                    }
+                                    .fail()
+                                    .map_err(BoxedError::new)
+                                    .context(ExternalSnafu);
+                                    break;
+                                }
+                                Err(e) => {
+                                    yield Err(BoxedError::new(e)).context(ExternalSnafu);
+                                    break;
+                                }
+                            }
+                        } else {
+                            // the stream has ended
+                            stream_ended = true;
+                        }
+
+                        yield result_to_yield;
                    }
                    FlightMessage::Metrics(s) => {
+                        // just a branch in case of some metrics message comes after other things.
                        let m = serde_json::from_str(&s).ok().map(Arc::new);
                        metrics_ref.swap(m);
                        break;
--- a/src/cmd/Cargo.toml
+++ b/src/cmd/Cargo.toml
@@ -16,7 +16,7 @@ default = [
    "meta-srv/pg_kvbackend",
    "meta-srv/mysql_kvbackend",
 ]
-enterprise = ["common-meta/enterprise", "frontend/enterprise", "meta-srv/enterprise", "catalog/enterprise"]
+enterprise = ["common-meta/enterprise", "frontend/enterprise", "meta-srv/enterprise"]
 tokio-console = ["common-telemetry/tokio-console"]

 [lints]
@@ -52,6 +52,7 @@ common-version.workspace = true
 common-wal.workspace = true
 datanode.workspace = true
 datatypes.workspace = true
+either = "1.8"
 etcd-client.workspace = true
 file-engine.workspace = true
 flow.workspace = true
@@ -66,7 +67,6 @@ metric-engine.workspace = true
 mito2.workspace = true
 moka.workspace = true
 nu-ansi-term = "0.46"
-object-store.workspace = true
 plugins.workspace = true
 prometheus.workspace = true
 prost.workspace = true
--- a/src/cmd/src/datanode.rs
+++ b/src/cmd/src/datanode.rs
@@ -280,7 +280,7 @@ mod tests {

    use common_config::ENV_VAR_SEP;
    use common_test_util::temp_dir::create_named_temp_file;
-    use object_store::config::{FileConfig, GcsConfig, ObjectStoreConfig, S3Config};
+    use datanode::config::{FileConfig, GcsConfig, ObjectStoreConfig, S3Config};
    use servers::heartbeat_options::HeartbeatOptions;

    use super::*;
--- a/src/cmd/src/flownode.rs
+++ b/src/cmd/src/flownode.rs
@@ -18,7 +18,7 @@ use std::time::Duration;

 use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
 use catalog::information_extension::DistributedInformationExtension;
-use catalog::kvbackend::{CachedKvBackendBuilder, KvBackendCatalogManagerBuilder, MetaKvBackend};
+use catalog::kvbackend::{CachedKvBackendBuilder, KvBackendCatalogManager, MetaKvBackend};
 use clap::Parser;
 use client::client_manager::NodeClients;
 use common_base::Plugins;
@@ -342,12 +342,13 @@ impl StartCommand {

        let information_extension =
            Arc::new(DistributedInformationExtension::new(meta_client.clone()));
-        let catalog_manager = KvBackendCatalogManagerBuilder::new(
+        let catalog_manager = KvBackendCatalogManager::new(
            information_extension,
            cached_meta_backend.clone(),
            layered_cache_registry.clone(),
-        )
-        .build();
+            None,
+            None,
+        );

        let table_metadata_manager =
            Arc::new(TableMetadataManager::new(cached_meta_backend.clone()));
@@ -370,11 +371,8 @@ impl StartCommand {

        let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
        let flow_auth_header = get_flow_auth_options(&opts).context(StartFlownodeSnafu)?;
-        let frontend_client = FrontendClient::from_meta_client(
-            meta_client.clone(),
-            flow_auth_header,
-            opts.query.clone(),
-        );
+        let frontend_client =
+            FrontendClient::from_meta_client(meta_client.clone(), flow_auth_header);
        let frontend_client = Arc::new(frontend_client);
        let flownode_builder = FlownodeBuilder::new(
            opts.clone(),
--- a/src/cmd/src/frontend.rs
+++ b/src/cmd/src/frontend.rs
@@ -19,7 +19,7 @@ use std::time::Duration;
 use async_trait::async_trait;
 use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
 use catalog::information_extension::DistributedInformationExtension;
-use catalog::kvbackend::{CachedKvBackendBuilder, KvBackendCatalogManagerBuilder, MetaKvBackend};
+use catalog::kvbackend::{CachedKvBackendBuilder, KvBackendCatalogManager, MetaKvBackend};
 use catalog::process_manager::ProcessManager;
 use clap::Parser;
 use client::client_manager::NodeClients;
@@ -102,7 +102,7 @@ impl App for Instance {
 #[derive(Parser)]
 pub struct Command {
    #[clap(subcommand)]
-    pub subcmd: SubCommand,
+    subcmd: SubCommand,
 }

 impl Command {
@@ -116,7 +116,7 @@ impl Command {
 }

 #[derive(Parser)]
-pub enum SubCommand {
+enum SubCommand {
    Start(StartCommand),
 }

@@ -153,7 +153,7 @@ pub struct StartCommand {
    #[clap(long)]
    postgres_addr: Option<String>,
    #[clap(short, long)]
-    pub config_file: Option<String>,
+    config_file: Option<String>,
    #[clap(short, long)]
    influxdb_enable: Option<bool>,
    #[clap(long, value_delimiter = ',', num_args = 1..)]
@@ -169,7 +169,7 @@ pub struct StartCommand {
    #[clap(long)]
    disable_dashboard: Option<bool>,
    #[clap(long, default_value = "GREPTIMEDB_FRONTEND")]
-    pub env_prefix: String,
+    env_prefix: String,
 }

 impl StartCommand {
@@ -350,20 +350,13 @@ impl StartCommand {
            addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)),
            Some(meta_client.clone()),
        ));
-
-        let builder = KvBackendCatalogManagerBuilder::new(
+        let catalog_manager = KvBackendCatalogManager::new(
            information_extension,
            cached_meta_backend.clone(),
            layered_cache_registry.clone(),
-        )
-        .with_process_manager(process_manager.clone());
-        #[cfg(feature = "enterprise")]
-        let builder = if let Some(factories) = plugins.get() {
-            builder.with_extra_information_table_factories(factories)
-        } else {
-            builder
-        };
-        let catalog_manager = builder.build();
+            None,
+            Some(process_manager.clone()),
+        );

        let executor = HandlerGroupExecutor::new(vec![
            Arc::new(ParseMailboxMessageHandler),
--- a/src/cmd/src/metasrv.rs
+++ b/src/cmd/src/metasrv.rs
@@ -54,10 +54,6 @@ impl Instance {
    pub fn get_inner(&self) -> &MetasrvInstance {
        &self.instance
    }
-
-    pub fn mut_inner(&mut self) -> &mut MetasrvInstance {
-        &mut self.instance
-    }
 }

 #[async_trait]
@@ -340,7 +336,7 @@ impl StartCommand {
            .await
            .context(StartMetaServerSnafu)?;

-        let builder = meta_srv::bootstrap::metasrv_builder(&opts, plugins, None)
+        let builder = meta_srv::bootstrap::metasrv_builder(&opts, plugins.clone(), None)
            .await
            .context(error::BuildMetaServerSnafu)?;
        let metasrv = builder.build().await.context(error::BuildMetaServerSnafu)?;
--- a/src/cmd/src/standalone.rs
+++ b/src/cmd/src/standalone.rs
@@ -20,7 +20,7 @@ use std::{fs, path};
 use async_trait::async_trait;
 use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
 use catalog::information_schema::InformationExtension;
-use catalog::kvbackend::KvBackendCatalogManagerBuilder;
+use catalog::kvbackend::KvBackendCatalogManager;
 use catalog::process_manager::ProcessManager;
 use clap::Parser;
 use client::api::v1::meta::RegionRole;
@@ -34,13 +34,14 @@ use common_meta::cluster::{NodeInfo, NodeStatus};
 use common_meta::datanode::RegionStat;
 use common_meta::ddl::flow_meta::FlowMetadataAllocator;
 use common_meta::ddl::table_meta::TableMetadataAllocator;
-use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl, ProcedureExecutorRef};
+use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl};
 use common_meta::ddl_manager::DdlManager;
 use common_meta::key::flow::flow_state::FlowStat;
 use common_meta::key::flow::FlowMetadataManager;
 use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
 use common_meta::kv_backend::KvBackendRef;
 use common_meta::peer::Peer;
+use common_meta::procedure_executor::LocalProcedureExecutor;
 use common_meta::region_keeper::MemoryRegionKeeper;
 use common_meta::region_registry::LeaderRegionRegistry;
 use common_meta::sequence::SequenceBuilder;
@@ -257,34 +258,15 @@ pub struct Instance {
    flownode: FlownodeInstance,
    procedure_manager: ProcedureManagerRef,
    wal_options_allocator: WalOptionsAllocatorRef,
-
-    // The components of standalone, which make it easier to expand based
-    // on the components.
-    #[cfg(feature = "enterprise")]
-    components: Components,
-
    // Keep the logging guard to prevent the worker from being dropped.
    _guard: Vec<WorkerGuard>,
 }

-#[cfg(feature = "enterprise")]
-pub struct Components {
-    pub plugins: Plugins,
-    pub kv_backend: KvBackendRef,
-    pub frontend_client: Arc<FrontendClient>,
-    pub catalog_manager: catalog::CatalogManagerRef,
-}
-
 impl Instance {
    /// Find the socket addr of a server by its `name`.
    pub fn server_addr(&self, name: &str) -> Option<SocketAddr> {
        self.frontend.server_handlers().addr(name)
    }
-
-    #[cfg(feature = "enterprise")]
-    pub fn components(&self) -> &Components {
-        &self.components
-    }
 }

 #[async_trait]
@@ -544,20 +526,13 @@ impl StartCommand {
        ));

        let process_manager = Arc::new(ProcessManager::new(opts.grpc.server_addr.clone(), None));
-        let builder = KvBackendCatalogManagerBuilder::new(
+        let catalog_manager = KvBackendCatalogManager::new(
            information_extension.clone(),
            kv_backend.clone(),
            layered_cache_registry.clone(),
-        )
-        .with_procedure_manager(procedure_manager.clone())
-        .with_process_manager(process_manager.clone());
-        #[cfg(feature = "enterprise")]
-        let builder = if let Some(factories) = plugins.get() {
-            builder.with_extra_information_table_factories(factories)
-        } else {
-            builder
-        };
-        let catalog_manager = builder.build();
+            Some(procedure_manager.clone()),
+            Some(process_manager.clone()),
+        );

        let table_metadata_manager =
            Self::create_table_metadata_manager(kv_backend.clone()).await?;
@@ -571,15 +546,14 @@ impl StartCommand {
        // for standalone not use grpc, but get a handler to frontend grpc client without
        // actually make a connection
        let (frontend_client, frontend_instance_handler) =
-            FrontendClient::from_empty_grpc_handler(opts.query.clone());
-        let frontend_client = Arc::new(frontend_client);
+            FrontendClient::from_empty_grpc_handler();
        let flow_builder = FlownodeBuilder::new(
            flownode_options,
            plugins.clone(),
            table_metadata_manager.clone(),
            catalog_manager.clone(),
            flow_metadata_manager.clone(),
-            frontend_client.clone(),
+            Arc::new(frontend_client.clone()),
        );
        let flownode = flow_builder
            .build()
@@ -636,9 +610,8 @@ impl StartCommand {
            flow_metadata_allocator: flow_metadata_allocator.clone(),
            region_failure_detector_controller: Arc::new(NoopRegionFailureDetectorControl),
        };
-        let procedure_manager_c = procedure_manager.clone();

-        let ddl_manager = DdlManager::try_new(ddl_context, procedure_manager_c, true)
+        let ddl_manager = DdlManager::try_new(ddl_context, procedure_manager.clone(), true)
            .context(error::InitDdlManagerSnafu)?;
        #[cfg(feature = "enterprise")]
        let ddl_manager = {
@@ -646,7 +619,11 @@ impl StartCommand {
                plugins.get();
            ddl_manager.with_trigger_ddl_manager(trigger_ddl_manager)
        };
-        let ddl_task_executor: ProcedureExecutorRef = Arc::new(ddl_manager);
+
+        let procedure_executor = Arc::new(LocalProcedureExecutor::new(
+            Arc::new(ddl_manager),
+            procedure_manager.clone(),
+        ));

        let fe_instance = FrontendBuilder::new(
            fe_opts.clone(),
@@ -654,7 +631,7 @@ impl StartCommand {
            layered_cache_registry.clone(),
            catalog_manager.clone(),
            node_manager.clone(),
-            ddl_task_executor.clone(),
+            procedure_executor.clone(),
            process_manager,
        )
        .with_plugin(plugins.clone())
@@ -679,7 +656,7 @@ impl StartCommand {
            catalog_manager.clone(),
            kv_backend.clone(),
            layered_cache_registry.clone(),
-            ddl_task_executor.clone(),
+            procedure_executor,
            node_manager,
        )
        .await
@@ -689,7 +666,7 @@ impl StartCommand {
        let export_metrics_task = ExportMetricsTask::try_new(&opts.export_metrics, Some(&plugins))
            .context(error::ServersSnafu)?;

-        let servers = Services::new(opts, fe_instance.clone(), plugins.clone())
+        let servers = Services::new(opts, fe_instance.clone(), plugins)
            .build()
            .context(error::StartFrontendSnafu)?;

@@ -700,22 +677,12 @@ impl StartCommand {
            export_metrics_task,
        };

-        #[cfg(feature = "enterprise")]
-        let components = Components {
-            plugins,
-            kv_backend,
-            frontend_client,
-            catalog_manager,
-        };
-
        Ok(Instance {
            datanode,
            frontend,
            flownode,
            procedure_manager,
            wal_options_allocator,
-            #[cfg(feature = "enterprise")]
-            components,
            _guard: guard,
        })
    }
@@ -855,7 +822,7 @@ mod tests {
    use common_config::ENV_VAR_SEP;
    use common_test_util::temp_dir::create_named_temp_file;
    use common_wal::config::DatanodeWalConfig;
-    use object_store::config::{FileConfig, GcsConfig};
+    use datanode::config::{FileConfig, GcsConfig};

    use super::*;
    use crate::options::GlobalOptions;
@@ -974,15 +941,15 @@ mod tests {

        assert!(matches!(
            &dn_opts.storage.store,
-            object_store::config::ObjectStoreConfig::File(FileConfig { .. })
+            datanode::config::ObjectStoreConfig::File(FileConfig { .. })
        ));
        assert_eq!(dn_opts.storage.providers.len(), 2);
        assert!(matches!(
            dn_opts.storage.providers[0],
-            object_store::config::ObjectStoreConfig::Gcs(GcsConfig { .. })
+            datanode::config::ObjectStoreConfig::Gcs(GcsConfig { .. })
        ));
        match &dn_opts.storage.providers[1] {
-            object_store::config::ObjectStoreConfig::S3(s3_config) => {
+            datanode::config::ObjectStoreConfig::S3(s3_config) => {
                assert_eq!(
                    "SecretBox<alloc::string::String>([REDACTED])".to_string(),
                    format!("{:?}", s3_config.access_key_id)
--- a/src/cmd/tests/load_config_test.rs
+++ b/src/cmd/tests/load_config_test.rs
@@ -18,19 +18,17 @@ use cmd::options::GreptimeOptions;
 use cmd::standalone::StandaloneOptions;
 use common_config::{Configurable, DEFAULT_DATA_HOME};
 use common_options::datanode::{ClientOptions, DatanodeClientOptions};
-use common_telemetry::logging::{LoggingOptions, DEFAULT_LOGGING_DIR, DEFAULT_OTLP_HTTP_ENDPOINT};
+use common_telemetry::logging::{LoggingOptions, DEFAULT_LOGGING_DIR, DEFAULT_OTLP_ENDPOINT};
 use common_wal::config::raft_engine::RaftEngineConfig;
 use common_wal::config::DatanodeWalConfig;
 use datanode::config::{DatanodeOptions, RegionEngineConfig, StorageConfig};
 use file_engine::config::EngineConfig as FileEngineConfig;
-use flow::FlownodeOptions;
 use frontend::frontend::FrontendOptions;
 use meta_client::MetaClientOptions;
 use meta_srv::metasrv::MetasrvOptions;
 use meta_srv::selector::SelectorType;
 use metric_engine::config::EngineConfig as MetricEngineConfig;
 use mito2::config::MitoConfig;
-use query::options::QueryOptions;
 use servers::export_metrics::ExportMetricsOption;
 use servers::grpc::GrpcOptions;
 use servers::http::HttpOptions;
@@ -83,7 +81,7 @@ fn test_load_datanode_example_config() {
            logging: LoggingOptions {
                level: Some("info".to_string()),
                dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
-                otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
+                otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
                tracing_sample_ratio: Some(Default::default()),
                ..Default::default()
            },
@@ -126,7 +124,7 @@ fn test_load_frontend_example_config() {
            logging: LoggingOptions {
                level: Some("info".to_string()),
                dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
-                otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
+                otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
                tracing_sample_ratio: Some(Default::default()),
                ..Default::default()
            },
@@ -174,7 +172,7 @@ fn test_load_metasrv_example_config() {
            logging: LoggingOptions {
                dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
                level: Some("info".to_string()),
-                otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
+                otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
                tracing_sample_ratio: Some(Default::default()),
                ..Default::default()
            },
@@ -197,57 +195,6 @@ fn test_load_metasrv_example_config() {
    similar_asserts::assert_eq!(options, expected);
 }

-#[test]
-fn test_load_flownode_example_config() {
-    let example_config = common_test_util::find_workspace_path("config/flownode.example.toml");
-    let options =
-        GreptimeOptions::<FlownodeOptions>::load_layered_options(example_config.to_str(), "")
-            .unwrap();
-    let expected = GreptimeOptions::<FlownodeOptions> {
-        component: FlownodeOptions {
-            node_id: Some(14),
-            flow: Default::default(),
-            grpc: GrpcOptions {
-                bind_addr: "127.0.0.1:6800".to_string(),
-                server_addr: "127.0.0.1:6800".to_string(),
-                runtime_size: 2,
-                ..Default::default()
-            },
-            logging: LoggingOptions {
-                dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
-                level: Some("info".to_string()),
-                otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
-                otlp_export_protocol: Some(common_telemetry::logging::OtlpExportProtocol::Http),
-                tracing_sample_ratio: Some(Default::default()),
-                ..Default::default()
-            },
-            tracing: Default::default(),
-            heartbeat: Default::default(),
-            // flownode deliberately use a slower query parallelism
-            // to avoid overwhelming the frontend with too many queries
-            query: QueryOptions { parallelism: 1 },
-            meta_client: Some(MetaClientOptions {
-                metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
-                timeout: Duration::from_secs(3),
-                heartbeat_timeout: Duration::from_millis(500),
-                ddl_timeout: Duration::from_secs(10),
-                connect_timeout: Duration::from_secs(1),
-                tcp_nodelay: true,
-                metadata_cache_max_capacity: 100000,
-                metadata_cache_ttl: Duration::from_secs(600),
-                metadata_cache_tti: Duration::from_secs(300),
-            }),
-            http: HttpOptions {
-                addr: "127.0.0.1:4000".to_string(),
-                ..Default::default()
-            },
-            user_provider: None,
-        },
-        ..Default::default()
-    };
-    similar_asserts::assert_eq!(options, expected);
-}
-
 #[test]
 fn test_load_standalone_example_config() {
    let example_config = common_test_util::find_workspace_path("config/standalone.example.toml");
@@ -282,7 +229,7 @@ fn test_load_standalone_example_config() {
            logging: LoggingOptions {
                level: Some("info".to_string()),
                dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
-                otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
+                otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
                tracing_sample_ratio: Some(Default::default()),
                ..Default::default()
            },
--- a/src/common/catalog/src/consts.rs
+++ b/src/common/catalog/src/consts.rs
@@ -78,7 +78,7 @@ pub const INFORMATION_SCHEMA_ROUTINES_TABLE_ID: u32 = 21;
 pub const INFORMATION_SCHEMA_SCHEMA_PRIVILEGES_TABLE_ID: u32 = 22;
 /// id for information_schema.TABLE_PRIVILEGES
 pub const INFORMATION_SCHEMA_TABLE_PRIVILEGES_TABLE_ID: u32 = 23;
-/// id for information_schema.TRIGGERS (for mysql)
+/// id for information_schema.TRIGGERS
 pub const INFORMATION_SCHEMA_TRIGGERS_TABLE_ID: u32 = 24;
 /// id for information_schema.GLOBAL_STATUS
 pub const INFORMATION_SCHEMA_GLOBAL_STATUS_TABLE_ID: u32 = 25;
@@ -104,8 +104,6 @@ pub const INFORMATION_SCHEMA_PROCEDURE_INFO_TABLE_ID: u32 = 34;
 pub const INFORMATION_SCHEMA_REGION_STATISTICS_TABLE_ID: u32 = 35;
 /// id for information_schema.process_list
 pub const INFORMATION_SCHEMA_PROCESS_LIST_TABLE_ID: u32 = 36;
-/// id for information_schema.trigger_list (for greptimedb trigger)
-pub const INFORMATION_SCHEMA_TRIGGER_TABLE_ID: u32 = 37;

 // ----- End of information_schema tables -----

--- a/src/common/config/Cargo.toml
+++ b/src/common/config/Cargo.toml
@@ -14,7 +14,6 @@ common-macro.workspace = true
 config.workspace = true
 humantime-serde.workspace = true
 num_cpus.workspace = true
-object-store.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 serde_with.workspace = true
--- a/src/common/config/src/config.rs
+++ b/src/common/config/src/config.rs
@@ -106,7 +106,7 @@ mod tests {
    use common_telemetry::logging::LoggingOptions;
    use common_test_util::temp_dir::create_named_temp_file;
    use common_wal::config::DatanodeWalConfig;
-    use datanode::config::StorageConfig;
+    use datanode::config::{ObjectStoreConfig, StorageConfig};
    use meta_client::MetaClientOptions;
    use serde::{Deserialize, Serialize};

@@ -212,7 +212,7 @@ mod tests {

                // Check the configs from environment variables.
                match &opts.storage.store {
-                    object_store::config::ObjectStoreConfig::S3(s3_config) => {
+                    ObjectStoreConfig::S3(s3_config) => {
                        assert_eq!(s3_config.bucket, "mybucket".to_string());
                    }
                    _ => panic!("unexpected store type"),
--- a/src/common/error/src/status_code.rs
+++ b/src/common/error/src/status_code.rs
@@ -119,11 +119,6 @@ pub enum StatusCode {
    FlowAlreadyExists = 8000,
    FlowNotFound = 8001,
    // ====== End of flow related status code =====
-
-    // ====== Begin of trigger related status code =====
-    TriggerAlreadyExists = 9000,
-    TriggerNotFound = 9001,
-    // ====== End of trigger related status code =====
 }

 impl StatusCode {
@@ -160,8 +155,6 @@ impl StatusCode {
            | StatusCode::RegionNotFound
            | StatusCode::FlowAlreadyExists
            | StatusCode::FlowNotFound
-            | StatusCode::TriggerAlreadyExists
-            | StatusCode::TriggerNotFound
            | StatusCode::RegionReadonly
            | StatusCode::TableColumnNotFound
            | StatusCode::TableColumnExists
@@ -205,8 +198,6 @@ impl StatusCode {
            | StatusCode::PlanQuery
            | StatusCode::FlowAlreadyExists
            | StatusCode::FlowNotFound
-            | StatusCode::TriggerAlreadyExists
-            | StatusCode::TriggerNotFound
            | StatusCode::RegionNotReady
            | StatusCode::RegionBusy
            | StatusCode::RegionReadonly
@@ -239,48 +230,6 @@ impl fmt::Display for StatusCode {
    }
 }

-#[macro_export]
-macro_rules! define_from_tonic_status {
-    ($Error: ty, $Variant: ident) => {
-        impl From<tonic::Status> for $Error {
-            fn from(e: tonic::Status) -> Self {
-                use snafu::location;
-
-                fn metadata_value(e: &tonic::Status, key: &str) -> Option<String> {
-                    e.metadata()
-                        .get(key)
-                        .and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
-                }
-
-                let code = metadata_value(&e, $crate::GREPTIME_DB_HEADER_ERROR_CODE)
-                    .and_then(|s| {
-                        if let Ok(code) = s.parse::<u32>() {
-                            StatusCode::from_u32(code)
-                        } else {
-                            None
-                        }
-                    })
-                    .unwrap_or_else(|| match e.code() {
-                        tonic::Code::Cancelled => StatusCode::Cancelled,
-                        tonic::Code::DeadlineExceeded => StatusCode::DeadlineExceeded,
-                        _ => StatusCode::Internal,
-                    });
-
-                let msg = metadata_value(&e, $crate::GREPTIME_DB_HEADER_ERROR_MSG)
-                    .unwrap_or_else(|| e.message().to_string());
-
-                // TODO(LFC): Make the error variant defined automatically.
-                Self::$Variant {
-                    code,
-                    msg,
-                    tonic_code: e.code(),
-                    location: location!(),
-                }
-            }
-        }
-    };
-}
-
 #[macro_export]
 macro_rules! define_into_tonic_status {
    ($Error: ty) => {
@@ -332,14 +281,12 @@ pub fn status_to_tonic_code(status_code: StatusCode) -> Code {
        | StatusCode::TableColumnExists
        | StatusCode::RegionAlreadyExists
        | StatusCode::DatabaseAlreadyExists
-        | StatusCode::TriggerAlreadyExists
        | StatusCode::FlowAlreadyExists => Code::AlreadyExists,
        StatusCode::TableNotFound
        | StatusCode::RegionNotFound
        | StatusCode::TableColumnNotFound
        | StatusCode::DatabaseNotFound
        | StatusCode::UserNotFound
-        | StatusCode::TriggerNotFound
        | StatusCode::FlowNotFound => Code::NotFound,
        StatusCode::TableUnavailable
        | StatusCode::StorageUnavailable
@@ -357,6 +304,15 @@ pub fn status_to_tonic_code(status_code: StatusCode) -> Code {
    }
 }

+/// Converts tonic [Code] to [StatusCode].
+pub fn convert_tonic_code_to_status_code(code: Code) -> StatusCode {
+    match code {
+        Code::Cancelled => StatusCode::Cancelled,
+        Code::DeadlineExceeded => StatusCode::DeadlineExceeded,
+        _ => StatusCode::Internal,
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use strum::IntoEnumIterator;
--- a/src/common/frontend/Cargo.toml
+++ b/src/common/frontend/Cargo.toml
@@ -12,6 +12,7 @@ common-macro.workspace = true
 common-meta.workspace = true
 greptime-proto.workspace = true
 meta-client.workspace = true
+session.workspace = true
 snafu.workspace = true
 tonic.workspace = true

--- a/src/common/frontend/src/lib.rs
+++ b/src/common/frontend/src/lib.rs
@@ -19,6 +19,7 @@ use snafu::OptionExt;

 pub mod error;
 pub mod selector;
+pub mod slow_query_event;

 #[derive(Debug, Clone, Eq, PartialEq)]
 pub struct DisplayProcessId {
--- a/src/common/frontend/src/slow_query_event.rs
+++ b/src/common/frontend/src/slow_query_event.rs
@@ -0,0 +1,28 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use session::context::QueryContextRef;
+
+#[derive(Debug)]
+pub struct SlowQueryEvent {
+    pub cost: u64,
+    pub threshold: u64,
+    pub query: String,
+    pub is_promql: bool,
+    pub query_ctx: QueryContextRef,
+    pub promql_range: Option<u64>,
+    pub promql_step: Option<u64>,
+    pub promql_start: Option<i64>,
+    pub promql_end: Option<i64>,
+}
--- a/src/common/function/Cargo.toml
+++ b/src/common/function/Cargo.toml
@@ -33,7 +33,6 @@ common-version.workspace = true
 datafusion.workspace = true
 datafusion-common.workspace = true
 datafusion-expr.workspace = true
-datafusion-functions-aggregate-common.workspace = true
 datatypes.workspace = true
 derive_more = { version = "1", default-features = false, features = ["display"] }
 geo = { version = "0.29", optional = true }
--- a/src/common/function/src/admin.rs
+++ b/src/common/function/src/admin.rs
@@ -16,6 +16,9 @@ mod add_region_follower;
 mod flush_compact_region;
 mod flush_compact_table;
 mod migrate_region;
+mod reconcile_catalog;
+mod reconcile_database;
+mod reconcile_table;
 mod remove_region_follower;

 use std::sync::Arc;
@@ -24,6 +27,9 @@ use add_region_follower::AddRegionFollowerFunction;
 use flush_compact_region::{CompactRegionFunction, FlushRegionFunction};
 use flush_compact_table::{CompactTableFunction, FlushTableFunction};
 use migrate_region::MigrateRegionFunction;
+use reconcile_catalog::ReconcileCatalogFunction;
+use reconcile_database::ReconcileDatabaseFunction;
+use reconcile_table::ReconcileTableFunction;
 use remove_region_follower::RemoveRegionFollowerFunction;

 use crate::flush_flow::FlushFlowFunction;
@@ -43,5 +49,8 @@ impl AdminFunction {
        registry.register_async(Arc::new(FlushTableFunction));
        registry.register_async(Arc::new(CompactTableFunction));
        registry.register_async(Arc::new(FlushFlowFunction));
+        registry.register_async(Arc::new(ReconcileCatalogFunction));
+        registry.register_async(Arc::new(ReconcileDatabaseFunction));
+        registry.register_async(Arc::new(ReconcileTableFunction));
    }
 }
--- a/src/common/function/src/admin/reconcile_catalog.rs
+++ b/src/common/function/src/admin/reconcile_catalog.rs
@@ -0,0 +1,179 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use api::v1::meta::reconcile_request::Target;
+use api::v1::meta::{ReconcileCatalog, ReconcileRequest};
+use common_macro::admin_fn;
+use common_query::error::{
+    InvalidFuncArgsSnafu, MissingProcedureServiceHandlerSnafu, Result,
+    UnsupportedInputDataTypeSnafu,
+};
+use common_query::prelude::{Signature, TypeSignature, Volatility};
+use common_telemetry::info;
+use datatypes::prelude::*;
+use session::context::QueryContextRef;
+
+use crate::handlers::ProcedureServiceHandlerRef;
+use crate::helper::{
+    cast_u32, default_parallelism, default_resolve_strategy, get_string_from_params,
+    parse_resolve_strategy,
+};
+
+const FN_NAME: &str = "reconcile_catalog";
+
+/// A function to reconcile a catalog.
+/// Returns the procedure id if success.
+///
+/// - `reconcile_catalog(resolve_strategy)`.
+/// - `reconcile_catalog(resolve_strategy, parallelism)`.
+///
+/// - `reconcile_catalog()`.
+#[admin_fn(
+    name = ReconcileCatalogFunction,
+    display_name = reconcile_catalog,
+    sig_fn = signature,
+    ret = string
+)]
+pub(crate) async fn reconcile_catalog(
+    procedure_service_handler: &ProcedureServiceHandlerRef,
+    query_ctx: &QueryContextRef,
+    params: &[ValueRef<'_>],
+) -> Result<Value> {
+    let (resolve_strategy, parallelism) = match params.len() {
+        0 => (default_resolve_strategy(), default_parallelism()),
+        1 => (
+            parse_resolve_strategy(get_string_from_params(params, 0, FN_NAME)?)?,
+            default_parallelism(),
+        ),
+        2 => {
+            let Some(parallelism) = cast_u32(&params[1])? else {
+                return UnsupportedInputDataTypeSnafu {
+                    function: FN_NAME,
+                    datatypes: params.iter().map(|v| v.data_type()).collect::<Vec<_>>(),
+                }
+                .fail();
+            };
+            (
+                parse_resolve_strategy(get_string_from_params(params, 0, FN_NAME)?)?,
+                parallelism,
+            )
+        }
+        size => {
+            return InvalidFuncArgsSnafu {
+                err_msg: format!(
+                    "The length of the args is not correct, expect 0, 1 or 2, have: {}",
+                    size
+                ),
+            }
+            .fail();
+        }
+    };
+    info!(
+        "Reconciling catalog with resolve_strategy: {:?}, parallelism: {}",
+        resolve_strategy, parallelism
+    );
+    let pid = procedure_service_handler
+        .reconcile(ReconcileRequest {
+            target: Some(Target::ReconcileCatalog(ReconcileCatalog {
+                catalog_name: query_ctx.current_catalog().to_string(),
+                parallelism,
+                resolve_strategy: resolve_strategy as i32,
+            })),
+            ..Default::default()
+        })
+        .await?;
+    match pid {
+        Some(pid) => Ok(Value::from(pid)),
+        None => Ok(Value::Null),
+    }
+}
+
+fn signature() -> Signature {
+    let nums = ConcreteDataType::numerics();
+    let mut signs = Vec::with_capacity(2 + nums.len());
+    signs.extend([
+        // reconcile_catalog()
+        TypeSignature::NullAry,
+        // reconcile_catalog(resolve_strategy)
+        TypeSignature::Exact(vec![ConcreteDataType::string_datatype()]),
+    ]);
+    for sign in nums {
+        // reconcile_catalog(resolve_strategy, parallelism)
+        signs.push(TypeSignature::Exact(vec![
+            ConcreteDataType::string_datatype(),
+            sign,
+        ]));
+    }
+    Signature::one_of(signs, Volatility::Immutable)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::assert_matches::assert_matches;
+    use std::sync::Arc;
+
+    use common_query::error::Error;
+    use datatypes::vectors::{StringVector, UInt64Vector, VectorRef};
+
+    use crate::admin::reconcile_catalog::ReconcileCatalogFunction;
+    use crate::function::{AsyncFunction, FunctionContext};
+
+    #[tokio::test]
+    async fn test_reconcile_catalog() {
+        common_telemetry::init_default_ut_logging();
+
+        // reconcile_catalog()
+        let f = ReconcileCatalogFunction;
+        let args = vec![];
+        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
+        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
+        assert_eq!(expect, result);
+
+        // reconcile_catalog(resolve_strategy)
+        let f = ReconcileCatalogFunction;
+        let args = vec![Arc::new(StringVector::from(vec!["UseMetasrv"])) as _];
+        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
+        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
+        assert_eq!(expect, result);
+
+        // reconcile_catalog(resolve_strategy, parallelism)
+        let f = ReconcileCatalogFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["UseLatest"])) as _,
+            Arc::new(UInt64Vector::from_slice([10])) as _,
+        ];
+        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
+        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
+        assert_eq!(expect, result);
+
+        // unsupported input data type
+        let f = ReconcileCatalogFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["UseLatest"])) as _,
+            Arc::new(StringVector::from(vec!["test"])) as _,
+        ];
+        let err = f.eval(FunctionContext::mock(), &args).await.unwrap_err();
+        assert_matches!(err, Error::UnsupportedInputDataType { .. });
+
+        // invalid function args
+        let f = ReconcileCatalogFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["UseLatest"])) as _,
+            Arc::new(UInt64Vector::from_slice([10])) as _,
+            Arc::new(StringVector::from(vec!["10"])) as _,
+        ];
+        let err = f.eval(FunctionContext::mock(), &args).await.unwrap_err();
+        assert_matches!(err, Error::InvalidFuncArgs { .. });
+    }
+}
--- a/src/common/function/src/admin/reconcile_database.rs
+++ b/src/common/function/src/admin/reconcile_database.rs
@@ -0,0 +1,198 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use api::v1::meta::reconcile_request::Target;
+use api::v1::meta::{ReconcileDatabase, ReconcileRequest};
+use common_macro::admin_fn;
+use common_query::error::{
+    InvalidFuncArgsSnafu, MissingProcedureServiceHandlerSnafu, Result,
+    UnsupportedInputDataTypeSnafu,
+};
+use common_query::prelude::{Signature, TypeSignature, Volatility};
+use common_telemetry::info;
+use datatypes::prelude::*;
+use session::context::QueryContextRef;
+
+use crate::handlers::ProcedureServiceHandlerRef;
+use crate::helper::{
+    cast_u32, default_parallelism, default_resolve_strategy, get_string_from_params,
+    parse_resolve_strategy,
+};
+
+const FN_NAME: &str = "reconcile_database";
+
+/// A function to reconcile a database.
+/// Returns the procedure id if success.
+///
+/// - `reconcile_database(database_name)`.
+/// - `reconcile_database(database_name, resolve_strategy)`.
+/// - `reconcile_database(database_name, resolve_strategy, parallelism)`.
+///
+/// The parameters:
+/// - `database_name`:  the database name
+#[admin_fn(
+    name = ReconcileDatabaseFunction,
+    display_name = reconcile_database,
+    sig_fn = signature,
+    ret = string
+)]
+pub(crate) async fn reconcile_database(
+    procedure_service_handler: &ProcedureServiceHandlerRef,
+    query_ctx: &QueryContextRef,
+    params: &[ValueRef<'_>],
+) -> Result<Value> {
+    let (database_name, resolve_strategy, parallelism) = match params.len() {
+        1 => (
+            get_string_from_params(params, 0, FN_NAME)?,
+            default_resolve_strategy(),
+            default_parallelism(),
+        ),
+        2 => (
+            get_string_from_params(params, 0, FN_NAME)?,
+            parse_resolve_strategy(get_string_from_params(params, 1, FN_NAME)?)?,
+            default_parallelism(),
+        ),
+        3 => {
+            let Some(parallelism) = cast_u32(&params[2])? else {
+                return UnsupportedInputDataTypeSnafu {
+                    function: FN_NAME,
+                    datatypes: params.iter().map(|v| v.data_type()).collect::<Vec<_>>(),
+                }
+                .fail();
+            };
+            (
+                get_string_from_params(params, 0, FN_NAME)?,
+                parse_resolve_strategy(get_string_from_params(params, 1, FN_NAME)?)?,
+                parallelism,
+            )
+        }
+        size => {
+            return InvalidFuncArgsSnafu {
+                err_msg: format!(
+                    "The length of the args is not correct, expect 1, 2 or 3, have: {}",
+                    size
+                ),
+            }
+            .fail();
+        }
+    };
+    info!(
+        "Reconciling database: {}, resolve_strategy: {:?}, parallelism: {}",
+        database_name, resolve_strategy, parallelism
+    );
+    let pid = procedure_service_handler
+        .reconcile(ReconcileRequest {
+            target: Some(Target::ReconcileDatabase(ReconcileDatabase {
+                catalog_name: query_ctx.current_catalog().to_string(),
+                database_name: database_name.to_string(),
+                parallelism,
+                resolve_strategy: resolve_strategy as i32,
+            })),
+            ..Default::default()
+        })
+        .await?;
+    match pid {
+        Some(pid) => Ok(Value::from(pid)),
+        None => Ok(Value::Null),
+    }
+}
+
+fn signature() -> Signature {
+    let nums = ConcreteDataType::numerics();
+    let mut signs = Vec::with_capacity(2 + nums.len());
+    signs.extend([
+        // reconcile_database(datanode_name)
+        TypeSignature::Exact(vec![ConcreteDataType::string_datatype()]),
+        // reconcile_database(database_name, resolve_strategy)
+        TypeSignature::Exact(vec![
+            ConcreteDataType::string_datatype(),
+            ConcreteDataType::string_datatype(),
+        ]),
+    ]);
+    for sign in nums {
+        // reconcile_database(database_name, resolve_strategy, parallelism)
+        signs.push(TypeSignature::Exact(vec![
+            ConcreteDataType::string_datatype(),
+            ConcreteDataType::string_datatype(),
+            sign,
+        ]));
+    }
+    Signature::one_of(signs, Volatility::Immutable)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::assert_matches::assert_matches;
+    use std::sync::Arc;
+
+    use common_query::error::Error;
+    use datatypes::vectors::{StringVector, UInt32Vector, VectorRef};
+
+    use crate::admin::reconcile_database::ReconcileDatabaseFunction;
+    use crate::function::{AsyncFunction, FunctionContext};
+
+    #[tokio::test]
+    async fn test_reconcile_catalog() {
+        common_telemetry::init_default_ut_logging();
+
+        // reconcile_database(database_name)
+        let f = ReconcileDatabaseFunction;
+        let args = vec![Arc::new(StringVector::from(vec!["test"])) as _];
+        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
+        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
+        assert_eq!(expect, result);
+
+        // reconcile_database(database_name, resolve_strategy)
+        let f = ReconcileDatabaseFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["test"])) as _,
+            Arc::new(StringVector::from(vec!["UseLatest"])) as _,
+        ];
+        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
+        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
+        assert_eq!(expect, result);
+
+        // reconcile_database(database_name, resolve_strategy, parallelism)
+        let f = ReconcileDatabaseFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["test"])) as _,
+            Arc::new(StringVector::from(vec!["UseLatest"])) as _,
+            Arc::new(UInt32Vector::from_slice([10])) as _,
+        ];
+        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
+        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
+        assert_eq!(expect, result);
+
+        // invalid function args
+        let f = ReconcileDatabaseFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["UseLatest"])) as _,
+            Arc::new(UInt32Vector::from_slice([10])) as _,
+            Arc::new(StringVector::from(vec!["v1"])) as _,
+            Arc::new(StringVector::from(vec!["v2"])) as _,
+        ];
+        let err = f.eval(FunctionContext::mock(), &args).await.unwrap_err();
+        assert_matches!(err, Error::InvalidFuncArgs { .. });
+
+        // unsupported input data type
+        let f = ReconcileDatabaseFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["UseLatest"])) as _,
+            Arc::new(UInt32Vector::from_slice([10])) as _,
+            Arc::new(StringVector::from(vec!["v1"])) as _,
+        ];
+        let err = f.eval(FunctionContext::mock(), &args).await.unwrap_err();
+        assert_matches!(err, Error::UnsupportedInputDataType { .. });
+    }
+}
--- a/src/common/function/src/admin/reconcile_table.rs
+++ b/src/common/function/src/admin/reconcile_table.rs
@@ -0,0 +1,149 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use api::v1::meta::reconcile_request::Target;
+use api::v1::meta::{ReconcileRequest, ReconcileTable, ResolveStrategy};
+use common_catalog::format_full_table_name;
+use common_error::ext::BoxedError;
+use common_macro::admin_fn;
+use common_query::error::{
+    MissingProcedureServiceHandlerSnafu, Result, TableMutationSnafu, UnsupportedInputDataTypeSnafu,
+};
+use common_query::prelude::{Signature, TypeSignature, Volatility};
+use common_telemetry::info;
+use datatypes::prelude::*;
+use session::context::QueryContextRef;
+use session::table_name::table_name_to_full_name;
+use snafu::ResultExt;
+
+use crate::handlers::ProcedureServiceHandlerRef;
+use crate::helper::parse_resolve_strategy;
+
+const FN_NAME: &str = "reconcile_table";
+
+/// A function to reconcile a table.
+/// Returns the procedure id if success.
+///
+/// - `reconcile_table(table_name)`.
+/// - `reconcile_table(table_name, resolve_strategy)`.
+///
+/// The parameters:
+/// - `table_name`:  the table name
+#[admin_fn(
+    name = ReconcileTableFunction,
+    display_name = reconcile_table,
+    sig_fn = signature,
+    ret = string
+)]
+pub(crate) async fn reconcile_table(
+    procedure_service_handler: &ProcedureServiceHandlerRef,
+    query_ctx: &QueryContextRef,
+    params: &[ValueRef<'_>],
+) -> Result<Value> {
+    let (table_name, resolve_strategy) = match params {
+        [ValueRef::String(table_name)] => (table_name, ResolveStrategy::UseLatest),
+        [ValueRef::String(table_name), ValueRef::String(resolve_strategy)] => {
+            (table_name, parse_resolve_strategy(resolve_strategy)?)
+        }
+        _ => {
+            return UnsupportedInputDataTypeSnafu {
+                function: FN_NAME,
+                datatypes: params.iter().map(|v| v.data_type()).collect::<Vec<_>>(),
+            }
+            .fail()
+        }
+    };
+    let (catalog_name, schema_name, table_name) = table_name_to_full_name(table_name, query_ctx)
+        .map_err(BoxedError::new)
+        .context(TableMutationSnafu)?;
+    info!(
+        "Reconciling table: {} with resolve_strategy: {:?}",
+        format_full_table_name(&catalog_name, &schema_name, &table_name),
+        resolve_strategy
+    );
+    let pid = procedure_service_handler
+        .reconcile(ReconcileRequest {
+            target: Some(Target::ReconcileTable(ReconcileTable {
+                catalog_name,
+                schema_name,
+                table_name,
+                resolve_strategy: resolve_strategy as i32,
+            })),
+            ..Default::default()
+        })
+        .await?;
+    match pid {
+        Some(pid) => Ok(Value::from(pid)),
+        None => Ok(Value::Null),
+    }
+}
+
+fn signature() -> Signature {
+    Signature::one_of(
+        vec![
+            // reconcile_table(table_name)
+            TypeSignature::Exact(vec![ConcreteDataType::string_datatype()]),
+            // reconcile_table(table_name, resolve_strategy)
+            TypeSignature::Exact(vec![
+                ConcreteDataType::string_datatype(),
+                ConcreteDataType::string_datatype(),
+            ]),
+        ],
+        Volatility::Immutable,
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    use std::assert_matches::assert_matches;
+    use std::sync::Arc;
+
+    use common_query::error::Error;
+    use datatypes::vectors::{StringVector, VectorRef};
+
+    use crate::admin::reconcile_table::ReconcileTableFunction;
+    use crate::function::{AsyncFunction, FunctionContext};
+
+    #[tokio::test]
+    async fn test_reconcile_table() {
+        common_telemetry::init_default_ut_logging();
+
+        // reconcile_table(table_name)
+        let f = ReconcileTableFunction;
+        let args = vec![Arc::new(StringVector::from(vec!["test"])) as _];
+        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
+        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
+        assert_eq!(expect, result);
+
+        // reconcile_table(table_name, resolve_strategy)
+        let f = ReconcileTableFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["test"])) as _,
+            Arc::new(StringVector::from(vec!["UseMetasrv"])) as _,
+        ];
+        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
+        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
+        assert_eq!(expect, result);
+
+        // unsupported input data type
+        let f = ReconcileTableFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["test"])) as _,
+            Arc::new(StringVector::from(vec!["UseMetasrv"])) as _,
+            Arc::new(StringVector::from(vec!["10"])) as _,
+        ];
+        let err = f.eval(FunctionContext::mock(), &args).await.unwrap_err();
+        assert_matches!(err, Error::UnsupportedInputDataType { .. });
+    }
+}
--- a/src/common/function/src/aggrs.rs
+++ b/src/common/function/src/aggrs.rs
@@ -13,7 +13,6 @@
 // limitations under the License.

 pub mod approximate;
-pub mod count_hash;
 #[cfg(feature = "geo")]
 pub mod geo;
 pub mod vector;
--- a/src/common/function/src/aggrs/count_hash.rs
+++ b/src/common/function/src/aggrs/count_hash.rs
@@ -1,647 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//! `CountHash` / `count_hash` is a hash-based approximate distinct count function.
-//!
-//! It is a variant of `CountDistinct` that uses a hash function to approximate the
-//! distinct count.
-//! It is designed to be more efficient than `CountDistinct` for large datasets,
-//! but it is not as accurate, as the hash value may be collision.
-
-use std::collections::HashSet;
-use std::fmt::Debug;
-use std::sync::Arc;
-
-use ahash::RandomState;
-use datafusion_common::cast::as_list_array;
-use datafusion_common::error::Result;
-use datafusion_common::hash_utils::create_hashes;
-use datafusion_common::utils::SingleRowListArrayBuilder;
-use datafusion_common::{internal_err, not_impl_err, ScalarValue};
-use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
-use datafusion_expr::utils::{format_state_name, AggregateOrderSensitivity};
-use datafusion_expr::{
-    Accumulator, AggregateUDF, AggregateUDFImpl, EmitTo, GroupsAccumulator, ReversedUDAF,
-    SetMonotonicity, Signature, TypeSignature, Volatility,
-};
-use datafusion_functions_aggregate_common::aggregate::groups_accumulator::nulls::filtered_null_mask;
-use datatypes::arrow;
-use datatypes::arrow::array::{
-    Array, ArrayRef, AsArray, BooleanArray, Int64Array, ListArray, UInt64Array,
-};
-use datatypes::arrow::buffer::{OffsetBuffer, ScalarBuffer};
-use datatypes::arrow::datatypes::{DataType, Field};
-
-use crate::function_registry::FunctionRegistry;
-
-type HashValueType = u64;
-
-// read from /dev/urandom 4047821dc6144e4b2abddf23ad4171126a52eeecd26eff2191cf673b965a7875
-const RANDOM_SEED_0: u64 = 0x4047821dc6144e4b;
-const RANDOM_SEED_1: u64 = 0x2abddf23ad417112;
-const RANDOM_SEED_2: u64 = 0x6a52eeecd26eff21;
-const RANDOM_SEED_3: u64 = 0x91cf673b965a7875;
-
-impl CountHash {
-    pub fn register(registry: &FunctionRegistry) {
-        registry.register_aggr(CountHash::udf_impl());
-    }
-
-    pub fn udf_impl() -> AggregateUDF {
-        AggregateUDF::new_from_impl(CountHash {
-            signature: Signature::one_of(
-                vec![TypeSignature::VariadicAny, TypeSignature::Nullary],
-                Volatility::Immutable,
-            ),
-        })
-    }
-}
-
-#[derive(Debug, Clone)]
-pub struct CountHash {
-    signature: Signature,
-}
-
-impl AggregateUDFImpl for CountHash {
-    fn as_any(&self) -> &dyn std::any::Any {
-        self
-    }
-
-    fn name(&self) -> &str {
-        "count_hash"
-    }
-
-    fn signature(&self) -> &Signature {
-        &self.signature
-    }
-
-    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
-        Ok(DataType::Int64)
-    }
-
-    fn is_nullable(&self) -> bool {
-        false
-    }
-
-    fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<Field>> {
-        Ok(vec![Field::new_list(
-            format_state_name(args.name, "count_hash"),
-            Field::new_list_field(DataType::UInt64, true),
-            // For count_hash accumulator, null list item stands for an
-            // empty value set (i.e., all NULL value so far for that group).
-            true,
-        )])
-    }
-
-    fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {
-        if acc_args.exprs.len() > 1 {
-            return not_impl_err!("count_hash with multiple arguments");
-        }
-
-        Ok(Box::new(CountHashAccumulator {
-            values: HashSet::default(),
-            random_state: RandomState::with_seeds(
-                RANDOM_SEED_0,
-                RANDOM_SEED_1,
-                RANDOM_SEED_2,
-                RANDOM_SEED_3,
-            ),
-            batch_hashes: vec![],
-        }))
-    }
-
-    fn aliases(&self) -> &[String] {
-        &[]
-    }
-
-    fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool {
-        true
-    }
-
-    fn create_groups_accumulator(
-        &self,
-        args: AccumulatorArgs,
-    ) -> Result<Box<dyn GroupsAccumulator>> {
-        if args.exprs.len() > 1 {
-            return not_impl_err!("count_hash with multiple arguments");
-        }
-
-        Ok(Box::new(CountHashGroupAccumulator::new()))
-    }
-
-    fn reverse_expr(&self) -> ReversedUDAF {
-        ReversedUDAF::Identical
-    }
-
-    fn order_sensitivity(&self) -> AggregateOrderSensitivity {
-        AggregateOrderSensitivity::Insensitive
-    }
-
-    fn default_value(&self, _data_type: &DataType) -> Result<ScalarValue> {
-        Ok(ScalarValue::Int64(Some(0)))
-    }
-
-    fn set_monotonicity(&self, _data_type: &DataType) -> SetMonotonicity {
-        SetMonotonicity::Increasing
-    }
-}
-
-/// GroupsAccumulator for `count_hash` aggregate function
-#[derive(Debug)]
-pub struct CountHashGroupAccumulator {
-    /// One HashSet per group to track distinct values
-    distinct_sets: Vec<HashSet<HashValueType, RandomState>>,
-    random_state: RandomState,
-    batch_hashes: Vec<HashValueType>,
-}
-
-impl Default for CountHashGroupAccumulator {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl CountHashGroupAccumulator {
-    pub fn new() -> Self {
-        Self {
-            distinct_sets: vec![],
-            random_state: RandomState::with_seeds(
-                RANDOM_SEED_0,
-                RANDOM_SEED_1,
-                RANDOM_SEED_2,
-                RANDOM_SEED_3,
-            ),
-            batch_hashes: vec![],
-        }
-    }
-
-    fn ensure_sets(&mut self, total_num_groups: usize) {
-        if self.distinct_sets.len() < total_num_groups {
-            self.distinct_sets
-                .resize_with(total_num_groups, HashSet::default);
-        }
-    }
-}
-
-impl GroupsAccumulator for CountHashGroupAccumulator {
-    fn update_batch(
-        &mut self,
-        values: &[ArrayRef],
-        group_indices: &[usize],
-        opt_filter: Option<&BooleanArray>,
-        total_num_groups: usize,
-    ) -> Result<()> {
-        assert_eq!(values.len(), 1, "count_hash expects a single argument");
-        self.ensure_sets(total_num_groups);
-
-        let array = &values[0];
-        self.batch_hashes.clear();
-        self.batch_hashes.resize(array.len(), 0);
-        let hashes = create_hashes(
-            &[ArrayRef::clone(array)],
-            &self.random_state,
-            &mut self.batch_hashes,
-        )?;
-
-        // Use a pattern similar to accumulate_indices to process rows
-        // that are not null and pass the filter
-        let nulls = array.logical_nulls();
-
-        match (nulls.as_ref(), opt_filter) {
-            (None, None) => {
-                // No nulls, no filter - process all rows
-                for (row_idx, &group_idx) in group_indices.iter().enumerate() {
-                    self.distinct_sets[group_idx].insert(hashes[row_idx]);
-                }
-            }
-            (Some(nulls), None) => {
-                // Has nulls, no filter
-                for (row_idx, (&group_idx, is_valid)) in
-                    group_indices.iter().zip(nulls.iter()).enumerate()
-                {
-                    if is_valid {
-                        self.distinct_sets[group_idx].insert(hashes[row_idx]);
-                    }
-                }
-            }
-            (None, Some(filter)) => {
-                // No nulls, has filter
-                for (row_idx, (&group_idx, filter_value)) in
-                    group_indices.iter().zip(filter.iter()).enumerate()
-                {
-                    if let Some(true) = filter_value {
-                        self.distinct_sets[group_idx].insert(hashes[row_idx]);
-                    }
-                }
-            }
-            (Some(nulls), Some(filter)) => {
-                // Has nulls and filter
-                let iter = filter
-                    .iter()
-                    .zip(group_indices.iter())
-                    .zip(nulls.iter())
-                    .enumerate();
-
-                for (row_idx, ((filter_value, &group_idx), is_valid)) in iter {
-                    if is_valid && filter_value == Some(true) {
-                        self.distinct_sets[group_idx].insert(hashes[row_idx]);
-                    }
-                }
-            }
-        }
-
-        Ok(())
-    }
-
-    fn evaluate(&mut self, emit_to: EmitTo) -> Result<ArrayRef> {
-        let distinct_sets: Vec<HashSet<u64, RandomState>> =
-            emit_to.take_needed(&mut self.distinct_sets);
-
-        let counts = distinct_sets
-            .iter()
-            .map(|set| set.len() as i64)
-            .collect::<Vec<_>>();
-        Ok(Arc::new(Int64Array::from(counts)))
-    }
-
-    fn merge_batch(
-        &mut self,
-        values: &[ArrayRef],
-        group_indices: &[usize],
-        _opt_filter: Option<&BooleanArray>,
-        total_num_groups: usize,
-    ) -> Result<()> {
-        assert_eq!(
-            values.len(),
-            1,
-            "count_hash merge expects a single state array"
-        );
-        self.ensure_sets(total_num_groups);
-
-        let list_array = as_list_array(&values[0])?;
-
-        // For each group in the incoming batch
-        for (i, &group_idx) in group_indices.iter().enumerate() {
-            if i < list_array.len() {
-                let inner_array = list_array.value(i);
-                let inner_array = inner_array.as_any().downcast_ref::<UInt64Array>().unwrap();
-                // Add each value to our set for this group
-                for j in 0..inner_array.len() {
-                    if !inner_array.is_null(j) {
-                        self.distinct_sets[group_idx].insert(inner_array.value(j));
-                    }
-                }
-            }
-        }
-
-        Ok(())
-    }
-
-    fn state(&mut self, emit_to: EmitTo) -> Result<Vec<ArrayRef>> {
-        let distinct_sets: Vec<HashSet<u64, RandomState>> =
-            emit_to.take_needed(&mut self.distinct_sets);
-
-        let mut offsets = Vec::with_capacity(distinct_sets.len() + 1);
-        offsets.push(0);
-        let mut curr_len = 0i32;
-
-        let mut value_iter = distinct_sets
-            .into_iter()
-            .flat_map(|set| {
-                // build offset
-                curr_len += set.len() as i32;
-                offsets.push(curr_len);
-                // convert into iter
-                set.into_iter()
-            })
-            .peekable();
-        let data_array: ArrayRef = if value_iter.peek().is_none() {
-            arrow::array::new_empty_array(&DataType::UInt64) as _
-        } else {
-            Arc::new(UInt64Array::from_iter_values(value_iter))
-        };
-        let offset_buffer = OffsetBuffer::new(ScalarBuffer::from(offsets));
-
-        let list_array = ListArray::new(
-            Arc::new(Field::new_list_field(DataType::UInt64, true)),
-            offset_buffer,
-            data_array,
-            None,
-        );
-
-        Ok(vec![Arc::new(list_array) as _])
-    }
-
-    fn convert_to_state(
-        &self,
-        values: &[ArrayRef],
-        opt_filter: Option<&BooleanArray>,
-    ) -> Result<Vec<ArrayRef>> {
-        // For a single hash value per row, create a list array with that value
-        assert_eq!(values.len(), 1, "count_hash expects a single argument");
-        let values = ArrayRef::clone(&values[0]);
-
-        let offsets = OffsetBuffer::new(ScalarBuffer::from_iter(0..values.len() as i32 + 1));
-        let nulls = filtered_null_mask(opt_filter, &values);
-        let list_array = ListArray::new(
-            Arc::new(Field::new_list_field(DataType::UInt64, true)),
-            offsets,
-            values,
-            nulls,
-        );
-
-        Ok(vec![Arc::new(list_array)])
-    }
-
-    fn supports_convert_to_state(&self) -> bool {
-        true
-    }
-
-    fn size(&self) -> usize {
-        // Base size of the struct
-        let mut size = size_of::<Self>();
-
-        // Size of the vector holding the HashSets
-        size += size_of::<Vec<HashSet<HashValueType, RandomState>>>()
-            + self.distinct_sets.capacity() * size_of::<HashSet<HashValueType, RandomState>>();
-
-        // Estimate HashSet contents size more efficiently
-        // Instead of iterating through all values which is expensive, use an approximation
-        for set in &self.distinct_sets {
-            // Base size of the HashSet
-            size += set.capacity() * size_of::<HashValueType>();
-        }
-
-        size
-    }
-}
-
-#[derive(Debug)]
-struct CountHashAccumulator {
-    values: HashSet<HashValueType, RandomState>,
-    random_state: RandomState,
-    batch_hashes: Vec<HashValueType>,
-}
-
-impl CountHashAccumulator {
-    // calculating the size for fixed length values, taking first batch size *
-    // number of batches.
-    fn fixed_size(&self) -> usize {
-        size_of_val(self) + (size_of::<HashValueType>() * self.values.capacity())
-    }
-}
-
-impl Accumulator for CountHashAccumulator {
-    /// Returns the distinct values seen so far as (one element) ListArray.
-    fn state(&mut self) -> Result<Vec<ScalarValue>> {
-        let values = self.values.iter().cloned().collect::<Vec<_>>();
-        let arr = Arc::new(UInt64Array::from(values)) as _;
-        let list_scalar = SingleRowListArrayBuilder::new(arr).build_list_scalar();
-        Ok(vec![list_scalar])
-    }
-
-    fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
-        if values.is_empty() {
-            return Ok(());
-        }
-
-        let arr = &values[0];
-        if arr.data_type() == &DataType::Null {
-            return Ok(());
-        }
-
-        self.batch_hashes.clear();
-        self.batch_hashes.resize(arr.len(), 0);
-        let hashes = create_hashes(
-            &[ArrayRef::clone(arr)],
-            &self.random_state,
-            &mut self.batch_hashes,
-        )?;
-        for hash in hashes.as_slice() {
-            self.values.insert(*hash);
-        }
-        Ok(())
-    }
-
-    /// Merges multiple sets of distinct values into the current set.
-    ///
-    /// The input to this function is a `ListArray` with **multiple** rows,
-    /// where each row contains the values from a partial aggregate's phase (e.g.
-    /// the result of calling `Self::state` on multiple accumulators).
-    fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
-        if states.is_empty() {
-            return Ok(());
-        }
-        assert_eq!(states.len(), 1, "array_agg states must be singleton!");
-        let array = &states[0];
-        let list_array = array.as_list::<i32>();
-        for inner_array in list_array.iter() {
-            let Some(inner_array) = inner_array else {
-                return internal_err!(
-                    "Intermediate results of count_hash should always be non null"
-                );
-            };
-            let hash_array = inner_array.as_any().downcast_ref::<UInt64Array>().unwrap();
-            for i in 0..hash_array.len() {
-                self.values.insert(hash_array.value(i));
-            }
-        }
-        Ok(())
-    }
-
-    fn evaluate(&mut self) -> Result<ScalarValue> {
-        Ok(ScalarValue::Int64(Some(self.values.len() as i64)))
-    }
-
-    fn size(&self) -> usize {
-        self.fixed_size()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use datatypes::arrow::array::{Array, BooleanArray, Int32Array, Int64Array};
-
-    use super::*;
-
-    fn create_test_accumulator() -> CountHashAccumulator {
-        CountHashAccumulator {
-            values: HashSet::default(),
-            random_state: RandomState::with_seeds(
-                RANDOM_SEED_0,
-                RANDOM_SEED_1,
-                RANDOM_SEED_2,
-                RANDOM_SEED_3,
-            ),
-            batch_hashes: vec![],
-        }
-    }
-
-    #[test]
-    fn test_count_hash_accumulator() -> Result<()> {
-        let mut acc = create_test_accumulator();
-
-        // Test with some data
-        let array = Arc::new(Int32Array::from(vec![
-            Some(1),
-            Some(2),
-            Some(3),
-            Some(1),
-            Some(2),
-            None,
-        ])) as ArrayRef;
-        acc.update_batch(&[array])?;
-        let result = acc.evaluate()?;
-        assert_eq!(result, ScalarValue::Int64(Some(4)));
-
-        // Test with empty data
-        let mut acc = create_test_accumulator();
-        let array = Arc::new(Int32Array::from(vec![] as Vec<Option<i32>>)) as ArrayRef;
-        acc.update_batch(&[array])?;
-        let result = acc.evaluate()?;
-        assert_eq!(result, ScalarValue::Int64(Some(0)));
-
-        // Test with only nulls
-        let mut acc = create_test_accumulator();
-        let array = Arc::new(Int32Array::from(vec![None, None, None])) as ArrayRef;
-        acc.update_batch(&[array])?;
-        let result = acc.evaluate()?;
-        assert_eq!(result, ScalarValue::Int64(Some(1)));
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_count_hash_accumulator_merge() -> Result<()> {
-        // Accumulator 1
-        let mut acc1 = create_test_accumulator();
-        let array1 = Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])) as ArrayRef;
-        acc1.update_batch(&[array1])?;
-        let state1 = acc1.state()?;
-
-        // Accumulator 2
-        let mut acc2 = create_test_accumulator();
-        let array2 = Arc::new(Int32Array::from(vec![Some(3), Some(4), Some(5)])) as ArrayRef;
-        acc2.update_batch(&[array2])?;
-        let state2 = acc2.state()?;
-
-        // Merge state1 and state2 into a new accumulator
-        let mut acc_merged = create_test_accumulator();
-        let state_array1 = state1[0].to_array()?;
-        let state_array2 = state2[0].to_array()?;
-
-        acc_merged.merge_batch(&[state_array1])?;
-        acc_merged.merge_batch(&[state_array2])?;
-
-        let result = acc_merged.evaluate()?;
-        // Distinct values are {1, 2, 3, 4, 5}, so count is 5
-        assert_eq!(result, ScalarValue::Int64(Some(5)));
-
-        Ok(())
-    }
-
-    fn create_test_group_accumulator() -> CountHashGroupAccumulator {
-        CountHashGroupAccumulator::new()
-    }
-
-    #[test]
-    fn test_count_hash_group_accumulator() -> Result<()> {
-        let mut acc = create_test_group_accumulator();
-        let values = Arc::new(Int32Array::from(vec![1, 2, 1, 3, 2, 4, 5])) as ArrayRef;
-        let group_indices = vec![0, 1, 0, 0, 1, 2, 0];
-        let total_num_groups = 3;
-
-        acc.update_batch(&[values], &group_indices, None, total_num_groups)?;
-
-        let result_array = acc.evaluate(EmitTo::All)?;
-        let result = result_array.as_any().downcast_ref::<Int64Array>().unwrap();
-
-        // Group 0: {1, 3, 5} -> 3
-        // Group 1: {2} -> 1
-        // Group 2: {4} -> 1
-        assert_eq!(result.value(0), 3);
-        assert_eq!(result.value(1), 1);
-        assert_eq!(result.value(2), 1);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_count_hash_group_accumulator_with_filter() -> Result<()> {
-        let mut acc = create_test_group_accumulator();
-        let values = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6])) as ArrayRef;
-        let group_indices = vec![0, 0, 1, 1, 2, 2];
-        let filter = BooleanArray::from(vec![true, false, true, true, false, true]);
-        let total_num_groups = 3;
-
-        acc.update_batch(&[values], &group_indices, Some(&filter), total_num_groups)?;
-
-        let result_array = acc.evaluate(EmitTo::All)?;
-        let result = result_array.as_any().downcast_ref::<Int64Array>().unwrap();
-
-        // Group 0: {1} (2 is filtered out) -> 1
-        // Group 1: {3, 4} -> 2
-        // Group 2: {6} (5 is filtered out) -> 1
-        assert_eq!(result.value(0), 1);
-        assert_eq!(result.value(1), 2);
-        assert_eq!(result.value(2), 1);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_count_hash_group_accumulator_merge() -> Result<()> {
-        // Accumulator 1
-        let mut acc1 = create_test_group_accumulator();
-        let values1 = Arc::new(Int32Array::from(vec![1, 2, 3, 4])) as ArrayRef;
-        let group_indices1 = vec![0, 0, 1, 1];
-        acc1.update_batch(&[values1], &group_indices1, None, 2)?;
-        // acc1 state: group 0 -> {1, 2}, group 1 -> {3, 4}
-        let state1 = acc1.state(EmitTo::All)?;
-
-        // Accumulator 2
-        let mut acc2 = create_test_group_accumulator();
-        let values2 = Arc::new(Int32Array::from(vec![5, 6, 1, 3])) as ArrayRef;
-        // Merge into different group indices
-        let group_indices2 = vec![2, 2, 0, 1];
-        acc2.update_batch(&[values2], &group_indices2, None, 3)?;
-        // acc2 state: group 0 -> {1}, group 1 -> {3}, group 2 -> {5, 6}
-
-        // Merge state from acc1 into acc2
-        // We will merge acc1's group 0 into acc2's group 0
-        // and acc1's group 1 into acc2's group 2
-        let merge_group_indices = vec![0, 2];
-        acc2.merge_batch(&state1, &merge_group_indices, None, 3)?;
-
-        let result_array = acc2.evaluate(EmitTo::All)?;
-        let result = result_array.as_any().downcast_ref::<Int64Array>().unwrap();
-
-        // Final state of acc2:
-        // Group 0: {1} U {1, 2} -> {1, 2}, count = 2
-        // Group 1: {3}, count = 1
-        // Group 2: {5, 6} U {3, 4} -> {3, 4, 5, 6}, count = 4
-        assert_eq!(result.value(0), 2);
-        assert_eq!(result.value(1), 1);
-        assert_eq!(result.value(2), 4);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_size() {
-        let acc = create_test_group_accumulator();
-        // Just test it doesn't crash and returns a value.
-        assert!(acc.size() > 0);
-    }
-}
--- a/src/common/function/src/function_registry.rs
+++ b/src/common/function/src/function_registry.rs
@@ -21,7 +21,6 @@ use once_cell::sync::Lazy;

 use crate::admin::AdminFunction;
 use crate::aggrs::approximate::ApproximateFunction;
-use crate::aggrs::count_hash::CountHash;
 use crate::aggrs::vector::VectorFunction as VectorAggrFunction;
 use crate::function::{AsyncFunctionRef, Function, FunctionRef};
 use crate::function_factory::ScalarFunctionFactory;
@@ -145,9 +144,6 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
    // Approximate functions
    ApproximateFunction::register(&function_registry);

-    // CountHash function
-    CountHash::register(&function_registry);
-
    Arc::new(function_registry)
 });

--- a/src/common/function/src/handlers.rs
+++ b/src/common/function/src/handlers.rs
@@ -14,6 +14,7 @@

 use std::sync::Arc;

+use api::v1::meta::ReconcileRequest;
 use async_trait::async_trait;
 use catalog::CatalogManagerRef;
 use common_base::AffectedRows;
@@ -65,6 +66,9 @@ pub trait ProcedureServiceHandler: Send + Sync {
    /// Migrate a region from source peer to target peer, returns the procedure id if success.
    async fn migrate_region(&self, request: MigrateRegionRequest) -> Result<Option<String>>;

+    /// Reconcile a table, database or catalog, returns the procedure id if success.
+    async fn reconcile(&self, request: ReconcileRequest) -> Result<Option<String>>;
+
    /// Query the procedure' state by its id
    async fn query_procedure_state(&self, pid: &str) -> Result<ProcedureStateResponse>;

--- a/src/common/function/src/helper.rs
+++ b/src/common/function/src/helper.rs
@@ -12,12 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use common_query::error::{InvalidInputTypeSnafu, Result};
+use api::v1::meta::ResolveStrategy;
+use common_query::error::{
+    InvalidFuncArgsSnafu, InvalidInputTypeSnafu, Result, UnsupportedInputDataTypeSnafu,
+};
 use common_query::prelude::{Signature, TypeSignature, Volatility};
 use datatypes::prelude::ConcreteDataType;
 use datatypes::types::cast::cast;
 use datatypes::value::ValueRef;
-use snafu::ResultExt;
+use snafu::{OptionExt, ResultExt};

 /// Create a function signature with oneof signatures of interleaving two arguments.
 pub fn one_of_sigs2(args1: Vec<ConcreteDataType>, args2: Vec<ConcreteDataType>) -> Signature {
@@ -43,3 +46,64 @@ pub fn cast_u64(value: &ValueRef) -> Result<Option<u64>> {
        })
        .map(|v| v.as_u64())
 }
+
+/// Cast a [`ValueRef`] to u32, returns `None` if fails
+pub fn cast_u32(value: &ValueRef) -> Result<Option<u32>> {
+    cast((*value).into(), &ConcreteDataType::uint32_datatype())
+        .context(InvalidInputTypeSnafu {
+            err_msg: format!(
+                "Failed to cast input into uint32, actual type: {:#?}",
+                value.data_type(),
+            ),
+        })
+        .map(|v| v.as_u64().map(|v| v as u32))
+}
+
+/// Parse a resolve strategy from a string.
+pub fn parse_resolve_strategy(strategy: &str) -> Result<ResolveStrategy> {
+    ResolveStrategy::from_str_name(strategy).context(InvalidFuncArgsSnafu {
+        err_msg: format!("Invalid resolve strategy: {}", strategy),
+    })
+}
+
+/// Default parallelism for reconcile operations.
+pub fn default_parallelism() -> u32 {
+    64
+}
+
+/// Default resolve strategy for reconcile operations.
+pub fn default_resolve_strategy() -> ResolveStrategy {
+    ResolveStrategy::UseLatest
+}
+
+/// Get the string value from the params.
+///
+/// # Errors
+/// Returns an error if the input type is not a string.
+pub fn get_string_from_params<'a>(
+    params: &'a [ValueRef<'a>],
+    index: usize,
+    fn_name: &'a str,
+) -> Result<&'a str> {
+    let ValueRef::String(s) = &params[index] else {
+        return UnsupportedInputDataTypeSnafu {
+            function: fn_name,
+            datatypes: params.iter().map(|v| v.data_type()).collect::<Vec<_>>(),
+        }
+        .fail();
+    };
+    Ok(s)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_resolve_strategy() {
+        assert_eq!(
+            parse_resolve_strategy("UseLatest").unwrap(),
+            ResolveStrategy::UseLatest
+        );
+    }
+}
--- a/src/common/function/src/lib.rs
+++ b/src/common/function/src/lib.rs
@@ -14,6 +14,7 @@

 #![feature(let_chains)]
 #![feature(try_blocks)]
+#![feature(assert_matches)]

 mod admin;
 mod flush_flow;
--- a/src/common/function/src/scalars/math.rs
+++ b/src/common/function/src/scalars/math.rs
@@ -14,6 +14,7 @@

 pub mod clamp;
 mod modulo;
+mod pow;
 mod rate;

 use std::fmt;
@@ -25,6 +26,7 @@ use datafusion::error::DataFusionError;
 use datafusion::logical_expr::Volatility;
 use datatypes::prelude::ConcreteDataType;
 use datatypes::vectors::VectorRef;
+pub use pow::PowFunction;
 pub use rate::RateFunction;
 use snafu::ResultExt;

@@ -37,6 +39,7 @@ pub(crate) struct MathFunction;
 impl MathFunction {
    pub fn register(registry: &FunctionRegistry) {
        registry.register_scalar(ModuloFunction);
+        registry.register_scalar(PowFunction);
        registry.register_scalar(RateFunction);
        registry.register_scalar(RangeFunction);
        registry.register_scalar(ClampFunction);
--- a/src/common/function/src/scalars/math/pow.rs
+++ b/src/common/function/src/scalars/math/pow.rs
@@ -0,0 +1,120 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt;
+use std::sync::Arc;
+
+use common_query::error::Result;
+use common_query::prelude::{Signature, Volatility};
+use datatypes::data_type::DataType;
+use datatypes::prelude::ConcreteDataType;
+use datatypes::types::LogicalPrimitiveType;
+use datatypes::vectors::VectorRef;
+use datatypes::with_match_primitive_type_id;
+use num::traits::Pow;
+use num_traits::AsPrimitive;
+
+use crate::function::{Function, FunctionContext};
+use crate::scalars::expression::{scalar_binary_op, EvalContext};
+
+#[derive(Clone, Debug, Default)]
+pub struct PowFunction;
+
+impl Function for PowFunction {
+    fn name(&self) -> &str {
+        "pow"
+    }
+
+    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
+        Ok(ConcreteDataType::float64_datatype())
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::uniform(2, ConcreteDataType::numerics(), Volatility::Immutable)
+    }
+
+    fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
+        with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
+            with_match_primitive_type_id!(columns[1].data_type().logical_type_id(), |$T| {
+                let col = scalar_binary_op::<<$S as LogicalPrimitiveType>::Native, <$T as LogicalPrimitiveType>::Native, f64, _>(&columns[0], &columns[1], scalar_pow, &mut EvalContext::default())?;
+                Ok(Arc::new(col))
+            },{
+                unreachable!()
+            })
+        },{
+            unreachable!()
+        })
+    }
+}
+
+#[inline]
+fn scalar_pow<S, T>(value: Option<S>, base: Option<T>, _ctx: &mut EvalContext) -> Option<f64>
+where
+    S: AsPrimitive<f64>,
+    T: AsPrimitive<f64>,
+{
+    match (value, base) {
+        (Some(value), Some(base)) => Some(value.as_().pow(base.as_())),
+        _ => None,
+    }
+}
+
+impl fmt::Display for PowFunction {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "POW")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use common_query::prelude::TypeSignature;
+    use datatypes::value::Value;
+    use datatypes::vectors::{Float32Vector, Int8Vector};
+
+    use super::*;
+    use crate::function::FunctionContext;
+    #[test]
+    fn test_pow_function() {
+        let pow = PowFunction;
+
+        assert_eq!("pow", pow.name());
+        assert_eq!(
+            ConcreteDataType::float64_datatype(),
+            pow.return_type(&[]).unwrap()
+        );
+
+        assert!(matches!(pow.signature(),
+                         Signature {
+                             type_signature: TypeSignature::Uniform(2, valid_types),
+                             volatility: Volatility::Immutable
+                         } if  valid_types == ConcreteDataType::numerics()
+        ));
+
+        let values = vec![1.0, 2.0, 3.0];
+        let bases = vec![0i8, -1i8, 3i8];
+
+        let args: Vec<VectorRef> = vec![
+            Arc::new(Float32Vector::from_vec(values.clone())),
+            Arc::new(Int8Vector::from_vec(bases.clone())),
+        ];
+
+        let vector = pow.eval(&FunctionContext::default(), &args).unwrap();
+        assert_eq!(3, vector.len());
+
+        for i in 0..3 {
+            let p: f64 = (values[i] as f64).pow(bases[i] as f64);
+            assert!(matches!(vector.get(i), Value::Float64(v) if v == p));
+        }
+    }
+}
--- a/src/common/function/src/state.rs
+++ b/src/common/function/src/state.rs
@@ -32,7 +32,7 @@ impl FunctionState {
    pub fn mock() -> Self {
        use std::sync::Arc;

-        use api::v1::meta::ProcedureStatus;
+        use api::v1::meta::{ProcedureStatus, ReconcileRequest};
        use async_trait::async_trait;
        use catalog::CatalogManagerRef;
        use common_base::AffectedRows;
@@ -63,6 +63,10 @@ impl FunctionState {
                Ok(Some("test_pid".to_string()))
            }

+            async fn reconcile(&self, _request: ReconcileRequest) -> Result<Option<String>> {
+                Ok(Some("test_pid".to_string()))
+            }
+
            async fn query_procedure_state(&self, _pid: &str) -> Result<ProcedureStateResponse> {
                Ok(ProcedureStateResponse {
                    status: ProcedureStatus::Done.into(),
--- a/src/common/grpc-expr/src/alter.rs
+++ b/src/common/grpc-expr/src/alter.rs
@@ -29,8 +29,8 @@ use snafu::{ensure, OptionExt, ResultExt};
 use store_api::region_request::{SetRegionOption, UnsetRegionOption};
 use table::metadata::TableId;
 use table::requests::{
-    AddColumnRequest, AlterKind, AlterTableRequest, ModifyColumnTypeRequest, SetIndexOptions,
-    UnsetIndexOptions,
+    AddColumnRequest, AlterKind, AlterTableRequest, ModifyColumnTypeRequest, SetIndexOption,
+    UnsetIndexOption,
 };

 use crate::error::{
@@ -43,6 +43,59 @@ use crate::error::{
 const LOCATION_TYPE_FIRST: i32 = LocationType::First as i32;
 const LOCATION_TYPE_AFTER: i32 = LocationType::After as i32;

+fn set_index_option_from_proto(set_index: api::v1::SetIndex) -> Result<SetIndexOption> {
+    let options = set_index.options.context(MissingAlterIndexOptionSnafu)?;
+    Ok(match options {
+        api::v1::set_index::Options::Fulltext(f) => SetIndexOption::Fulltext {
+            column_name: f.column_name.clone(),
+            options: FulltextOptions::new(
+                f.enable,
+                as_fulltext_option_analyzer(
+                    Analyzer::try_from(f.analyzer).context(InvalidSetFulltextOptionRequestSnafu)?,
+                ),
+                f.case_sensitive,
+                as_fulltext_option_backend(
+                    PbFulltextBackend::try_from(f.backend)
+                        .context(InvalidSetFulltextOptionRequestSnafu)?,
+                ),
+                f.granularity as u32,
+                f.false_positive_rate,
+            )
+            .context(InvalidIndexOptionSnafu)?,
+        },
+        api::v1::set_index::Options::Inverted(i) => SetIndexOption::Inverted {
+            column_name: i.column_name,
+        },
+        api::v1::set_index::Options::Skipping(s) => SetIndexOption::Skipping {
+            column_name: s.column_name,
+            options: SkippingIndexOptions::new(
+                s.granularity as u32,
+                s.false_positive_rate,
+                as_skipping_index_type(
+                    PbSkippingIndexType::try_from(s.skipping_index_type)
+                        .context(InvalidSetSkippingIndexOptionRequestSnafu)?,
+                ),
+            )
+            .context(InvalidIndexOptionSnafu)?,
+        },
+    })
+}
+
+fn unset_index_option_from_proto(unset_index: api::v1::UnsetIndex) -> Result<UnsetIndexOption> {
+    let options = unset_index.options.context(MissingAlterIndexOptionSnafu)?;
+    Ok(match options {
+        api::v1::unset_index::Options::Fulltext(f) => UnsetIndexOption::Fulltext {
+            column_name: f.column_name,
+        },
+        api::v1::unset_index::Options::Inverted(i) => UnsetIndexOption::Inverted {
+            column_name: i.column_name,
+        },
+        api::v1::unset_index::Options::Skipping(s) => UnsetIndexOption::Skipping {
+            column_name: s.column_name,
+        },
+    })
+}
+
 /// Convert an [`AlterTableExpr`] to an [`AlterTableRequest`]
 pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result<AlterTableRequest> {
    let catalog_name = expr.catalog_name;
@@ -121,70 +174,34 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result<
                    .context(InvalidUnsetTableOptionRequestSnafu)?,
            }
        }
-        Kind::SetIndex(o) => match o.options {
-            Some(opt) => match opt {
-                api::v1::set_index::Options::Fulltext(f) => AlterKind::SetIndex {
-                    options: SetIndexOptions::Fulltext {
-                        column_name: f.column_name.clone(),
-                        options: FulltextOptions::new(
-                            f.enable,
-                            as_fulltext_option_analyzer(
-                                Analyzer::try_from(f.analyzer)
-                                    .context(InvalidSetFulltextOptionRequestSnafu)?,
-                            ),
-                            f.case_sensitive,
-                            as_fulltext_option_backend(
-                                PbFulltextBackend::try_from(f.backend)
-                                    .context(InvalidSetFulltextOptionRequestSnafu)?,
-                            ),
-                            f.granularity as u32,
-                            f.false_positive_rate,
-                        )
-                        .context(InvalidIndexOptionSnafu)?,
-                    },
-                },
-                api::v1::set_index::Options::Inverted(i) => AlterKind::SetIndex {
-                    options: SetIndexOptions::Inverted {
-                        column_name: i.column_name,
-                    },
-                },
-                api::v1::set_index::Options::Skipping(s) => AlterKind::SetIndex {
-                    options: SetIndexOptions::Skipping {
-                        column_name: s.column_name,
-                        options: SkippingIndexOptions::new(
-                            s.granularity as u32,
-                            s.false_positive_rate,
-                            as_skipping_index_type(
-                                PbSkippingIndexType::try_from(s.skipping_index_type)
-                                    .context(InvalidSetSkippingIndexOptionRequestSnafu)?,
-                            ),
-                        )
-                        .context(InvalidIndexOptionSnafu)?,
-                    },
-                },
-            },
-            None => return MissingAlterIndexOptionSnafu.fail(),
-        },
-        Kind::UnsetIndex(o) => match o.options {
-            Some(opt) => match opt {
-                api::v1::unset_index::Options::Fulltext(f) => AlterKind::UnsetIndex {
-                    options: UnsetIndexOptions::Fulltext {
-                        column_name: f.column_name,
-                    },
-                },
-                api::v1::unset_index::Options::Inverted(i) => AlterKind::UnsetIndex {
-                    options: UnsetIndexOptions::Inverted {
-                        column_name: i.column_name,
-                    },
-                },
-                api::v1::unset_index::Options::Skipping(s) => AlterKind::UnsetIndex {
-                    options: UnsetIndexOptions::Skipping {
-                        column_name: s.column_name,
-                    },
-                },
-            },
-            None => return MissingAlterIndexOptionSnafu.fail(),
-        },
+        Kind::SetIndex(o) => {
+            let option = set_index_option_from_proto(o)?;
+            AlterKind::SetIndexes {
+                options: vec![option],
+            }
+        }
+        Kind::UnsetIndex(o) => {
+            let option = unset_index_option_from_proto(o)?;
+            AlterKind::UnsetIndexes {
+                options: vec![option],
+            }
+        }
+        Kind::SetIndexes(o) => {
+            let options = o
+                .set_indexes
+                .into_iter()
+                .map(set_index_option_from_proto)
+                .collect::<Result<Vec<_>>>()?;
+            AlterKind::SetIndexes { options }
+        }
+        Kind::UnsetIndexes(o) => {
+            let options = o
+                .unset_indexes
+                .into_iter()
+                .map(unset_index_option_from_proto)
+                .collect::<Result<Vec<_>>>()?;
+            AlterKind::UnsetIndexes { options }
+        }
        Kind::DropDefaults(o) => {
            let names = o
                .drop_defaults
--- a/src/common/meta/Cargo.toml
+++ b/src/common/meta/Cargo.toml
@@ -32,6 +32,7 @@ common-procedure.workspace = true
 common-procedure-test.workspace = true
 common-query.workspace = true
 common-recordbatch.workspace = true
+common-runtime.workspace = true
 common-telemetry.workspace = true
 common-time.workspace = true
 common-wal.workspace = true
@@ -69,7 +70,6 @@ table = { workspace = true, features = ["testing"] }
 tokio.workspace = true
 tokio-postgres = { workspace = true, optional = true }
 tonic.workspace = true
-tracing.workspace = true
 typetag.workspace = true

 [dev-dependencies]
--- a/src/common/meta/src/cache/flow/table_flownode.rs
+++ b/src/common/meta/src/cache/flow/table_flownode.rs
@@ -15,6 +15,7 @@
 use std::collections::HashMap;
 use std::sync::Arc;

+use common_telemetry::info;
 use futures::future::BoxFuture;
 use moka::future::Cache;
 use moka::ops::compute::Op;
@@ -89,6 +90,12 @@ fn init_factory(table_flow_manager: TableFlowManagerRef) -> Initializer<TableId,
                // we have a corresponding cache invalidation mechanism to invalidate `(Key, EmptyHashSet)`.
                .map(Arc::new)
                .map(Some)
+                .inspect(|set| {
+                    info!(
+                        "Initialized table_flownode cache for table_id: {}, set: {:?}",
+                        table_id, set
+                    );
+                })
        })
    })
 }
@@ -167,6 +174,13 @@ fn invalidator<'a>(
        match ident {
            CacheIdent::CreateFlow(create_flow) => handle_create_flow(cache, create_flow).await,
            CacheIdent::DropFlow(drop_flow) => handle_drop_flow(cache, drop_flow).await,
+            CacheIdent::FlowNodeAddressChange(node_id) => {
+                info!(
+                    "Invalidate flow node cache for node_id in table_flownode: {}",
+                    node_id
+                );
+                cache.invalidate_all();
+            }
            _ => {}
        }
        Ok(())
@@ -174,7 +188,10 @@ fn invalidator<'a>(
 }

 fn filter(ident: &CacheIdent) -> bool {
-    matches!(ident, CacheIdent::CreateFlow(_) | CacheIdent::DropFlow(_))
+    matches!(
+        ident,
+        CacheIdent::CreateFlow(_) | CacheIdent::DropFlow(_) | CacheIdent::FlowNodeAddressChange(_)
+    )
 }

 #[cfg(test)]
--- a/src/common/meta/src/cache_invalidator.rs
+++ b/src/common/meta/src/cache_invalidator.rs
@@ -22,6 +22,7 @@ use crate::key::flow::flow_name::FlowNameKey;
 use crate::key::flow::flow_route::FlowRouteKey;
 use crate::key::flow::flownode_flow::FlownodeFlowKey;
 use crate::key::flow::table_flow::TableFlowKey;
+use crate::key::node_address::NodeAddressKey;
 use crate::key::schema_name::SchemaNameKey;
 use crate::key::table_info::TableInfoKey;
 use crate::key::table_name::TableNameKey;
@@ -53,6 +54,10 @@ pub struct Context {
 #[async_trait::async_trait]
 pub trait CacheInvalidator: Send + Sync {
    async fn invalidate(&self, ctx: &Context, caches: &[CacheIdent]) -> Result<()>;
+
+    fn name(&self) -> &'static str {
+        std::any::type_name::<Self>()
+    }
 }

 pub type CacheInvalidatorRef = Arc<dyn CacheInvalidator>;
@@ -137,6 +142,13 @@ where
                    let key = FlowInfoKey::new(*flow_id);
                    self.invalidate_key(&key.to_bytes()).await;
                }
+                CacheIdent::FlowNodeAddressChange(node_id) => {
+                    // other caches doesn't need to be invalidated
+                    // since this is only for flownode address change not id change
+                    common_telemetry::info!("Invalidate flow node cache for node_id: {}", node_id);
+                    let key = NodeAddressKey::with_flownode(*node_id);
+                    self.invalidate_key(&key.to_bytes()).await;
+                }
            }
        }
        Ok(())
--- a/src/common/meta/src/ddl.rs
+++ b/src/common/meta/src/ddl.rs
@@ -15,25 +15,17 @@
 use std::collections::HashMap;
 use std::sync::Arc;

-use api::v1::meta::ProcedureDetailResponse;
-use common_telemetry::tracing_context::W3cTrace;
 use store_api::storage::{RegionId, RegionNumber, TableId};

 use crate::cache_invalidator::CacheInvalidatorRef;
 use crate::ddl::flow_meta::FlowMetadataAllocatorRef;
 use crate::ddl::table_meta::TableMetadataAllocatorRef;
-use crate::error::{Result, UnsupportedSnafu};
 use crate::key::flow::FlowMetadataManagerRef;
 use crate::key::table_route::PhysicalTableRouteValue;
 use crate::key::TableMetadataManagerRef;
 use crate::node_manager::NodeManagerRef;
 use crate::region_keeper::MemoryRegionKeeperRef;
 use crate::region_registry::LeaderRegionRegistryRef;
-use crate::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
-use crate::rpc::procedure::{
-    AddRegionFollowerRequest, MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse,
-    RemoveRegionFollowerRequest,
-};
 use crate::DatanodeId;

 pub mod alter_database;
@@ -44,6 +36,7 @@ pub mod create_flow;
 pub mod create_logical_tables;
 pub mod create_table;
 mod create_table_template;
+pub(crate) use create_table_template::{build_template_from_raw_table_info, CreateRequestBuilder};
 pub mod create_view;
 pub mod drop_database;
 pub mod drop_flow;
@@ -58,64 +51,6 @@ pub(crate) mod tests;
 pub mod truncate_table;
 pub mod utils;

-#[derive(Debug, Default)]
-pub struct ExecutorContext {
-    pub tracing_context: Option<W3cTrace>,
-}
-
-/// The procedure executor that accepts ddl, region migration task etc.
-#[async_trait::async_trait]
-pub trait ProcedureExecutor: Send + Sync {
-    /// Submit a ddl task
-    async fn submit_ddl_task(
-        &self,
-        ctx: &ExecutorContext,
-        request: SubmitDdlTaskRequest,
-    ) -> Result<SubmitDdlTaskResponse>;
-
-    /// Add a region follower
-    async fn add_region_follower(
-        &self,
-        _ctx: &ExecutorContext,
-        _request: AddRegionFollowerRequest,
-    ) -> Result<()> {
-        UnsupportedSnafu {
-            operation: "add_region_follower",
-        }
-        .fail()
-    }
-
-    /// Remove a region follower
-    async fn remove_region_follower(
-        &self,
-        _ctx: &ExecutorContext,
-        _request: RemoveRegionFollowerRequest,
-    ) -> Result<()> {
-        UnsupportedSnafu {
-            operation: "remove_region_follower",
-        }
-        .fail()
-    }
-
-    /// Submit a region migration task
-    async fn migrate_region(
-        &self,
-        ctx: &ExecutorContext,
-        request: MigrateRegionRequest,
-    ) -> Result<MigrateRegionResponse>;
-
-    /// Query the procedure state by its id
-    async fn query_procedure_state(
-        &self,
-        ctx: &ExecutorContext,
-        pid: &str,
-    ) -> Result<ProcedureStateResponse>;
-
-    async fn list_procedures(&self, ctx: &ExecutorContext) -> Result<ProcedureDetailResponse>;
-}
-
-pub type ProcedureExecutorRef = Arc<dyn ProcedureExecutor>;
-
 /// Metadata allocated to a table.
 #[derive(Default)]
 pub struct TableMetadata {
--- a/src/common/meta/src/ddl/alter_logical_tables/update_metadata.rs
+++ b/src/common/meta/src/ddl/alter_logical_tables/update_metadata.rs
@@ -13,12 +13,12 @@
 // limitations under the License.

 use common_grpc_expr::alter_expr_to_request;
-use itertools::Itertools;
 use snafu::ResultExt;
 use table::metadata::{RawTableInfo, TableInfo};

 use crate::ddl::alter_logical_tables::executor::AlterLogicalTablesExecutor;
 use crate::ddl::alter_logical_tables::AlterLogicalTablesProcedure;
+use crate::ddl::utils::table_info::batch_update_table_info_values;
 use crate::error;
 use crate::error::{ConvertAlterTableRequestSnafu, Result};
 use crate::key::table_info::TableInfoValue;
@@ -48,25 +48,8 @@ impl AlterLogicalTablesProcedure {

    pub(crate) async fn update_logical_tables_metadata(&mut self) -> Result<()> {
        let table_info_values = self.build_update_metadata()?;
-        let manager = &self.context.table_metadata_manager;
-        let chunk_size = manager.batch_update_table_info_value_chunk_size();
-        if table_info_values.len() > chunk_size {
-            let chunks = table_info_values
-                .into_iter()
-                .chunks(chunk_size)
-                .into_iter()
-                .map(|check| check.collect::<Vec<_>>())
-                .collect::<Vec<_>>();
-            for chunk in chunks {
-                manager.batch_update_table_info_values(chunk).await?;
-            }
-        } else {
-            manager
-                .batch_update_table_info_values(table_info_values)
-                .await?;
-        }
-
-        Ok(())
+        batch_update_table_info_values(&self.context.table_metadata_manager, table_info_values)
+            .await
    }

    pub(crate) fn build_update_metadata(
--- a/src/common/meta/src/ddl/alter_logical_tables/validator.rs
+++ b/src/common/meta/src/ddl/alter_logical_tables/validator.rs
@@ -21,7 +21,9 @@ use store_api::storage::TableId;
 use table::table_reference::TableReference;

 use crate::ddl::utils::table_id::get_all_table_ids_by_names;
-use crate::ddl::utils::table_info::get_all_table_info_values_by_table_ids;
+use crate::ddl::utils::table_info::{
+    all_logical_table_routes_have_same_physical_id, get_all_table_info_values_by_table_ids,
+};
 use crate::error::{
    AlterLogicalTablesInvalidArgumentsSnafu, Result, TableInfoNotFoundSnafu,
    TableRouteNotFoundSnafu,
@@ -146,23 +148,16 @@ impl<'a> AlterLogicalTableValidator<'a> {
        table_route_manager: &TableRouteManager,
        table_ids: &[TableId],
    ) -> Result<()> {
-        let table_routes = table_route_manager
-            .table_route_storage()
-            .batch_get(table_ids)
+        let all_logical_table_routes_have_same_physical_id =
+            all_logical_table_routes_have_same_physical_id(
+                table_route_manager,
+                table_ids,
+                self.physical_table_id,
+            )
            .await?;

-        let physical_table_id = self.physical_table_id;
-
-        let is_same_physical_table = table_routes.iter().all(|r| {
-            if let Some(TableRouteValue::Logical(r)) = r {
-                r.physical_table_id() == physical_table_id
-            } else {
-                false
-            }
-        });
-
        ensure!(
-            is_same_physical_table,
+            all_logical_table_routes_have_same_physical_id,
            AlterLogicalTablesInvalidArgumentsSnafu {
                err_msg: "All the tasks should have the same physical table id"
            }
--- a/src/common/meta/src/ddl/alter_table.rs
+++ b/src/common/meta/src/ddl/alter_table.rs
@@ -227,7 +227,6 @@ impl AlterTableProcedure {
    }

    fn handle_alter_region_response(&mut self, mut results: Vec<RegionResponse>) -> Result<()> {
-        self.data.state = AlterTableState::UpdateMetadata;
        if let Some(column_metadatas) =
            extract_column_metadatas(&mut results, TABLE_COLUMN_METADATA_EXTENSION_KEY)?
        {
@@ -235,7 +234,7 @@ impl AlterTableProcedure {
        } else {
            warn!("altering table result doesn't contains extension key `{TABLE_COLUMN_METADATA_EXTENSION_KEY}`,leaving the table's column metadata unchanged");
        }
-
+        self.data.state = AlterTableState::UpdateMetadata;
        Ok(())
    }

--- a/src/common/meta/src/ddl/alter_table/executor.rs
+++ b/src/common/meta/src/ddl/alter_table/executor.rs
@@ -20,8 +20,8 @@ use api::v1::region::{alter_request, AlterRequest, RegionRequest, RegionRequestH
 use api::v1::AlterTableExpr;
 use common_catalog::format_full_table_name;
 use common_grpc_expr::alter_expr_to_request;
+use common_telemetry::debug;
 use common_telemetry::tracing_context::TracingContext;
-use common_telemetry::{debug, info};
 use futures::future;
 use snafu::{ensure, ResultExt};
 use store_api::metadata::ColumnMetadata;
@@ -299,15 +299,10 @@ fn build_new_table_info(
        | AlterKind::ModifyColumnTypes { .. }
        | AlterKind::SetTableOptions { .. }
        | AlterKind::UnsetTableOptions { .. }
-        | AlterKind::SetIndex { .. }
-        | AlterKind::UnsetIndex { .. }
+        | AlterKind::SetIndexes { .. }
+        | AlterKind::UnsetIndexes { .. }
        | AlterKind::DropDefaults { .. } => {}
    }

-    info!(
-        "Built new table info: {:?} for table {}, table_id: {}",
-        new_info.meta, table_name, table_id
-    );
-
    Ok(new_info)
 }
--- a/src/common/meta/src/ddl/alter_table/region_request.rs
+++ b/src/common/meta/src/ddl/alter_table/region_request.rs
@@ -108,6 +108,8 @@ fn create_proto_alter_kind(
        Kind::UnsetTableOptions(v) => Ok(Some(alter_request::Kind::UnsetTableOptions(v.clone()))),
        Kind::SetIndex(v) => Ok(Some(alter_request::Kind::SetIndex(v.clone()))),
        Kind::UnsetIndex(v) => Ok(Some(alter_request::Kind::UnsetIndex(v.clone()))),
+        Kind::SetIndexes(v) => Ok(Some(alter_request::Kind::SetIndexes(v.clone()))),
+        Kind::UnsetIndexes(v) => Ok(Some(alter_request::Kind::UnsetIndexes(v.clone()))),
        Kind::DropDefaults(v) => Ok(Some(alter_request::Kind::DropDefaults(v.clone()))),
    }
 }
--- a/src/common/meta/src/ddl/create_flow.rs
+++ b/src/common/meta/src/ddl/create_flow.rs
@@ -167,25 +167,6 @@ impl CreateFlowProcedure {
        }

        self.collect_source_tables().await?;
-
-        // Validate that source and sink tables are not the same
-        let sink_table_name = &self.data.task.sink_table_name;
-        if self
-            .data
-            .task
-            .source_table_names
-            .iter()
-            .any(|source| source == sink_table_name)
-        {
-            return error::UnsupportedSnafu {
-                operation: format!(
-                    "Creating flow with source and sink table being the same: {}",
-                    sink_table_name
-                ),
-            }
-            .fail();
-        }
-
        if self.data.flow_id.is_none() {
            self.allocate_flow_id().await?;
        }
--- a/src/common/meta/src/ddl/create_flow/metadata.rs
+++ b/src/common/meta/src/ddl/create_flow/metadata.rs
@@ -21,7 +21,7 @@ use crate::key::table_name::TableNameKey;
 impl CreateFlowProcedure {
    /// Allocates the [FlowId].
    pub(crate) async fn allocate_flow_id(&mut self) -> Result<()> {
-        // TODO(weny, ruihang): We don't support the partitions. It's always be 1, now.
+        //TODO(weny, ruihang): We doesn't support the partitions. It's always be 1, now.
        let partitions = 1;
        let (flow_id, peers) = self
            .context
--- a/src/common/meta/src/ddl/create_table.rs
+++ b/src/common/meta/src/ddl/create_table.rs
@@ -21,7 +21,7 @@ use common_error::ext::BoxedError;
 use common_procedure::error::{
    ExternalSnafu, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu,
 };
-use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status};
+use common_procedure::{Context as ProcedureContext, LockKey, Procedure, ProcedureId, Status};
 use common_telemetry::tracing_context::TracingContext;
 use common_telemetry::{info, warn};
 use futures::future::join_all;
@@ -246,8 +246,6 @@ impl CreateTableProcedure {
            }
        }

-        self.creator.data.state = CreateTableState::CreateMetadata;
-
        let mut results = join_all(create_region_tasks)
            .await
            .into_iter()
@@ -261,6 +259,7 @@ impl CreateTableProcedure {
            warn!("creating table result doesn't contains extension key `{TABLE_COLUMN_METADATA_EXTENSION_KEY}`,leaving the table's column metadata unchanged");
        }

+        self.creator.data.state = CreateTableState::CreateMetadata;
        Ok(Status::executing(true))
    }

@@ -268,8 +267,9 @@ impl CreateTableProcedure {
    ///
    /// Abort(not-retry):
    /// - Failed to create table metadata.
-    async fn on_create_metadata(&mut self) -> Result<Status> {
+    async fn on_create_metadata(&mut self, pid: ProcedureId) -> Result<Status> {
        let table_id = self.table_id();
+        let table_ref = self.creator.data.table_ref();
        let manager = &self.context.table_metadata_manager;

        let mut raw_table_info = self.table_info().clone();
@@ -289,7 +289,10 @@ impl CreateTableProcedure {
        self.context
            .register_failure_detectors(detecting_regions)
            .await;
-        info!("Created table metadata for table {table_id}");
+        info!(
+            "Successfully created table: {}, table_id: {}, procedure_id: {}",
+            table_ref, table_id, pid
+        );

        self.creator.opening_regions.clear();
        Ok(Status::done_with_output(table_id))
@@ -317,7 +320,7 @@ impl Procedure for CreateTableProcedure {
        Ok(())
    }

-    async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
+    async fn execute(&mut self, ctx: &ProcedureContext) -> ProcedureResult<Status> {
        let state = &self.creator.data.state;

        let _timer = metrics::METRIC_META_PROCEDURE_CREATE_TABLE
@@ -327,7 +330,7 @@ impl Procedure for CreateTableProcedure {
        match state {
            CreateTableState::Prepare => self.on_prepare().await,
            CreateTableState::DatanodeCreateRegions => self.on_datanode_create_regions().await,
-            CreateTableState::CreateMetadata => self.on_create_metadata().await,
+            CreateTableState::CreateMetadata => self.on_create_metadata(ctx.procedure_id).await,
        }
        .map_err(map_to_procedure_error)
    }
--- a/src/common/meta/src/ddl/create_table_template.rs
+++ b/src/common/meta/src/ddl/create_table_template.rs
@@ -14,17 +14,57 @@

 use std::collections::HashMap;

+use api::v1::column_def::try_as_column_def;
 use api::v1::region::{CreateRequest, RegionColumnDef};
 use api::v1::{ColumnDef, CreateTableExpr, SemanticType};
-use snafu::OptionExt;
-use store_api::metric_engine_consts::LOGICAL_TABLE_METADATA_KEY;
+use snafu::{OptionExt, ResultExt};
+use store_api::metric_engine_consts::{LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME};
 use store_api::storage::{RegionId, RegionNumber};
-use table::metadata::TableId;
+use table::metadata::{RawTableInfo, TableId};

-use crate::error;
-use crate::error::Result;
+use crate::error::{self, Result};
 use crate::wal_options_allocator::prepare_wal_options;

+/// Builds a [CreateRequest] from a [RawTableInfo].
+///
+/// Note: **This method is only used for creating logical tables.**
+pub(crate) fn build_template_from_raw_table_info(
+    raw_table_info: &RawTableInfo,
+) -> Result<CreateRequest> {
+    let primary_key_indices = &raw_table_info.meta.primary_key_indices;
+    let column_defs = raw_table_info
+        .meta
+        .schema
+        .column_schemas
+        .iter()
+        .enumerate()
+        .map(|(i, c)| {
+            let is_primary_key = primary_key_indices.contains(&i);
+            let column_def = try_as_column_def(c, is_primary_key)
+                .context(error::ConvertColumnDefSnafu { column: &c.name })?;
+
+            Ok(RegionColumnDef {
+                column_def: Some(column_def),
+                // The column id will be overridden by the metric engine.
+                // So we just use the index as the column id.
+                column_id: i as u32,
+            })
+        })
+        .collect::<Result<Vec<_>>>()?;
+
+    let options = HashMap::from(&raw_table_info.meta.options);
+    let template = CreateRequest {
+        region_id: 0,
+        engine: METRIC_ENGINE_NAME.to_string(),
+        column_defs,
+        primary_key: primary_key_indices.iter().map(|i| *i as u32).collect(),
+        path: String::new(),
+        options,
+    };
+
+    Ok(template)
+}
+
 pub(crate) fn build_template(create_table_expr: &CreateTableExpr) -> Result<CreateRequest> {
    let column_defs = create_table_expr
        .column_defs
--- a/src/common/meta/src/ddl/table_meta.rs
+++ b/src/common/meta/src/ddl/table_meta.rs
@@ -113,19 +113,15 @@ impl TableMetadataAllocator {
        table_id: TableId,
        task: &CreateTableTask,
    ) -> Result<PhysicalTableRouteValue> {
-        let num_regions = task
-            .partitions
-            .as_ref()
-            .map(|p| p.value_list.len())
-            .unwrap_or(1);
+        let regions = task.partitions.len();
        ensure!(
-            num_regions > 0,
+            regions > 0,
            error::UnexpectedSnafu {
                err_msg: "The number of partitions must be greater than 0"
            }
        );

-        let peers = self.peer_allocator.alloc(num_regions).await?;
+        let peers = self.peer_allocator.alloc(regions).await?;
        debug!("Allocated peers {:?} for table {}", peers, table_id);
        let region_routes = task
            .partitions
@@ -179,6 +175,10 @@ impl TableMetadataAllocator {
            region_wal_options,
        })
    }
+
+    pub fn table_id_sequence(&self) -> SequenceRef {
+        self.table_id_sequence.clone()
+    }
 }

 pub type PeerAllocatorRef = Arc<dyn PeerAllocator>;
--- a/src/common/meta/src/ddl/test_util.rs
+++ b/src/common/meta/src/ddl/test_util.rs
@@ -17,10 +17,12 @@ pub mod columns;
 pub mod create_table;
 pub mod datanode_handler;
 pub mod flownode_handler;
+pub mod region_metadata;

 use std::assert_matches::assert_matches;
 use std::collections::HashMap;

+use api::v1::meta::Partition;
 use api::v1::{ColumnDataType, SemanticType};
 use common_procedure::Status;
 use datatypes::prelude::ConcreteDataType;
@@ -144,7 +146,10 @@ pub fn test_create_logical_table_task(name: &str) -> CreateTableTask {
    CreateTableTask {
        create_table,
        // Single region
-        partitions: None,
+        partitions: vec![Partition {
+            column_list: vec![],
+            value_list: vec![],
+        }],
        table_info,
    }
 }
@@ -179,7 +184,10 @@ pub fn test_create_physical_table_task(name: &str) -> CreateTableTask {
    CreateTableTask {
        create_table,
        // Single region
-        partitions: None,
+        partitions: vec![Partition {
+            column_list: vec![],
+            value_list: vec![],
+        }],
        table_info,
    }
 }
--- a/src/common/meta/src/ddl/test_util/create_table.rs
+++ b/src/common/meta/src/ddl/test_util/create_table.rs
@@ -15,6 +15,7 @@
 use std::collections::HashMap;

 use api::v1::column_def::try_as_column_schema;
+use api::v1::meta::Partition;
 use api::v1::{ColumnDataType, ColumnDef, CreateTableExpr, SemanticType};
 use chrono::DateTime;
 use common_catalog::consts::{
@@ -174,7 +175,10 @@ pub fn test_create_table_task(name: &str, table_id: TableId) -> CreateTableTask
    CreateTableTask {
        create_table,
        // Single region
-        partitions: None,
+        partitions: vec![Partition {
+            column_list: vec![],
+            value_list: vec![],
+        }],
        table_info,
    }
 }
--- a/src/common/meta/src/ddl/test_util/datanode_handler.rs
+++ b/src/common/meta/src/ddl/test_util/datanode_handler.rs
@@ -12,9 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::collections::HashMap;
 use std::sync::Arc;

 use api::region::RegionResponse;
+use api::v1::region::region_request::Body;
 use api::v1::region::RegionRequest;
 use common_error::ext::{BoxedError, ErrorExt, StackError};
 use common_error::status_code::StatusCode;
@@ -22,6 +24,8 @@ use common_query::request::QueryRequest;
 use common_recordbatch::SendableRecordBatchStream;
 use common_telemetry::debug;
 use snafu::{ResultExt, Snafu};
+use store_api::metadata::RegionMetadata;
+use store_api::storage::RegionId;
 use tokio::sync::mpsc;

 use crate::error::{self, Error, Result};
@@ -278,3 +282,47 @@ impl MockDatanodeHandler for AllFailureDatanodeHandler {
        unreachable!()
    }
 }
+
+#[derive(Clone)]
+pub struct ListMetadataDatanodeHandler {
+    pub region_metadatas: HashMap<RegionId, Option<RegionMetadata>>,
+}
+
+impl ListMetadataDatanodeHandler {
+    pub fn new(region_metadatas: HashMap<RegionId, Option<RegionMetadata>>) -> Self {
+        Self { region_metadatas }
+    }
+}
+
+#[async_trait::async_trait]
+impl MockDatanodeHandler for ListMetadataDatanodeHandler {
+    async fn handle(&self, _peer: &Peer, request: RegionRequest) -> Result<RegionResponse> {
+        let Some(Body::ListMetadata(req)) = request.body else {
+            unreachable!()
+        };
+        let mut response = RegionResponse::new(0);
+
+        let mut output = Vec::with_capacity(req.region_ids.len());
+        for region_id in req.region_ids {
+            match self.region_metadatas.get(&RegionId::from_u64(region_id)) {
+                Some(metadata) => {
+                    output.push(metadata.clone());
+                }
+                None => {
+                    output.push(None);
+                }
+            }
+        }
+
+        response.metadata = serde_json::to_vec(&output).unwrap();
+        Ok(response)
+    }
+
+    async fn handle_query(
+        &self,
+        _peer: &Peer,
+        _request: QueryRequest,
+    ) -> Result<SendableRecordBatchStream> {
+        unreachable!()
+    }
+}
--- a/src/common/meta/src/ddl/test_util/region_metadata.rs
+++ b/src/common/meta/src/ddl/test_util/region_metadata.rs
@@ -0,0 +1,34 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use api::v1::SemanticType;
+use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataBuilder};
+use store_api::storage::RegionId;
+
+/// Builds a region metadata with the given column metadatas.
+pub fn build_region_metadata(
+    region_id: RegionId,
+    column_metadatas: &[ColumnMetadata],
+) -> RegionMetadata {
+    let mut builder = RegionMetadataBuilder::new(region_id);
+    let mut primary_key = vec![];
+    for column_metadata in column_metadatas {
+        builder.push_column_metadata(column_metadata.clone());
+        if column_metadata.semantic_type == SemanticType::Tag {
+            primary_key.push(column_metadata.column_id);
+        }
+    }
+    builder.primary_key(primary_key);
+    builder.build().unwrap()
+}
--- a/src/common/meta/src/ddl/tests/create_flow.rs
+++ b/src/common/meta/src/ddl/tests/create_flow.rs
@@ -141,41 +141,3 @@ async fn test_create_flow() {
    let err = procedure.on_prepare().await.unwrap_err();
    assert_matches!(err, error::Error::FlowAlreadyExists { .. });
 }
-
-#[tokio::test]
-async fn test_create_flow_same_source_and_sink_table() {
-    let table_id = 1024;
-    let table_name = TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "same_table");
-
-    // Use the same table for both source and sink
-    let source_table_names = vec![table_name.clone()];
-    let sink_table_name = table_name.clone();
-
-    let node_manager = Arc::new(MockFlownodeManager::new(NaiveFlownodeHandler));
-    let ddl_context = new_ddl_context(node_manager);
-
-    // Create the table first so it exists
-    let task = test_create_table_task("same_table", table_id);
-    ddl_context
-        .table_metadata_manager
-        .create_table_metadata(
-            task.table_info.clone(),
-            TableRouteValue::physical(vec![]),
-            HashMap::new(),
-        )
-        .await
-        .unwrap();
-
-    // Try to create a flow with same source and sink table - should fail
-    let task = test_create_flow_task("my_flow", source_table_names, sink_table_name, false);
-    let query_ctx = QueryContext::arc().into();
-    let mut procedure = CreateFlowProcedure::new(task, query_ctx, ddl_context);
-    let err = procedure.on_prepare().await.unwrap_err();
-    assert_matches!(err, error::Error::Unsupported { .. });
-
-    // Verify the error message contains information about the same table
-    if let error::Error::Unsupported { operation, .. } = &err {
-        assert!(operation.contains("source and sink table being the same"));
-        assert!(operation.contains("same_table"));
-    }
-}
--- a/src/common/meta/src/ddl/tests/create_table.rs
+++ b/src/common/meta/src/ddl/tests/create_table.rs
@@ -17,7 +17,7 @@ use std::collections::HashMap;
 use std::sync::Arc;

 use api::region::RegionResponse;
-use api::v1::meta::Peer;
+use api::v1::meta::{Partition, Peer};
 use api::v1::region::{region_request, RegionRequest};
 use api::v1::{ColumnDataType, SemanticType};
 use common_error::ext::ErrorExt;
@@ -141,7 +141,10 @@ pub(crate) fn test_create_table_task(name: &str) -> CreateTableTask {
    CreateTableTask {
        create_table,
        // Single region
-        partitions: None,
+        partitions: vec![Partition {
+            column_list: vec![],
+            value_list: vec![],
+        }],
        table_info,
    }
 }
@@ -215,7 +218,7 @@ async fn test_on_prepare_with_no_partition_err() {
    let node_manager = Arc::new(MockDatanodeManager::new(()));
    let ddl_context = new_ddl_context(node_manager);
    let mut task = test_create_table_task("foo");
-    task.partitions = None;
+    task.partitions = vec![];
    task.create_table.create_if_not_exists = true;
    let mut procedure = CreateTableProcedure::new(task, ddl_context);
    let err = procedure.on_prepare().await.unwrap_err();
--- a/src/common/meta/src/ddl/utils.rs
+++ b/src/common/meta/src/ddl/utils.rs
@@ -13,6 +13,8 @@
 // limitations under the License.

 pub(crate) mod raw_table_info;
+#[allow(dead_code)]
+pub(crate) mod region_metadata_lister;
 pub(crate) mod table_id;
 pub(crate) mod table_info;

@@ -446,6 +448,7 @@ pub fn extract_column_metadatas(
        .collect::<Vec<_>>();

    if schemas.is_empty() {
+        warn!("extract_column_metadatas: no extension key `{key}` found in results");
        return Ok(None);
    }

--- a/src/common/meta/src/ddl/utils/raw_table_info.rs
+++ b/src/common/meta/src/ddl/utils/raw_table_info.rs
@@ -54,7 +54,10 @@ pub(crate) fn build_new_physical_table_info(
                }
            }
            SemanticType::Field => value_indices.push(idx),
-            SemanticType::Timestamp => *time_index = Some(idx),
+            SemanticType::Timestamp => {
+                value_indices.push(idx);
+                *time_index = Some(idx);
+            }
        }

        columns.push(col.column_schema.clone());
--- a/src/common/meta/src/ddl/utils/region_metadata_lister.rs
+++ b/src/common/meta/src/ddl/utils/region_metadata_lister.rs
@@ -0,0 +1,240 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+
+use api::v1::region::region_request::Body as PbRegionRequest;
+use api::v1::region::{ListMetadataRequest, RegionRequest, RegionRequestHeader};
+use common_telemetry::tracing_context::TracingContext;
+use futures::future::join_all;
+use snafu::ResultExt;
+use store_api::metadata::RegionMetadata;
+use store_api::storage::{RegionId, TableId};
+
+use crate::ddl::utils::add_peer_context_if_needed;
+use crate::error::{DecodeJsonSnafu, Result};
+use crate::node_manager::NodeManagerRef;
+use crate::rpc::router::{find_leaders, region_distribution, RegionRoute};
+
+/// Collects the region metadata from the datanodes.
+pub struct RegionMetadataLister {
+    node_manager: NodeManagerRef,
+}
+
+impl RegionMetadataLister {
+    /// Creates a new [`RegionMetadataLister`] with the given [`NodeManagerRef`].
+    pub fn new(node_manager: NodeManagerRef) -> Self {
+        Self { node_manager }
+    }
+
+    /// Collects the region metadata from the datanodes.
+    pub async fn list(
+        &self,
+        table_id: TableId,
+        region_routes: &[RegionRoute],
+    ) -> Result<Vec<Option<RegionMetadata>>> {
+        let region_distribution = region_distribution(region_routes);
+        let leaders = find_leaders(region_routes)
+            .into_iter()
+            .map(|p| (p.id, p))
+            .collect::<HashMap<_, _>>();
+
+        let total_num_region = region_distribution
+            .values()
+            .map(|r| r.leader_regions.len())
+            .sum::<usize>();
+
+        let mut list_metadata_tasks = Vec::with_capacity(leaders.len());
+
+        // Build requests.
+        for (datanode_id, region_role_set) in region_distribution {
+            if region_role_set.leader_regions.is_empty() {
+                continue;
+            }
+            // Safety: must exists.
+            let peer = leaders.get(&datanode_id).unwrap();
+            let requester = self.node_manager.datanode(peer).await;
+            let region_ids = region_role_set
+                .leader_regions
+                .iter()
+                .map(|r| RegionId::new(table_id, *r).as_u64())
+                .collect();
+            let request = Self::build_list_metadata_request(region_ids);
+
+            let peer = peer.clone();
+            list_metadata_tasks.push(async move {
+                requester
+                    .handle(request)
+                    .await
+                    .map_err(add_peer_context_if_needed(peer))
+            });
+        }
+
+        let results = join_all(list_metadata_tasks)
+            .await
+            .into_iter()
+            .collect::<Result<Vec<_>>>()?
+            .into_iter()
+            .map(|r| r.metadata);
+
+        let mut output = Vec::with_capacity(total_num_region);
+        for result in results {
+            let region_metadatas: Vec<Option<RegionMetadata>> =
+                serde_json::from_slice(&result).context(DecodeJsonSnafu)?;
+            output.extend(region_metadatas);
+        }
+
+        Ok(output)
+    }
+
+    fn build_list_metadata_request(region_ids: Vec<u64>) -> RegionRequest {
+        RegionRequest {
+            header: Some(RegionRequestHeader {
+                tracing_context: TracingContext::from_current_span().to_w3c(),
+                ..Default::default()
+            }),
+            body: Some(PbRegionRequest::ListMetadata(ListMetadataRequest {
+                region_ids,
+            })),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+    use std::sync::Arc;
+
+    use api::region::RegionResponse;
+    use api::v1::meta::Peer;
+    use api::v1::region::region_request::Body;
+    use api::v1::region::RegionRequest;
+    use store_api::metadata::RegionMetadata;
+    use store_api::storage::RegionId;
+    use tokio::sync::mpsc;
+
+    use crate::ddl::test_util::datanode_handler::{DatanodeWatcher, ListMetadataDatanodeHandler};
+    use crate::ddl::test_util::region_metadata::build_region_metadata;
+    use crate::ddl::test_util::test_column_metadatas;
+    use crate::ddl::utils::region_metadata_lister::RegionMetadataLister;
+    use crate::error::Result;
+    use crate::rpc::router::{Region, RegionRoute};
+    use crate::test_util::MockDatanodeManager;
+
+    fn assert_list_metadata_request(req: RegionRequest, expected_region_ids: &[RegionId]) {
+        let Some(Body::ListMetadata(req)) = req.body else {
+            unreachable!()
+        };
+
+        assert_eq!(req.region_ids.len(), expected_region_ids.len());
+        for region_id in expected_region_ids {
+            assert!(req.region_ids.contains(&region_id.as_u64()));
+        }
+    }
+
+    fn empty_list_metadata_handler(_peer: Peer, request: RegionRequest) -> Result<RegionResponse> {
+        let Some(Body::ListMetadata(req)) = request.body else {
+            unreachable!()
+        };
+
+        let mut output: Vec<Option<RegionMetadata>> = Vec::with_capacity(req.region_ids.len());
+        for _region_id in req.region_ids {
+            output.push(None);
+        }
+
+        Ok(RegionResponse::from_metadata(
+            serde_json::to_vec(&output).unwrap(),
+        ))
+    }
+
+    #[tokio::test]
+    async fn test_list_request() {
+        let (tx, mut rx) = mpsc::channel(8);
+        let handler = DatanodeWatcher::new(tx).with_handler(empty_list_metadata_handler);
+        let node_manager = Arc::new(MockDatanodeManager::new(handler));
+        let lister = RegionMetadataLister::new(node_manager);
+        let region_routes = vec![
+            RegionRoute {
+                region: Region::new_test(RegionId::new(1024, 1)),
+                leader_peer: Some(Peer::empty(1)),
+                follower_peers: vec![Peer::empty(5)],
+                leader_state: None,
+                leader_down_since: None,
+            },
+            RegionRoute {
+                region: Region::new_test(RegionId::new(1024, 2)),
+                leader_peer: Some(Peer::empty(3)),
+                follower_peers: vec![Peer::empty(4)],
+                leader_state: None,
+                leader_down_since: None,
+            },
+            RegionRoute {
+                region: Region::new_test(RegionId::new(1024, 3)),
+                leader_peer: Some(Peer::empty(3)),
+                follower_peers: vec![Peer::empty(4)],
+                leader_state: None,
+                leader_down_since: None,
+            },
+        ];
+        let region_metadatas = lister.list(1024, &region_routes).await.unwrap();
+        assert_eq!(region_metadatas.len(), 3);
+
+        let mut requests = vec![];
+        for _ in 0..2 {
+            let (peer, request) = rx.try_recv().unwrap();
+            requests.push((peer, request));
+        }
+        rx.try_recv().unwrap_err();
+
+        let (peer, request) = requests.remove(0);
+        assert_eq!(peer.id, 1);
+        assert_list_metadata_request(request, &[RegionId::new(1024, 1)]);
+        let (peer, request) = requests.remove(0);
+        assert_eq!(peer.id, 3);
+        assert_list_metadata_request(request, &[RegionId::new(1024, 2), RegionId::new(1024, 3)]);
+    }
+
+    #[tokio::test]
+    async fn test_list_region_metadata() {
+        let region_metadata =
+            build_region_metadata(RegionId::new(1024, 1), &test_column_metadatas(&["tag_0"]));
+        let region_metadatas = HashMap::from([
+            (RegionId::new(1024, 0), None),
+            (RegionId::new(1024, 1), Some(region_metadata.clone())),
+        ]);
+        let handler = ListMetadataDatanodeHandler::new(region_metadatas);
+        let node_manager = Arc::new(MockDatanodeManager::new(handler));
+        let lister = RegionMetadataLister::new(node_manager);
+        let region_routes = vec![
+            RegionRoute {
+                region: Region::new_test(RegionId::new(1024, 0)),
+                leader_peer: Some(Peer::empty(1)),
+                follower_peers: vec![],
+                leader_state: None,
+                leader_down_since: None,
+            },
+            RegionRoute {
+                region: Region::new_test(RegionId::new(1024, 1)),
+                leader_peer: Some(Peer::empty(3)),
+                follower_peers: vec![],
+                leader_state: None,
+                leader_down_since: None,
+            },
+        ];
+        let region_metadatas = lister.list(1024, &region_routes).await.unwrap();
+        assert_eq!(region_metadatas.len(), 2);
+        assert_eq!(region_metadatas[0], None);
+        assert_eq!(region_metadatas[1], Some(region_metadata));
+    }
+}
--- a/src/common/meta/src/ddl/utils/table_info.rs
+++ b/src/common/meta/src/ddl/utils/table_info.rs
@@ -12,13 +12,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use itertools::Itertools;
 use snafu::OptionExt;
 use store_api::storage::TableId;
+use table::metadata::RawTableInfo;
 use table::table_reference::TableReference;

 use crate::error::{Result, TableInfoNotFoundSnafu};
 use crate::key::table_info::{TableInfoManager, TableInfoValue};
-use crate::key::DeserializedValueWithBytes;
+use crate::key::table_route::{TableRouteManager, TableRouteValue};
+use crate::key::{DeserializedValueWithBytes, TableMetadataManager};

 /// Get all table info values by table ids.
 ///
@@ -42,3 +45,56 @@ pub(crate) async fn get_all_table_info_values_by_table_ids<'a>(

    Ok(table_info_values)
 }
+
+/// Checks if all the logical table routes have the same physical table id.
+pub(crate) async fn all_logical_table_routes_have_same_physical_id(
+    table_route_manager: &TableRouteManager,
+    table_ids: &[TableId],
+    physical_table_id: TableId,
+) -> Result<bool> {
+    let table_routes = table_route_manager
+        .table_route_storage()
+        .batch_get(table_ids)
+        .await?;
+
+    let is_same_physical_table = table_routes.iter().all(|r| {
+        if let Some(TableRouteValue::Logical(r)) = r {
+            r.physical_table_id() == physical_table_id
+        } else {
+            false
+        }
+    });
+
+    Ok(is_same_physical_table)
+}
+
+/// Batch updates the table info values.
+///
+/// The table info values are grouped into chunks, and each chunk is updated in a single transaction.
+///
+/// Returns an error if any table info value fails to update.
+pub(crate) async fn batch_update_table_info_values(
+    table_metadata_manager: &TableMetadataManager,
+    table_info_values: Vec<(DeserializedValueWithBytes<TableInfoValue>, RawTableInfo)>,
+) -> Result<()> {
+    let chunk_size = table_metadata_manager.batch_update_table_info_value_chunk_size();
+    if table_info_values.len() > chunk_size {
+        let chunks = table_info_values
+            .into_iter()
+            .chunks(chunk_size)
+            .into_iter()
+            .map(|check| check.collect::<Vec<_>>())
+            .collect::<Vec<_>>();
+        for chunk in chunks {
+            table_metadata_manager
+                .batch_update_table_info_values(chunk)
+                .await?;
+        }
+    } else {
+        table_metadata_manager
+            .batch_update_table_info_values(table_info_values)
+            .await?;
+    }
+
+    Ok(())
+}
--- a/src/common/meta/src/ddl_manager.rs
+++ b/src/common/meta/src/ddl_manager.rs
@@ -14,7 +14,6 @@

 use std::sync::Arc;

-use api::v1::meta::ProcedureDetailResponse;
 use common_procedure::{
    watcher, BoxedProcedureLoader, Output, ProcedureId, ProcedureManagerRef, ProcedureWithId,
 };
@@ -37,24 +36,20 @@ use crate::ddl::drop_flow::DropFlowProcedure;
 use crate::ddl::drop_table::DropTableProcedure;
 use crate::ddl::drop_view::DropViewProcedure;
 use crate::ddl::truncate_table::TruncateTableProcedure;
-use crate::ddl::{utils, DdlContext, ExecutorContext, ProcedureExecutor};
+use crate::ddl::{utils, DdlContext};
 use crate::error::{
-    EmptyDdlTasksSnafu, ParseProcedureIdSnafu, ProcedureNotFoundSnafu, ProcedureOutputSnafu,
-    QueryProcedureSnafu, RegisterProcedureLoaderSnafu, Result, SubmitProcedureSnafu,
-    TableInfoNotFoundSnafu, TableNotFoundSnafu, TableRouteNotFoundSnafu,
-    UnexpectedLogicalRouteTableSnafu, UnsupportedSnafu, WaitProcedureSnafu,
+    EmptyDdlTasksSnafu, ProcedureOutputSnafu, RegisterProcedureLoaderSnafu, Result,
+    SubmitProcedureSnafu, TableInfoNotFoundSnafu, TableNotFoundSnafu, TableRouteNotFoundSnafu,
+    UnexpectedLogicalRouteTableSnafu, WaitProcedureSnafu,
 };
 use crate::key::table_info::TableInfoValue;
 use crate::key::table_name::TableNameKey;
 use crate::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
+use crate::procedure_executor::ExecutorContext;
 #[cfg(feature = "enterprise")]
 use crate::rpc::ddl::trigger::CreateTriggerTask;
 #[cfg(feature = "enterprise")]
-use crate::rpc::ddl::trigger::DropTriggerTask;
-#[cfg(feature = "enterprise")]
 use crate::rpc::ddl::DdlTask::CreateTrigger;
-#[cfg(feature = "enterprise")]
-use crate::rpc::ddl::DdlTask::DropTrigger;
 use crate::rpc::ddl::DdlTask::{
    AlterDatabase, AlterLogicalTables, AlterTable, CreateDatabase, CreateFlow, CreateLogicalTables,
    CreateTable, CreateView, DropDatabase, DropFlow, DropLogicalTables, DropTable, DropView,
@@ -65,8 +60,6 @@ use crate::rpc::ddl::{
    CreateViewTask, DropDatabaseTask, DropFlowTask, DropTableTask, DropViewTask, QueryContext,
    SubmitDdlTaskRequest, SubmitDdlTaskResponse, TruncateTableTask,
 };
-use crate::rpc::procedure;
-use crate::rpc::procedure::{MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse};
 use crate::rpc::router::RegionRoute;

 pub type DdlManagerRef = Arc<DdlManager>;
@@ -95,14 +88,6 @@ pub trait TriggerDdlManager: Send + Sync {
        query_context: QueryContext,
    ) -> Result<SubmitDdlTaskResponse>;

-    async fn drop_trigger(
-        &self,
-        drop_trigger_task: DropTriggerTask,
-        procedure_manager: ProcedureManagerRef,
-        ddl_context: DdlContext,
-        query_context: QueryContext,
-    ) -> Result<SubmitDdlTaskResponse>;
-
    fn as_any(&self) -> &dyn std::any::Any;
 }

@@ -418,6 +403,70 @@ impl DdlManager {

        Ok((procedure_id, output))
    }
+
+    pub async fn submit_ddl_task(
+        &self,
+        ctx: &ExecutorContext,
+        request: SubmitDdlTaskRequest,
+    ) -> Result<SubmitDdlTaskResponse> {
+        let span = ctx
+            .tracing_context
+            .as_ref()
+            .map(TracingContext::from_w3c)
+            .unwrap_or_else(TracingContext::from_current_span)
+            .attach(tracing::info_span!("DdlManager::submit_ddl_task"));
+        async move {
+            debug!("Submitting Ddl task: {:?}", request.task);
+            match request.task {
+                CreateTable(create_table_task) => {
+                    handle_create_table_task(self, create_table_task).await
+                }
+                DropTable(drop_table_task) => handle_drop_table_task(self, drop_table_task).await,
+                AlterTable(alter_table_task) => {
+                    handle_alter_table_task(self, alter_table_task).await
+                }
+                TruncateTable(truncate_table_task) => {
+                    handle_truncate_table_task(self, truncate_table_task).await
+                }
+                CreateLogicalTables(create_table_tasks) => {
+                    handle_create_logical_table_tasks(self, create_table_tasks).await
+                }
+                AlterLogicalTables(alter_table_tasks) => {
+                    handle_alter_logical_table_tasks(self, alter_table_tasks).await
+                }
+                DropLogicalTables(_) => todo!(),
+                CreateDatabase(create_database_task) => {
+                    handle_create_database_task(self, create_database_task).await
+                }
+                DropDatabase(drop_database_task) => {
+                    handle_drop_database_task(self, drop_database_task).await
+                }
+                AlterDatabase(alter_database_task) => {
+                    handle_alter_database_task(self, alter_database_task).await
+                }
+                CreateFlow(create_flow_task) => {
+                    handle_create_flow_task(self, create_flow_task, request.query_context.into())
+                        .await
+                }
+                DropFlow(drop_flow_task) => handle_drop_flow_task(self, drop_flow_task).await,
+                CreateView(create_view_task) => {
+                    handle_create_view_task(self, create_view_task).await
+                }
+                DropView(drop_view_task) => handle_drop_view_task(self, drop_view_task).await,
+                #[cfg(feature = "enterprise")]
+                CreateTrigger(create_trigger_task) => {
+                    handle_create_trigger_task(
+                        self,
+                        create_trigger_task,
+                        request.query_context.into(),
+                    )
+                    .await
+                }
+            }
+        }
+        .trace(span)
+        .await
+    }
 }

 async fn handle_truncate_table_task(
@@ -660,28 +709,6 @@ async fn handle_drop_flow_task(
    })
 }

-#[cfg(feature = "enterprise")]
-async fn handle_drop_trigger_task(
-    ddl_manager: &DdlManager,
-    drop_trigger_task: DropTriggerTask,
-    query_context: QueryContext,
-) -> Result<SubmitDdlTaskResponse> {
-    let Some(m) = ddl_manager.trigger_ddl_manager.as_ref() else {
-        return UnsupportedSnafu {
-            operation: "drop trigger",
-        }
-        .fail();
-    };
-
-    m.drop_trigger(
-        drop_trigger_task,
-        ddl_manager.procedure_manager.clone(),
-        ddl_manager.ddl_context.clone(),
-        query_context,
-    )
-    .await
-}
-
 async fn handle_drop_view_task(
    ddl_manager: &DdlManager,
    drop_view_task: DropViewTask,
@@ -746,6 +773,8 @@ async fn handle_create_trigger_task(
    query_context: QueryContext,
 ) -> Result<SubmitDdlTaskResponse> {
    let Some(m) = ddl_manager.trigger_ddl_manager.as_ref() else {
+        use crate::error::UnsupportedSnafu;
+
        return UnsupportedSnafu {
            operation: "create trigger",
        }
@@ -822,119 +851,6 @@ async fn handle_create_view_task(
    })
 }

-/// TODO(dennis): let [`DdlManager`] implement [`ProcedureExecutor`] looks weird, find some way to refactor it.
-#[async_trait::async_trait]
-impl ProcedureExecutor for DdlManager {
-    async fn submit_ddl_task(
-        &self,
-        ctx: &ExecutorContext,
-        request: SubmitDdlTaskRequest,
-    ) -> Result<SubmitDdlTaskResponse> {
-        let span = ctx
-            .tracing_context
-            .as_ref()
-            .map(TracingContext::from_w3c)
-            .unwrap_or(TracingContext::from_current_span())
-            .attach(tracing::info_span!("DdlManager::submit_ddl_task"));
-        async move {
-            debug!("Submitting Ddl task: {:?}", request.task);
-            match request.task {
-                CreateTable(create_table_task) => {
-                    handle_create_table_task(self, create_table_task).await
-                }
-                DropTable(drop_table_task) => handle_drop_table_task(self, drop_table_task).await,
-                AlterTable(alter_table_task) => {
-                    handle_alter_table_task(self, alter_table_task).await
-                }
-                TruncateTable(truncate_table_task) => {
-                    handle_truncate_table_task(self, truncate_table_task).await
-                }
-                CreateLogicalTables(create_table_tasks) => {
-                    handle_create_logical_table_tasks(self, create_table_tasks).await
-                }
-                AlterLogicalTables(alter_table_tasks) => {
-                    handle_alter_logical_table_tasks(self, alter_table_tasks).await
-                }
-                DropLogicalTables(_) => todo!(),
-                CreateDatabase(create_database_task) => {
-                    handle_create_database_task(self, create_database_task).await
-                }
-                DropDatabase(drop_database_task) => {
-                    handle_drop_database_task(self, drop_database_task).await
-                }
-                AlterDatabase(alter_database_task) => {
-                    handle_alter_database_task(self, alter_database_task).await
-                }
-                CreateFlow(create_flow_task) => {
-                    handle_create_flow_task(self, create_flow_task, request.query_context.into())
-                        .await
-                }
-                DropFlow(drop_flow_task) => handle_drop_flow_task(self, drop_flow_task).await,
-                CreateView(create_view_task) => {
-                    handle_create_view_task(self, create_view_task).await
-                }
-                DropView(drop_view_task) => handle_drop_view_task(self, drop_view_task).await,
-                #[cfg(feature = "enterprise")]
-                CreateTrigger(create_trigger_task) => {
-                    handle_create_trigger_task(
-                        self,
-                        create_trigger_task,
-                        request.query_context.into(),
-                    )
-                    .await
-                }
-                #[cfg(feature = "enterprise")]
-                DropTrigger(drop_trigger_task) => {
-                    handle_drop_trigger_task(self, drop_trigger_task, request.query_context.into())
-                        .await
-                }
-            }
-        }
-        .trace(span)
-        .await
-    }
-
-    async fn migrate_region(
-        &self,
-        _ctx: &ExecutorContext,
-        _request: MigrateRegionRequest,
-    ) -> Result<MigrateRegionResponse> {
-        UnsupportedSnafu {
-            operation: "migrate_region",
-        }
-        .fail()
-    }
-
-    async fn query_procedure_state(
-        &self,
-        _ctx: &ExecutorContext,
-        pid: &str,
-    ) -> Result<ProcedureStateResponse> {
-        let pid =
-            ProcedureId::parse_str(pid).with_context(|_| ParseProcedureIdSnafu { key: pid })?;
-
-        let state = self
-            .procedure_manager
-            .procedure_state(pid)
-            .await
-            .context(QueryProcedureSnafu)?
-            .context(ProcedureNotFoundSnafu {
-                pid: pid.to_string(),
-            })?;
-
-        Ok(procedure::procedure_state_to_pb_response(&state))
-    }
-
-    async fn list_procedures(&self, _ctx: &ExecutorContext) -> Result<ProcedureDetailResponse> {
-        let metas = self
-            .procedure_manager
-            .list_procedures()
-            .await
-            .context(QueryProcedureSnafu)?;
-        Ok(procedure::procedure_details_to_pb_response(metas))
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use std::sync::Arc;
--- a/src/common/meta/src/error.rs
+++ b/src/common/meta/src/error.rs
@@ -18,6 +18,7 @@ use std::sync::Arc;
 use common_error::ext::{BoxedError, ErrorExt};
 use common_error::status_code::StatusCode;
 use common_macro::stack_trace_debug;
+use common_procedure::ProcedureId;
 use common_wal::options::WalOptions;
 use serde_json::error::Error as JsonError;
 use snafu::{Location, Snafu};
@@ -140,6 +141,21 @@ pub enum Error {
        location: Location,
    },

+    #[snafu(display("Failed to get procedure state receiver, procedure id: {procedure_id}"))]
+    ProcedureStateReceiver {
+        procedure_id: ProcedureId,
+        #[snafu(implicit)]
+        location: Location,
+        source: common_procedure::Error,
+    },
+
+    #[snafu(display("Procedure state receiver not found: {procedure_id}"))]
+    ProcedureStateReceiverNotFound {
+        procedure_id: ProcedureId,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
    #[snafu(display("Failed to wait procedure done"))]
    WaitProcedure {
        #[snafu(implicit)]
@@ -387,6 +403,13 @@ pub enum Error {
        location: Location,
    },

+    #[snafu(display("Catalog not found, catalog: {}", catalog))]
+    CatalogNotFound {
+        catalog: String,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
    #[snafu(display("Invalid metadata, err: {}", err_msg))]
    InvalidMetadata {
        err_msg: String,
@@ -877,6 +900,93 @@ pub enum Error {
        #[snafu(source)]
        error: object_store::Error,
    },
+
+    #[snafu(display("Missing column ids"))]
+    MissingColumnIds {
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display(
+        "Missing column in column metadata: {}, table: {}, table_id: {}",
+        column_name,
+        table_name,
+        table_id,
+    ))]
+    MissingColumnInColumnMetadata {
+        column_name: String,
+        #[snafu(implicit)]
+        location: Location,
+        table_name: String,
+        table_id: TableId,
+    },
+
+    #[snafu(display(
+        "Mismatch column id: column_name: {}, column_id: {}, table: {}, table_id: {}",
+        column_name,
+        column_id,
+        table_name,
+        table_id,
+    ))]
+    MismatchColumnId {
+        column_name: String,
+        column_id: u32,
+        #[snafu(implicit)]
+        location: Location,
+        table_name: String,
+        table_id: TableId,
+    },
+
+    #[snafu(display("Failed to convert column def, column: {}", column))]
+    ConvertColumnDef {
+        column: String,
+        #[snafu(implicit)]
+        location: Location,
+        source: api::error::Error,
+    },
+
+    #[snafu(display(
+        "Column metadata inconsistencies found in table: {}, table_id: {}",
+        table_name,
+        table_id
+    ))]
+    ColumnMetadataConflicts {
+        table_name: String,
+        table_id: TableId,
+    },
+
+    #[snafu(display(
+        "Column not found in column metadata, column_name: {}, column_id: {}",
+        column_name,
+        column_id
+    ))]
+    ColumnNotFound { column_name: String, column_id: u32 },
+
+    #[snafu(display(
+        "Column id mismatch, column_name: {}, expected column_id: {}, actual column_id: {}",
+        column_name,
+        expected_column_id,
+        actual_column_id
+    ))]
+    ColumnIdMismatch {
+        column_name: String,
+        expected_column_id: u32,
+        actual_column_id: u32,
+    },
+
+    #[snafu(display(
+        "Timestamp column mismatch, expected column_name: {}, expected column_id: {}, actual column_name: {}, actual column_id: {}",
+        expected_column_name,
+        expected_column_id,
+        actual_column_name,
+        actual_column_id,
+    ))]
+    TimestampMismatch {
+        expected_column_name: String,
+        expected_column_id: u32,
+        actual_column_name: String,
+        actual_column_id: u32,
+    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -896,7 +1006,16 @@ impl ErrorExt for Error {
            | DeserializeFromJson { .. } => StatusCode::Internal,

            NoLeader { .. } => StatusCode::TableUnavailable,
-            ValueNotExist { .. } | ProcedurePoisonConflict { .. } => StatusCode::Unexpected,
+            ValueNotExist { .. }
+            | ProcedurePoisonConflict { .. }
+            | ProcedureStateReceiverNotFound { .. }
+            | MissingColumnIds { .. }
+            | MissingColumnInColumnMetadata { .. }
+            | MismatchColumnId { .. }
+            | ColumnMetadataConflicts { .. }
+            | ColumnNotFound { .. }
+            | ColumnIdMismatch { .. }
+            | TimestampMismatch { .. } => StatusCode::Unexpected,

            Unsupported { .. } => StatusCode::Unsupported,
            WriteObject { .. } | ReadObject { .. } => StatusCode::StorageUnavailable,
@@ -980,10 +1099,13 @@ impl ErrorExt for Error {
            AbortProcedure { source, .. } => source.status_code(),
            ConvertAlterTableRequest { source, .. } => source.status_code(),
            PutPoison { source, .. } => source.status_code(),
+            ConvertColumnDef { source, .. } => source.status_code(),
+            ProcedureStateReceiver { source, .. } => source.status_code(),

            ParseProcedureId { .. }
            | InvalidNumTopics { .. }
            | SchemaNotFound { .. }
+            | CatalogNotFound { .. }
            | InvalidNodeInfoKey { .. }
            | InvalidStatKey { .. }
            | ParseNum { .. }
--- a/src/common/meta/src/instruction.rs
+++ b/src/common/meta/src/instruction.rs
@@ -174,6 +174,8 @@ pub struct UpgradeRegion {
 /// The identifier of cache.
 pub enum CacheIdent {
    FlowId(FlowId),
+    /// Indicate change of address of flownode.
+    FlowNodeAddressChange(u64),
    FlowName(FlowName),
    TableId(TableId),
    TableName(TableName),
--- a/src/common/meta/src/key.rs
+++ b/src/common/meta/src/key.rs
@@ -167,6 +167,7 @@ pub const NAME_PATTERN: &str = r"[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*";
 pub const LEGACY_MAINTENANCE_KEY: &str = "__maintenance";
 pub const MAINTENANCE_KEY: &str = "__switches/maintenance";
 pub const PAUSE_PROCEDURE_KEY: &str = "__switches/pause_procedure";
+pub const RECOVERY_MODE_KEY: &str = "__switches/recovery";

 pub const DATANODE_TABLE_KEY_PREFIX: &str = "__dn_table";
 pub const TABLE_INFO_KEY_PREFIX: &str = "__table_info";
@@ -181,6 +182,11 @@ pub const KAFKA_TOPIC_KEY_PREFIX: &str = "__topic_name/kafka";
 pub const LEGACY_TOPIC_KEY_PREFIX: &str = "__created_wal_topics/kafka";
 pub const TOPIC_REGION_PREFIX: &str = "__topic_region";

+/// The election key.
+pub const ELECTION_KEY: &str = "__metasrv_election";
+/// The root key of metasrv election candidates.
+pub const CANDIDATES_ROOT: &str = "__metasrv_election_candidates/";
+
 /// The keys with these prefixes will be loaded into the cache when the leader starts.
 pub const CACHE_KEY_PREFIXES: [&str; 5] = [
    TABLE_NAME_KEY_PREFIX,
--- a/src/common/meta/src/key/runtime_switch.rs
+++ b/src/common/meta/src/key/runtime_switch.rs
@@ -21,7 +21,7 @@ use moka::future::Cache;
 use snafu::ResultExt;

 use crate::error::{GetCacheSnafu, Result};
-use crate::key::{LEGACY_MAINTENANCE_KEY, MAINTENANCE_KEY, PAUSE_PROCEDURE_KEY};
+use crate::key::{LEGACY_MAINTENANCE_KEY, MAINTENANCE_KEY, PAUSE_PROCEDURE_KEY, RECOVERY_MODE_KEY};
 use crate::kv_backend::KvBackendRef;
 use crate::rpc::store::{BatchDeleteRequest, PutRequest};

@@ -131,6 +131,21 @@ impl RuntimeSwitchManager {
    pub async fn is_procedure_paused(&self) -> Result<bool> {
        self.exists(PAUSE_PROCEDURE_KEY).await
    }
+
+    /// Enables recovery mode.
+    pub async fn set_recovery_mode(&self) -> Result<()> {
+        self.put_key(RECOVERY_MODE_KEY).await
+    }
+
+    /// Unsets recovery mode.
+    pub async fn unset_recovery_mode(&self) -> Result<()> {
+        self.delete_keys(&[RECOVERY_MODE_KEY]).await
+    }
+
+    /// Returns true if the system is currently in recovery mode.
+    pub async fn recovery_mode(&self) -> Result<bool> {
+        self.exists(RECOVERY_MODE_KEY).await
+    }
 }

 #[cfg(test)]
@@ -221,4 +236,15 @@ mod tests {
        runtime_switch_manager.resume_procedure().await.unwrap();
        assert!(!runtime_switch_manager.is_procedure_paused().await.unwrap());
    }
+
+    #[tokio::test]
+    async fn test_recovery_mode() {
+        let runtime_switch_manager =
+            Arc::new(RuntimeSwitchManager::new(Arc::new(MemoryKvBackend::new())));
+        assert!(!runtime_switch_manager.recovery_mode().await.unwrap());
+        runtime_switch_manager.set_recovery_mode().await.unwrap();
+        assert!(runtime_switch_manager.recovery_mode().await.unwrap());
+        runtime_switch_manager.unset_recovery_mode().await.unwrap();
+        assert!(!runtime_switch_manager.recovery_mode().await.unwrap());
+    }
 }
--- a/src/common/meta/src/key/table_route.rs
+++ b/src/common/meta/src/key/table_route.rs
@@ -48,11 +48,6 @@ impl TableRouteKey {
    pub fn new(table_id: TableId) -> Self {
        Self { table_id }
    }
-
-    /// Returns the range prefix of the table route key.
-    pub fn range_prefix() -> Vec<u8> {
-        format!("{}/", TABLE_ROUTE_PREFIX).into_bytes()
-    }
 }

 #[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
@@ -189,6 +184,17 @@ impl TableRouteValue {
        }
    }

+    /// Converts to [`LogicalTableRouteValue`].
+    ///
+    /// # Panic
+    /// If it is not the [`LogicalTableRouteValue`].
+    pub fn into_logical_table_route(self) -> LogicalTableRouteValue {
+        match self {
+            TableRouteValue::Logical(x) => x,
+            _ => unreachable!("Mistakenly been treated as a Logical TableRoute: {self:?}"),
+        }
+    }
+
    pub fn region_numbers(&self) -> Vec<RegionNumber> {
        match self {
            TableRouteValue::Physical(x) => x
--- a/src/common/meta/src/lib.rs
+++ b/src/common/meta/src/lib.rs
@@ -37,7 +37,9 @@ pub mod node_expiry_listener;
 pub mod node_manager;
 pub mod peer;
 pub mod poison_key;
+pub mod procedure_executor;
 pub mod range_stream;
+pub mod reconciliation;
 pub mod region_keeper;
 pub mod region_registry;
 pub mod rpc;
--- a/src/common/meta/src/metrics.rs
+++ b/src/common/meta/src/metrics.rs
@@ -15,6 +15,13 @@
 use lazy_static::lazy_static;
 use prometheus::*;

+pub const TABLE_TYPE_PHYSICAL: &str = "physical";
+pub const TABLE_TYPE_LOGICAL: &str = "logical";
+pub const ERROR_TYPE_RETRYABLE: &str = "retryable";
+pub const ERROR_TYPE_EXTERNAL: &str = "external";
+pub const STATS_TYPE_NO_REGION_METADATA: &str = "no_region_metadata";
+pub const STATS_TYPE_REGION_NOT_OPEN: &str = "region_not_open";
+
 lazy_static! {
    pub static ref METRIC_META_TXN_REQUEST: HistogramVec = register_histogram_vec!(
        "greptime_meta_txn_request",
@@ -114,4 +121,39 @@ lazy_static! {
        &["backend", "result", "op", "type"]
    )
    .unwrap();
+    pub static ref METRIC_META_RECONCILIATION_LIST_REGION_METADATA_DURATION: HistogramVec =
+        register_histogram_vec!(
+            "greptime_meta_reconciliation_list_region_metadata_duration",
+            "reconciliation list region metadata duration",
+            &["table_type"]
+        )
+        .unwrap();
+    pub static ref METRIC_META_RECONCILIATION_RESOLVED_COLUMN_METADATA: IntCounterVec =
+        register_int_counter_vec!(
+            "greptime_meta_reconciliation_resolved_column_metadata",
+            "reconciliation resolved column metadata",
+            &["strategy"]
+        )
+        .unwrap();
+    pub static ref METRIC_META_RECONCILIATION_STATS: IntCounterVec =
+        register_int_counter_vec!(
+            "greptime_meta_reconciliation_stats",
+            "reconciliation stats",
+            &["procedure_name", "table_type", "type"]
+        )
+        .unwrap();
+    pub static ref METRIC_META_RECONCILIATION_PROCEDURE: HistogramVec =
+        register_histogram_vec!(
+            "greptime_meta_reconciliation_procedure",
+            "reconcile table procedure",
+            &["procedure_name", "step"]
+        )
+        .unwrap();
+    pub static ref METRIC_META_RECONCILIATION_PROCEDURE_ERROR: IntCounterVec =
+        register_int_counter_vec!(
+            "greptime_meta_reconciliation_procedure_error",
+            "reconciliation procedure error",
+            &["procedure_name", "step", "error_type"]
+        )
+        .unwrap();
 }
--- a/src/common/meta/src/peer.rs
+++ b/src/common/meta/src/peer.rs
@@ -19,17 +19,11 @@ pub use api::v1::meta::Peer;
 use crate::error::Error;
 use crate::{DatanodeId, FlownodeId};

-/// PeerLookupService is a service that can lookup peers.
+/// can query peer given a node id
 #[async_trait::async_trait]
 pub trait PeerLookupService {
-    /// Returns the datanode with the given id. It may return inactive peers.
    async fn datanode(&self, id: DatanodeId) -> Result<Option<Peer>, Error>;
-
-    /// Returns the flownode with the given id. It may return inactive peers.
    async fn flownode(&self, id: FlownodeId) -> Result<Option<Peer>, Error>;
-
-    /// Returns all currently active frontend nodes that have reported a heartbeat within the most recent heartbeat interval from the in-memory backend.
-    async fn active_frontends(&self) -> Result<Vec<Peer>, Error>;
 }

 pub type PeerLookupServiceRef = Arc<dyn PeerLookupService + Send + Sync>;
--- a/src/common/meta/src/procedure_executor.rs
+++ b/src/common/meta/src/procedure_executor.rs
@@ -0,0 +1,173 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use api::v1::meta::{ProcedureDetailResponse, ReconcileRequest, ReconcileResponse};
+use common_procedure::{ProcedureId, ProcedureManagerRef};
+use common_telemetry::tracing_context::W3cTrace;
+use snafu::{OptionExt, ResultExt};
+
+use crate::ddl_manager::DdlManagerRef;
+use crate::error::{
+    ParseProcedureIdSnafu, ProcedureNotFoundSnafu, QueryProcedureSnafu, Result, UnsupportedSnafu,
+};
+use crate::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
+use crate::rpc::procedure::{
+    self, AddRegionFollowerRequest, MigrateRegionRequest, MigrateRegionResponse,
+    ProcedureStateResponse, RemoveRegionFollowerRequest,
+};
+
+/// The context of procedure executor.
+#[derive(Debug, Default)]
+pub struct ExecutorContext {
+    pub tracing_context: Option<W3cTrace>,
+}
+
+/// The procedure executor that accepts ddl, region migration task etc.
+#[async_trait::async_trait]
+pub trait ProcedureExecutor: Send + Sync {
+    /// Submit a ddl task
+    async fn submit_ddl_task(
+        &self,
+        ctx: &ExecutorContext,
+        request: SubmitDdlTaskRequest,
+    ) -> Result<SubmitDdlTaskResponse>;
+
+    /// Add a region follower
+    async fn add_region_follower(
+        &self,
+        _ctx: &ExecutorContext,
+        _request: AddRegionFollowerRequest,
+    ) -> Result<()> {
+        UnsupportedSnafu {
+            operation: "add_region_follower",
+        }
+        .fail()
+    }
+
+    /// Remove a region follower
+    async fn remove_region_follower(
+        &self,
+        _ctx: &ExecutorContext,
+        _request: RemoveRegionFollowerRequest,
+    ) -> Result<()> {
+        UnsupportedSnafu {
+            operation: "remove_region_follower",
+        }
+        .fail()
+    }
+
+    /// Submit a region migration task
+    async fn migrate_region(
+        &self,
+        ctx: &ExecutorContext,
+        request: MigrateRegionRequest,
+    ) -> Result<MigrateRegionResponse>;
+
+    /// Submit a reconcile task.
+    async fn reconcile(
+        &self,
+        _ctx: &ExecutorContext,
+        request: ReconcileRequest,
+    ) -> Result<ReconcileResponse>;
+
+    /// Query the procedure state by its id
+    async fn query_procedure_state(
+        &self,
+        ctx: &ExecutorContext,
+        pid: &str,
+    ) -> Result<ProcedureStateResponse>;
+
+    async fn list_procedures(&self, ctx: &ExecutorContext) -> Result<ProcedureDetailResponse>;
+}
+
+pub type ProcedureExecutorRef = Arc<dyn ProcedureExecutor>;
+
+/// The local procedure executor that accepts ddl, region migration task etc.
+pub struct LocalProcedureExecutor {
+    pub ddl_manager: DdlManagerRef,
+    pub procedure_manager: ProcedureManagerRef,
+}
+
+impl LocalProcedureExecutor {
+    pub fn new(ddl_manager: DdlManagerRef, procedure_manager: ProcedureManagerRef) -> Self {
+        Self {
+            ddl_manager,
+            procedure_manager,
+        }
+    }
+}
+
+#[async_trait::async_trait]
+impl ProcedureExecutor for LocalProcedureExecutor {
+    async fn submit_ddl_task(
+        &self,
+        ctx: &ExecutorContext,
+        request: SubmitDdlTaskRequest,
+    ) -> Result<SubmitDdlTaskResponse> {
+        self.ddl_manager.submit_ddl_task(ctx, request).await
+    }
+
+    async fn migrate_region(
+        &self,
+        _ctx: &ExecutorContext,
+        _request: MigrateRegionRequest,
+    ) -> Result<MigrateRegionResponse> {
+        UnsupportedSnafu {
+            operation: "migrate_region",
+        }
+        .fail()
+    }
+
+    async fn reconcile(
+        &self,
+        _ctx: &ExecutorContext,
+        _request: ReconcileRequest,
+    ) -> Result<ReconcileResponse> {
+        UnsupportedSnafu {
+            operation: "reconcile",
+        }
+        .fail()
+    }
+
+    async fn query_procedure_state(
+        &self,
+        _ctx: &ExecutorContext,
+        pid: &str,
+    ) -> Result<ProcedureStateResponse> {
+        let pid =
+            ProcedureId::parse_str(pid).with_context(|_| ParseProcedureIdSnafu { key: pid })?;
+
+        let state = self
+            .procedure_manager
+            .procedure_state(pid)
+            .await
+            .context(QueryProcedureSnafu)?
+            .with_context(|| ProcedureNotFoundSnafu {
+                pid: pid.to_string(),
+            })?;
+
+        Ok(procedure::procedure_state_to_pb_response(&state))
+    }
+
+    async fn list_procedures(&self, _ctx: &ExecutorContext) -> Result<ProcedureDetailResponse> {
+        let metas = self
+            .procedure_manager
+            .list_procedures()
+            .await
+            .context(QueryProcedureSnafu)?;
+        Ok(procedure::procedure_details_to_pb_response(metas))
+    }
+}
--- a/src/common/meta/src/reconciliation.rs
+++ b/src/common/meta/src/reconciliation.rs
@@ -12,8 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#![feature(assert_matches)]
-
-pub mod convert;
-pub mod default_constraint;
-pub mod error;
+pub mod manager;
+pub(crate) mod reconcile_catalog;
+pub(crate) mod reconcile_database;
+pub(crate) mod reconcile_logical_tables;
+pub(crate) mod reconcile_table;
+pub(crate) mod utils;
--- a/Show More
+++ b/Show More