test: build a failing example with LogicalPlanBuilder

Signed-off-by: discord9 <discord9@163.com>
test: reproduce the panic, still no clue why
2026-01-04 20:32:56 +00:00 · 2025-08-12 15:19:47 +08:00 · 2025-08-11 19:37:48 +08:00 · 2025-08-07 19:55:32 +08:00 · 2025-08-07 06:17:52 +00:00 · 2025-08-07 06:16:23 +00:00
269 changed files with 19499 additions and 7615 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -73,7 +73,7 @@ members = [
 resolver = "2"

 [workspace.package]
-version = "0.16.0"
+version = "0.17.0"
 edition = "2021"
 license = "Apache-2.0"

@@ -140,7 +140,7 @@ etcd-client = "0.14"
 fst = "0.4.7"
 futures = "0.3"
 futures-util = "0.3"
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "fe8c13f5f3c1fbef63f57fbdd29f0490dfeb987b" }
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "ccfd4da48bc0254ed865e479cd981a3581b02d84" }
 hex = "0.4"
 http = "1"
 humantime = "2.1"
@@ -173,6 +173,7 @@ parking_lot = "0.12"
 parquet = { version = "54.2", default-features = false, features = ["arrow", "async", "object_store"] }
 paste = "1.0"
 pin-project = "1.0"
+pretty_assertions = "1.4.0"
 prometheus = { version = "0.13.3", features = ["process"] }
 promql-parser = { version = "0.6", features = ["ser"] }
 prost = { version = "0.13", features = ["no-recursion-limit"] }
@@ -196,6 +197,7 @@ rust_decimal = "1.33"
 rustc-hash = "2.0"
 # It is worth noting that we should try to avoid using aws-lc-rs until it can be compiled on various platforms.
 rustls = { version = "0.23.25", default-features = false }
+sea-query = "0.32"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = { version = "1.0", features = ["float_roundtrip"] }
 serde_with = "3"
@@ -204,7 +206,7 @@ simd-json = "0.15"
 similar-asserts = "1.6.0"
 smallvec = { version = "1", features = ["serde"] }
 snafu = "0.8"
-sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "0cf6c04490d59435ee965edd2078e8855bd8471e", features = [
+sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "df6fcca80ce903f5beef7002cd2c1b062e7024f8", features = [
    "visitor",
    "serde",
 ] } # branch = "v0.54.x"
--- a/config/config.md
+++ b/config/config.md
@@ -147,6 +147,7 @@
 | `region_engine.mito.write_cache_ttl` | String | Unset | TTL for write cache. |
 | `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
 | `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
+| `region_engine.mito.max_concurrent_scan_files` | Integer | `128` | Maximum number of SST files to scan concurrently. |
 | `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
 | `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
 | `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
@@ -207,6 +208,8 @@
 | `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
 | `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
 | `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
+| `memory` | -- | -- | The memory options. |
+| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.<br/>When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable<br/>is set to "prof:true,prof_active:false". The official image adds this env variable.<br/>Default is true. |


 ## Distributed Mode
@@ -311,6 +314,8 @@
 | `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
 | `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
 | `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
+| `memory` | -- | -- | The memory options. |
+| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.<br/>When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable<br/>is set to "prof:true,prof_active:false". The official image adds this env variable.<br/>Default is true. |


 ### Metasrv
@@ -333,6 +338,12 @@
 | `runtime` | -- | -- | The runtime options. |
 | `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
 | `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
+| `backend_tls` | -- | -- | TLS configuration for kv store backend (only applicable for PostgreSQL/MySQL backends)<br/>When using PostgreSQL or MySQL as metadata store, you can configure TLS here |
+| `backend_tls.mode` | String | `prefer` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- "disable" - No TLS<br/>- "prefer" (default) - Try TLS, fallback to plain<br/>- "require" - Require TLS<br/>- "verify_ca" - Require TLS and verify CA<br/>- "verify_full" - Require TLS and verify hostname |
+| `backend_tls.cert_path` | String | `""` | Path to client certificate file (for client authentication)<br/>Like "/path/to/client.crt" |
+| `backend_tls.key_path` | String | `""` | Path to client private key file (for client authentication)<br/>Like "/path/to/client.key" |
+| `backend_tls.ca_cert_path` | String | `""` | Path to CA certificate file (for server certificate verification)<br/>Required when using custom CAs or self-signed certificates<br/>Leave empty to use system root certificates only<br/>Like "/path/to/ca.crt" |
+| `backend_tls.watch` | Bool | `false` | Watch for certificate file changes and auto reload |
 | `grpc` | -- | -- | The gRPC server options. |
 | `grpc.bind_addr` | String | `127.0.0.1:3002` | The address to bind the gRPC server. |
 | `grpc.server_addr` | String | `127.0.0.1:3002` | The communication server address for the frontend and datanode to connect to metasrv.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `bind_addr`. |
@@ -370,6 +381,8 @@
 | `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>Only accepts strings that match the following regular expression pattern:<br/>[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*<br/>i.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1. |
 | `wal.replication_factor` | Integer | `1` | Expected number of replicas of each partition. |
 | `wal.create_topic_timeout` | String | `30s` | Above which a topic creation operation will be cancelled. |
+| `event_recorder` | -- | -- | Configuration options for the event recorder. |
+| `event_recorder.ttl` | String | `30d` | TTL for the events table that will be used to store the events. |
 | `logging` | -- | -- | The logging options. |
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
@@ -389,6 +402,8 @@
 | `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
 | `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
 | `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
+| `memory` | -- | -- | The memory options. |
+| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.<br/>When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable<br/>is set to "prof:true,prof_active:false". The official image adds this env variable.<br/>Default is true. |


 ### Datanode
@@ -501,6 +516,7 @@
 | `region_engine.mito.write_cache_ttl` | String | Unset | TTL for write cache. |
 | `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
 | `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
+| `region_engine.mito.max_concurrent_scan_files` | Integer | `128` | Maximum number of SST files to scan concurrently. |
 | `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
 | `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
 | `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
@@ -554,6 +570,8 @@
 | `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
 | `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
 | `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
+| `memory` | -- | -- | The memory options. |
+| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.<br/>When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable<br/>is set to "prof:true,prof_active:false". The official image adds this env variable.<br/>Default is true. |


 ### Flownode
@@ -611,3 +629,5 @@
 | `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
 | `query` | -- | -- | -- |
 | `query.parallelism` | Integer | `1` | Parallelism of the query engine for query sent by flownode.<br/>Default to 1, so it won't use too much cpu or memory |
+| `memory` | -- | -- | The memory options. |
+| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.<br/>When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable<br/>is set to "prof:true,prof_active:false". The official image adds this env variable.<br/>Default is true. |
--- a/config/datanode.example.toml
+++ b/config/datanode.example.toml
@@ -474,6 +474,9 @@ sst_write_buffer_size = "8MB"
 ## Capacity of the channel to send data from parallel scan tasks to the main task.
 parallel_scan_channel_size = 32

+## Maximum number of SST files to scan concurrently.
+max_concurrent_scan_files = 128
+
 ## Whether to allow stale WAL entries read during replay.
 allow_stale_entries = false

@@ -669,3 +672,11 @@ headers = { }
 ## The tokio console address.
 ## @toml2docs:none-default
 #+ tokio_console_addr = "127.0.0.1"
+
+## The memory options.
+[memory]
+## Whether to enable heap profiling activation during startup.
+## When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
+## is set to "prof:true,prof_active:false". The official image adds this env variable.
+## Default is true.
+enable_heap_profiling = true
--- a/config/flownode.example.toml
+++ b/config/flownode.example.toml
@@ -136,3 +136,11 @@ default_ratio = 1.0
 ## Parallelism of the query engine for query sent by flownode.
 ## Default to 1, so it won't use too much cpu or memory
 parallelism = 1
+
+## The memory options.
+[memory]
+## Whether to enable heap profiling activation during startup.
+## When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
+## is set to "prof:true,prof_active:false". The official image adds this env variable.
+## Default is true.
+enable_heap_profiling = true
--- a/config/frontend.example.toml
+++ b/config/frontend.example.toml
@@ -280,3 +280,11 @@ headers = { }
 ## The tokio console address.
 ## @toml2docs:none-default
 #+ tokio_console_addr = "127.0.0.1"
+
+## The memory options.
+[memory]
+## Whether to enable heap profiling activation during startup.
+## When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
+## is set to "prof:true,prof_active:false". The official image adds this env variable.
+## Default is true.
+enable_heap_profiling = true
--- a/config/metasrv.example.toml
+++ b/config/metasrv.example.toml
@@ -65,6 +65,34 @@ node_max_idle_time = "24hours"
 ## The number of threads to execute the runtime for global write operations.
 #+ compact_rt_size = 4

+## TLS configuration for kv store backend (only applicable for PostgreSQL/MySQL backends)
+## When using PostgreSQL or MySQL as metadata store, you can configure TLS here
+[backend_tls]
+## TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
+## - "disable" - No TLS
+## - "prefer" (default) - Try TLS, fallback to plain
+## - "require" - Require TLS
+## - "verify_ca" - Require TLS and verify CA
+## - "verify_full" - Require TLS and verify hostname
+mode = "prefer"
+
+## Path to client certificate file (for client authentication)
+## Like "/path/to/client.crt"
+cert_path = ""
+
+## Path to client private key file (for client authentication)
+## Like "/path/to/client.key"
+key_path = ""
+
+## Path to CA certificate file (for server certificate verification)
+## Required when using custom CAs or self-signed certificates
+## Leave empty to use system root certificates only
+## Like "/path/to/ca.crt"
+ca_cert_path = ""
+
+## Watch for certificate file changes and auto reload
+watch = false
+
 ## The gRPC server options.
 [grpc]
 ## The address to bind the gRPC server.
@@ -212,6 +240,11 @@ create_topic_timeout = "30s"
 # client_cert_path = "/path/to/client_cert"
 # client_key_path = "/path/to/key"

+## Configuration options for the event recorder.
+[event_recorder]
+## TTL for the events table that will be used to store the events.
+ttl = "30d"
+
 ## The logging options.
 [logging]
 ## The directory to store the log files. If set to empty, logs will not be written to files.
@@ -265,3 +298,11 @@ headers = { }
 ## The tokio console address.
 ## @toml2docs:none-default
 #+ tokio_console_addr = "127.0.0.1"
+
+## The memory options.
+[memory]
+## Whether to enable heap profiling activation during startup.
+## When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
+## is set to "prof:true,prof_active:false". The official image adds this env variable.
+## Default is true.
+enable_heap_profiling = true
--- a/config/standalone.example.toml
+++ b/config/standalone.example.toml
@@ -565,6 +565,9 @@ sst_write_buffer_size = "8MB"
 ## Capacity of the channel to send data from parallel scan tasks to the main task.
 parallel_scan_channel_size = 32

+## Maximum number of SST files to scan concurrently.
+max_concurrent_scan_files = 128
+
 ## Whether to allow stale WAL entries read during replay.
 allow_stale_entries = false

@@ -783,3 +786,11 @@ headers = { }
 ## The tokio console address.
 ## @toml2docs:none-default
 #+ tokio_console_addr = "127.0.0.1"
+
+## The memory options.
+[memory]
+## Whether to enable heap profiling activation during startup.
+## When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
+## is set to "prof:true,prof_active:false". The official image adds this env variable.
+## Default is true.
+enable_heap_profiling = true
--- a/docker/buildx/centos/Dockerfile
+++ b/docker/buildx/centos/Dockerfile
@@ -47,4 +47,6 @@ WORKDIR /greptime
 COPY --from=builder /out/target/${OUTPUT_DIR}/greptime /greptime/bin/
 ENV PATH /greptime/bin/:$PATH

+ENV MALLOC_CONF="prof:true,prof_active:false"
+
 ENTRYPOINT ["greptime"]
--- a/docker/buildx/ubuntu/Dockerfile
+++ b/docker/buildx/ubuntu/Dockerfile
@@ -47,4 +47,6 @@ WORKDIR /greptime
 COPY --from=builder /out/target/${OUTPUT_DIR}/greptime /greptime/bin/
 ENV PATH /greptime/bin/:$PATH

+ENV MALLOC_CONF="prof:true,prof_active:false"
+
 ENTRYPOINT ["greptime"]
--- a/docker/ci/centos/Dockerfile
+++ b/docker/ci/centos/Dockerfile
@@ -15,4 +15,6 @@ ADD $TARGETARCH/greptime /greptime/bin/

 ENV PATH /greptime/bin/:$PATH

+ENV MALLOC_CONF="prof:true,prof_active:false"
+
 ENTRYPOINT ["greptime"]
--- a/docker/ci/ubuntu/Dockerfile
+++ b/docker/ci/ubuntu/Dockerfile
@@ -18,4 +18,6 @@ ENV PATH /greptime/bin/:$PATH

 ENV TARGET_BIN=$TARGET_BIN

+ENV MALLOC_CONF="prof:true,prof_active:false"
+
 ENTRYPOINT ["sh", "-c", "exec $TARGET_BIN \"$@\"", "--"]
--- a/docs/how-to/how-to-profile-memory.md
+++ b/docs/how-to/how-to-profile-memory.md
@@ -30,6 +30,23 @@ curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph

 ## Profiling

+### Configuration
+
+You can control heap profiling activation through configuration. Add the following to your configuration file:
+
+```toml
+[memory]
+# Whether to enable heap profiling activation during startup.
+# When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
+# is set to "prof:true,prof_active:false". The official image adds this env variable.
+# Default is true.
+enable_heap_profiling = true
+```
+
+By default, if you set `MALLOC_CONF=prof:true,prof_active:false`, the database will enable profiling during startup. You can disable this behavior by setting `enable_heap_profiling = false` in the configuration.
+
+### Starting with environment variables
+
 Start GreptimeDB instance with environment variables:

 ```bash
@@ -40,6 +57,23 @@ MALLOC_CONF=prof:true ./target/debug/greptime standalone start
 _RJEM_MALLOC_CONF=prof:true ./target/debug/greptime standalone start
 ```

+### Memory profiling control
+
+You can control heap profiling activation using the new HTTP APIs:
+
+```bash
+# Check current profiling status
+curl -X GET localhost:4000/debug/prof/mem/status
+
+# Activate heap profiling (if not already active)
+curl -X POST localhost:4000/debug/prof/mem/activate
+
+# Deactivate heap profiling
+curl -X POST localhost:4000/debug/prof/mem/deactivate
+```
+
+### Dump memory profiling data
+
 Dump memory profiling data through HTTP API:

 ```bash
--- a/docs/rfcs/2025-07-04-compatibility-test-framework.md
+++ b/docs/rfcs/2025-07-04-compatibility-test-framework.md
@@ -0,0 +1,151 @@
+---
+Feature Name: Compatibility Test Framework
+Tracking Issue: TBD
+Date: 2025-07-04
+Author: "Ruihang Xia <waynestxia@gmail.com>"
+---
+
+# Summary
+
+This RFC proposes a compatibility test framework for GreptimeDB to ensure backward/forward compatibility for different versions of GreptimeDB.
+
+# Motivation
+
+In current practice, we don't have a systematic way to test and ensure the compatibility of different versions of GreptimeDB. Each time we release a new version, we need to manually test the compatibility with ad-hoc cases. This is not only time-consuming, but also error-prone and unmaintainable. It relies heavily on the release manager to ensure the compatibility of different versions of GreptimeDB.
+
+We don't have a detailed guide in the release SOP on how to test and ensure the compatibility of a new version. As a result, we have broken compatibility in new versions many times (`v0.14.1` and `v0.15.1` are two examples, both of which were released right after a major release).
+
+# Details
+
+This RFC proposes a compatibility test framework that is easy to maintain, extend and run. It can tell the compatibility between any given two versions of GreptimeDB, both backward and forward. It's based on the Sqlness library but used in a different way.
+
+Generally speaking, the framework is composed of two parts:
+
+1. Test cases: A set of test cases that are maintained dedicatedly for the compatibility test. Still in the `.sql` and `.result` format.
+2. Test framework: A new sqlness runner that is used to run the test cases, with some new features that are not required by the integration sqlness test.
+
+## Test Cases
+
+### Structure
+
+The case set is organized in three parts:
+
+- `1.feature`: Use a new feature
+- `2.verify`: Verify database behavior
+- `3.cleanup`: Paired with `1.feature`, cleanup the test environment.
+
+These three parts are organized in a tree structure, and should be run in sequence:
+
+```
+compatibility_test/
+├── 1.feature/
+│   ├── feature-a/
+│   ├── feature-b/
+│   └── feature-c/
+├── 2.verify/
+│   ├── verify-metadata/
+│   ├── verify-data/
+│   └── verify-schema/
+└── 3.cleanup/
+    ├── cleanup-a/
+    ├── cleanup-b/
+    └── cleanup-c/
+```
+
+### Example
+
+For example, for a new feature like adding new index option ([#6416](https://github.com/GreptimeTeam/greptimedb/pull/6416)), we (who implement the feature) create a new test case like this:
+
+```sql
+-- path: compatibility_test/1.feature/index-option/granularity_and_false_positive_rate.sql
+
+-- SQLNESS ARG since=0.15.0
+-- SQLNESS IGNORE_RESULT
+CREATE TABLE granularity_and_false_positive_rate (ts timestamp time index, val double) with ("index.granularity" = "8192", "index.false_positive_rate" = "0.01");
+```
+
+And
+
+```sql
+-- path: compatibility_test/3.cleanup/index-option/granularity_and_false_positive_rate.sql
+drop table granularity_and_false_positive_rate;
+```
+
+Since this new feature doesn't require any special way to verify the database behavior, we can reuse existing test cases in `2.verify/` to verify the database behavior. For example, we can reuse the `verify-metadata` test case to verify the metadata of the table.
+
+```sql
+-- path: compatibility_test/2.verify/verify-metadata/show-create-table.sql
+
+-- SQLNESS TEMPLATE TABLE="SHOW TABLES";
+SHOW CREATE TABLE $TABLE;
+```
+
+In this example, we use some new sqlness features that will be introduced in the next section (`since`, `IGNORE_RESULT`, `TEMPLATE`).
+
+### Maintenance
+
+Each time we implement a new feature that should be covered by the compatibility test, we should create a new test case in `1.feature/` and `3.cleanup/` for it, and check whether existing cases in `2.verify/` can be reused to verify the database behavior.
+
+This simulates an enthusiastic user who uses every new feature as soon as it is available. The only new maintenance burden falls on the feature implementer, who writes one more test case for the new feature to pin down ("fixate") its behavior. Once there is a breaking change in the future, it can be detected by the compatibility test framework automatically.
+
+Another topic is deprecation. If a feature is deprecated, we should also mark it in the test case. Continuing the above example, assume we deprecate the `index.granularity` and `index.false_positive_rate` index options in `v0.99.0`; we can mark them as:
+```sql
+-- SQLNESS ARG since=0.15.0 till=0.99.0
+...
+```
+
+This tells the framework to ignore this feature in version `v0.99.0` and later. Currently, we have many experimental features that are scheduled to be broken in the future, and this is a good way to mark them.
+
+## Test Framework
+
+This section is about new sqlness features required by this framework.
+
+### Since and Till
+
+Following the `ARG` interceptor in sqlness, we can mark a feature as available between two given versions. Only `since` is required:
+
+```sql
+-- SQLNESS ARG since=VERSION_STRING [till=VERSION_STRING]
+```
+
+### IGNORE_RESULT
+
+`IGNORE_RESULT` is a new interceptor, it tells the runner to ignore the result of the query, only check whether the query is executed successfully.
+
+This is useful for reducing the maintenance burden of the test cases. Unlike the integration sqlness test, in most cases we don't care about the result of the query; we only need to make sure the query is executed successfully.
+
+### TEMPLATE
+
+`TEMPLATE` is another new interceptor. It can generate queries from a template based on runtime data.
+
+In the above example, we need to run the `SHOW CREATE TABLE` query for all existing tables, so we can use the `TEMPLATE` interceptor to generate the query with a dynamic table list.
+
+### RUNNER
+
+There are also some extra requirements for the runner itself:
+
+- It should run the test cases in sequence, first `1.feature/`, then `2.verify/`, and finally `3.cleanup/`.
+- It should be able to fetch the required versions automatically to finish the test.
+- It should handle the `since` and `till` properly.
+
+In the `1.feature` phase, the runner needs to identify, by version number, all features that need to be tested. It then restarts with a new version (the `to` version) to run the `2.verify/` and `3.cleanup/` phases.
+
+## Test Report
+
+Finally, we can run the compatibility test to verify the compatibility between any given two versions of GreptimeDB, for example:
+
+```bash
+# check backward compatibility between v0.15.0 and v0.16.0 when releasing v0.16.0
+./sqlness run --from=0.15.0 --to=0.16.0
+
+# check forward compatibility when downgrading from v0.15.0 to v0.13.0
+./sqlness run --from=0.15.0 --to=0.13.0
+```
+
+We can also use a script to run the compatibility test for all the versions in a given range to give a quick report with all versions we need.
+
+And we always bump the version in `Cargo.toml` to the next major release version, so the next major release version can be used as "latest" unpublished version for scenarios like local testing.
+
+# Alternatives
+
+There was a previous attempt to implement a compatibility test framework that was disabled due to some reasons [#3728](https://github.com/GreptimeTeam/greptimedb/issues/3728).
--- a/docs/rfcs/2025-07-23-global-gc-worker.md
+++ b/docs/rfcs/2025-07-23-global-gc-worker.md
@@ -0,0 +1,157 @@
+---
+Feature Name: "global-gc-worker"
+Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/6571
+Date: 2025-07-23
+Author: "discord9 <discord9@163.com>"
+---
+
+# Global GC Worker
+
+## Summary
+
+This RFC proposes the integration of a garbage collection (GC) mechanism within the Compaction process. This mechanism aims to manage and remove stale files that are no longer actively used by any system component, thereby reclaiming storage space.
+
+## Motivation
+
+With the introduction of features such as table repartitioning, a substantial number of Parquet files can become obsolete. Furthermore, failures during manifest updates may result in orphaned files that are never referenced by the system. Therefore, a periodic garbage collection mechanism is essential to reclaim storage space by systematically removing these unused files.
+
+## Details
+
+### Overview
+
+The garbage collection process will be integrated directly into the Compaction process. Upon the completion of a Compaction for a given region, the GC worker will be automatically triggered. Its primary function will be to identify and subsequently delete obsolete files that have persisted beyond their designated retention period. This integration ensures that garbage collection is performed in close conjunction with data lifecycle management, effectively leveraging the compaction process's inherent knowledge of file states.
+
+This design prioritizes correctness and safety by explicitly linking GC execution to a well-defined operational boundary: the successful completion of a compaction cycle.
+
+### Terminology
+
+- **Unused File**: Refers to a file present in the storage directory that has never been formally recorded in any manifest. A common scenario for this includes cases where a new SST file is successfully written to storage, but the subsequent update to the manifest fails, leaving the file unreferenced.
+- **Obsolete File**: Denotes a file that was previously recorded in a manifest but has since been explicitly marked for removal. This typically occurs following operations such as data repartitioning or compaction.
+
+### GC Worker Process
+
+The GC worker operates as an integral part of the Compaction process. Once a Compaction for a specific region is completed, the GC worker is automatically triggered. Executing this process on a `datanode` is preferred to eliminate the overhead associated with having to set object storage configurations in the `metasrv`.
+
+The detailed process is as follows:
+
+1.  **Invocation**: Upon the successful completion of a Compaction for a region, the GC worker is invoked.
+2.  **Manifest Reading**: The worker reads the region's primary manifest to obtain a comprehensive list of all files marked as obsolete. Concurrently, it reads any temporary manifests generated by long-running queries to identify files that are currently in active use, thereby preventing their premature deletion.
+3.  **Lingering Time Check (Obsolete Files)**: For each identified obsolete file, the GC worker evaluates its "lingering time", which is the time elapsed since it was removed from the manifest.
+4.  **Deletion Marking (Obsolete Files)**: Files that have exceeded their maximum configurable lingering time and are not referenced by any active temporary manifests are marked for deletion.
+5.  **Lingering Time (Unused Files)**: Unused files (those never recorded in any manifest) are also subject to a configurable maximum lingering time before they are eligible for deletion.
+
+The following flowchart illustrates the GC worker's process:
+
+```mermaid
+flowchart TD
+    A[Compaction Completed] --> B[Trigger GC Worker]
+    B --> C[Scan Region Manifest]
+    C --> D[Identify File Types]
+    D --> E[Unused Files<br/>Never recorded in manifest]
+    D --> F[Obsolete Files<br/>Previously in manifest<br/>but marked for removal]
+    E --> G[Check Lingering Time]
+    F --> G
+    G --> H{File exceeds<br/>configured lingering time?}
+    H -->|No| I[Skip deletion]
+    H -->|Yes| J[Check Temporary Manifest]
+    J --> K{File in use by<br/>active queries?}
+    K -->|Yes| L[Retain file<br/>Wait for next GC cycle]
+    K -->|No| M[Safely delete file]
+    I --> N[End GC cycle]
+    L --> N
+    M --> O[Update Manifest]
+    O --> N
+    N --> P[Wait for next Compaction]
+    P --> A
+    style A fill:#e1f5fe
+    style B fill:#f3e5f5
+    style M fill:#e8f5e8
+    style L fill:#fff3e0
+```
+
+#### Handling Obsolete Files
+
+An obsolete file is permanently deleted only if two conditions are met:
+1. The time elapsed since its removal from the manifest (its obsolescence timestamp) exceeds a configurable threshold.
+2. It is not currently referenced by any active temporary manifests.
+
+
+#### Handling Unused Files
+
+With the integration of the GC worker into the Compaction process, the risk of accidentally deleting newly created SST files that have not yet been recorded in the manifest is significantly mitigated. Consequently, the concept of "Unused Files" as a distinct category primarily susceptible to accidental deletion is largely resolved. Any files that are genuinely "unused" (i.e., never referenced by any manifest, including temporary ones) can be safely deleted after a configurable maximum lingering time.
+
+For debugging and auditing purposes, a comprehensive list of recently deleted files can be maintained.
+
+### Ensuring Read Consistency
+
+To prevent the GC worker from inadvertently deleting files that are actively being utilized by long-running analytical queries, a robust protection mechanism is introduced. This mechanism relies on temporary manifests that are actively kept "alive" by the queries using them.
+
+When a long-running query is detected (e.g., by a slow query recorder), it will write a temporary manifest to the region's manifest directory. This manifest lists all files required for the query. However, simply creating this file is not enough, as a query runner might crash, leaving the temporary manifest orphaned and preventing garbage collection indefinitely.
+
+To address this, the following "heartbeat" mechanism is implemented:
+1.  **Periodic Updates**: The process executing the long-running query is responsible for periodically updating the modification timestamp of its temporary manifest file (i.e., "touching" the file). This serves as a heartbeat, signaling that the query is still active.
+2.  **GC Worker Verification**: When the GC worker runs, it scans for temporary manifests. For each one it finds, it checks the file's last modification time.
+3.  **Stale File Handling**: If a temporary manifest's last modification time is older than a configurable threshold, the GC worker considers it stale (left over from a crashed or terminated query). The GC worker will then delete this stale temporary manifest. Files that were protected only by this stale manifest are no longer shielded from garbage collection.
+
+This approach ensures that only files for genuinely active queries are protected. The lifecycle of the temporary manifest is managed dynamically: it is created when a long query starts, kept alive through periodic updates, and is either deleted by the query upon normal completion or automatically cleaned up by the GC worker if the query terminates unexpectedly.
+
+This mechanism may be too complex to implement at once. We can consider a two-phased approach:
+1.  **Phase 1 (Simple Time-Based Deletion)**: Initially, implement a simpler GC strategy that deletes obsolete files based solely on a configurable lingering time. This provides a baseline for space reclamation without the complexity of temporary manifests.
+2.  **Phase 2 (Consistency-Aware GC)**: Based on the practical effectiveness and observed issues from Phase 1, we can then decide whether to implement the full temporary manifest and heartbeat mechanism to handle long-running queries. This iterative approach allows for a quicker initial implementation while gathering real-world data to justify the need for a more complex solution.
+
+## Drawbacks
+
+- **Dependency on Compaction Frequency**: The integration of the GC worker with Compaction means that GC cycles are directly tied to the frequency of compactions. In environments with infrequent compaction operations, obsolete files may accumulate for extended periods before being reclaimed, potentially leading to increased storage consumption.
+- **Race Condition with Long-Running Queries**: A potential race condition exists if a long-running query has started but has not yet written its temporary manifest, while a compaction process simultaneously begins and marks files used by that query as obsolete. This scenario could lead to the premature deletion of files still required by the active query. To mitigate this, the threshold time for writing a temporary manifest should be significantly shorter than the lingering time configured for obsolete files, ensuring that subsequent GC worker runs do not delete files that are by then referenced by a temporary manifest while the query is still running.
+In addition, a read replica's manifest version must not lag behind the latest manifest by more than the lingering time of obsolete files; otherwise, it might reference files that have already been deleted by the GC worker.
+- **Temporary Manifest Upload Overhead**: Temporary manifests need to be uploaded to object storage, which may introduce additional complexity and potential performance overhead. However, since long-running queries are typically infrequent, the performance impact is expected to be minimal.
+
+
+## Conclusion and Rationale
+
+This section summarizes the key aspects and trade-offs of the proposed integrated GC worker, highlighting its advantages and potential challenges.
+
+| Aspect | Current Proposal (Integrated GC) |
+| :--- | :--- |
+| **Implementation Complexity** | **Medium**. Requires careful integration with the compaction process and the slow query recorder for temporary manifest management. |
+| **Reliability** | **High**. Integration with compaction and leveraging temporary manifests from long-running queries significantly mitigates the risk of incorrect deletion. Accurate management of lingering times for obsolete files and prevention of accidental deletion of newly created SSTs enhance data safety. |
+| **Performance Overhead** | **Low to Medium**. The GC worker runs post-compaction, minimizing direct impact on write paths. Overhead from temporary manifest management by the slow query recorder is expected to be acceptable for long-running queries. |
+| **Impact on Other Components** | **Moderate**. Requires modifications to the compaction process to trigger GC and the slow query recorder to manage temporary manifests. This introduces some coupling but enhances overall data safety. |
+| **Deletion Strategy** | **State- and Time-Based**. Obsolete files are deleted based on a configurable lingering time, which is paused if the file is referenced by a temporary manifest. Unused files (never in a manifest) are also subject to a lingering time. |
+
+## Unresolved Questions and Future Work
+
+This section outlines key areas requiring further discussion and defines potential avenues for future development.
+
+*   **Slow Query Recorder Implementation**: Detailed specifications are needed for the required modifications to the slow query recorder and for its precise interaction mechanisms with temporary manifests.
+*   **Configurable Lingering Times**: Establish and make configurable the specific lingering times for both obsolete and unused files to optimize storage reclamation and data availability.
+
+## Alternatives
+
+### 1. Standalone GC Service
+
+Instead of integrating the GC worker directly into the Compaction process, a standalone GC service could be implemented. This service would operate independently, periodically scanning the storage for obsolete and unused files based on manifest information and predefined retention policies.
+
+**Pros:**
+*   **Decoupling**: Separates GC logic from compaction, allowing independent scaling and deployment.
+*   **Flexibility**: Can be configured to run at different frequencies and with different strategies than compaction.
+
+**Cons:**
+*   **Increased Complexity**: Requires a separate service to manage, monitor, and coordinate with other components.
+*   **Potential for Redundancy**: May duplicate some file scanning logic already present in compaction.
+*   **Consistency Challenges**: Ensuring read consistency would require more complex coordination mechanisms between the standalone GC service and active queries, potentially involving a distributed lock manager or a more sophisticated temporary manifest system.
+
+This alternative could be implemented in the future if the integrated GC worker proves insufficient or if there is a need for more advanced GC strategies.
+
+### 2. Manifest-Driven Deletion (No Lingering Time)
+
+This alternative would involve immediate deletion of files once they are removed from the manifest, without a lingering time.
+
+**Pros:**
+*   **Simplicity**: Simplifies the GC logic by removing the need for lingering time management.
+*   **Immediate Space Reclamation**: Storage space is reclaimed as soon as files are marked for deletion.
+
+**Cons:**
+*   **Increased Risk of Data Loss**: Higher risk of deleting files still in use by long-running queries or other processes if not perfectly synchronized.
+*   **Complex Read Consistency**: Requires extremely robust and immediate mechanisms to ensure that no active queries are referencing files marked for deletion, potentially leading to performance bottlenecks or complex error handling.
+*   **Debugging Challenges**: Difficult to debug issues related to premature file deletion due to the immediate nature of the operation.
--- a/grafana/dashboards/metrics/cluster/dashboard.json
+++ b/grafana/dashboards/metrics/cluster/dashboard.json
--- a/grafana/dashboards/metrics/cluster/dashboard.md
+++ b/grafana/dashboards/metrics/cluster/dashboard.md
@@ -21,14 +21,14 @@
 # Resources
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
 | --- | --- | --- | --- | --- | --- | --- |
-| Datanode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{instance}}]-[{{ pod }}]` |
-| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$datanode"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
-| Frontend Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$frontend"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
-| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$frontend"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]-cpu` |
-| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$metasrv"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]-resident` |
-| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$metasrv"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
-| Flownode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$flownode"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
-| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$flownode"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
+| Datanode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)`<br/>`max(greptime_memory_limit_in_bytes{app="greptime-datanode"})` | `timeseries` | Current memory usage by instance | `prometheus` | `bytes` | `[{{instance}}]-[{{ pod }}]` |
+| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$datanode"}[$__rate_interval]) * 1000) by (instance, pod)`<br/>`max(greptime_cpu_limit_in_millicores{app="greptime-datanode"})` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
+| Frontend Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$frontend"}) by (instance, pod)`<br/>`max(greptime_memory_limit_in_bytes{app="greptime-frontend"})` | `timeseries` | Current memory usage by instance | `prometheus` | `bytes` | `[{{ instance }}]-[{{ pod }}]` |
+| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$frontend"}[$__rate_interval]) * 1000) by (instance, pod)`<br/>`max(greptime_cpu_limit_in_millicores{app="greptime-frontend"})` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]-cpu` |
+| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$metasrv"}) by (instance, pod)`<br/>`max(greptime_memory_limit_in_bytes{app="greptime-metasrv"})` | `timeseries` | Current memory usage by instance | `prometheus` | `bytes` | `[{{ instance }}]-[{{ pod }}]-resident` |
+| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$metasrv"}[$__rate_interval]) * 1000) by (instance, pod)`<br/>`max(greptime_cpu_limit_in_millicores{app="greptime-metasrv"})` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
+| Flownode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$flownode"}) by (instance, pod)`<br/>`max(greptime_memory_limit_in_bytes{app="greptime-flownode"})` | `timeseries` | Current memory usage by instance | `prometheus` | `bytes` | `[{{ instance }}]-[{{ pod }}]` |
+| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$flownode"}[$__rate_interval]) * 1000) by (instance, pod)`<br/>`max(greptime_cpu_limit_in_millicores{app="greptime-flownode"})` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
 # Frontend Requests
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
 | --- | --- | --- | --- | --- | --- | --- |
@@ -72,6 +72,7 @@
 | Region Worker Handle Bulk Insert Requests | `histogram_quantile(0.95, sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_bucket[$__rate_interval])))`<br/>`sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to handle bulk insert region requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
 | Active Series and Field Builders Count | `sum by(instance, pod) (greptime_mito_memtable_active_series_count)`<br/>`sum by(instance, pod) (greptime_mito_memtable_field_builder_count)` | `timeseries` | Compaction oinput output bytes | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]-series` |
 | Region Worker Convert Requests | `histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))`<br/>`sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to decode requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
+| Cache Miss | `sum by (instance,pod, type) (rate(greptime_mito_cache_miss{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | The local cache miss of the datanode. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
 # OpenDAL
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
 | --- | --- | --- | --- | --- | --- | --- |
--- a/grafana/dashboards/metrics/cluster/dashboard.yaml
+++ b/grafana/dashboards/metrics/cluster/dashboard.yaml
@@ -180,13 +180,18 @@ groups:
        - title: Datanode Memory per Instance
          type: timeseries
          description: Current memory usage by instance
-          unit: decbytes
+          unit: bytes
          queries:
            - expr: sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)
              datasource:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{instance}}]-[{{ pod }}]'
+            - expr: max(greptime_memory_limit_in_bytes{app="greptime-datanode"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: limit
        - title: Datanode CPU Usage per Instance
          type: timeseries
          description: Current cpu usage by instance
@@ -197,16 +202,26 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+            - expr: max(greptime_cpu_limit_in_millicores{app="greptime-datanode"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: limit
        - title: Frontend Memory per Instance
          type: timeseries
          description: Current memory usage by instance
-          unit: decbytes
+          unit: bytes
          queries:
            - expr: sum(process_resident_memory_bytes{instance=~"$frontend"}) by (instance, pod)
              datasource:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+            - expr: max(greptime_memory_limit_in_bytes{app="greptime-frontend"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: limit
        - title: Frontend CPU Usage per Instance
          type: timeseries
          description: Current cpu usage by instance
@@ -217,16 +232,26 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{ instance }}]-[{{ pod }}]-cpu'
+            - expr: max(greptime_cpu_limit_in_millicores{app="greptime-frontend"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: limit
        - title: Metasrv Memory per Instance
          type: timeseries
          description: Current memory usage by instance
-          unit: decbytes
+          unit: bytes
          queries:
            - expr: sum(process_resident_memory_bytes{instance=~"$metasrv"}) by (instance, pod)
              datasource:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{ instance }}]-[{{ pod }}]-resident'
+            - expr: max(greptime_memory_limit_in_bytes{app="greptime-metasrv"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: limit
        - title: Metasrv CPU Usage per Instance
          type: timeseries
          description: Current cpu usage by instance
@@ -237,16 +262,26 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+            - expr: max(greptime_cpu_limit_in_millicores{app="greptime-metasrv"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: limit
        - title: Flownode Memory per Instance
          type: timeseries
          description: Current memory usage by instance
-          unit: decbytes
+          unit: bytes
          queries:
            - expr: sum(process_resident_memory_bytes{instance=~"$flownode"}) by (instance, pod)
              datasource:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+            - expr: max(greptime_memory_limit_in_bytes{app="greptime-flownode"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: limit
        - title: Flownode CPU Usage per Instance
          type: timeseries
          description: Current cpu usage by instance
@@ -257,6 +292,11 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+            - expr: max(greptime_cpu_limit_in_millicores{app="greptime-flownode"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: limit
    - title: Frontend Requests
      panels:
        - title: HTTP QPS per Instance
@@ -642,6 +682,15 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG'
+        - title: Cache Miss
+          type: timeseries
+          description: The local cache miss of the datanode.
+          queries:
+            - expr: sum by (instance,pod, type) (rate(greptime_mito_cache_miss{instance=~"$datanode"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
    - title: OpenDAL
      panels:
        - title: QPS per Instance
--- a/grafana/dashboards/metrics/standalone/dashboard.json
+++ b/grafana/dashboards/metrics/standalone/dashboard.json
--- a/grafana/dashboards/metrics/standalone/dashboard.md
+++ b/grafana/dashboards/metrics/standalone/dashboard.md
@@ -21,14 +21,14 @@
 # Resources
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
 | --- | --- | --- | --- | --- | --- | --- |
-| Datanode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{instance}}]-[{{ pod }}]` |
-| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
-| Frontend Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
-| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]-cpu` |
-| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]-resident` |
-| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
-| Flownode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
-| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
+| Datanode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)`<br/>`max(greptime_memory_limit_in_bytes{app="greptime-datanode"})` | `timeseries` | Current memory usage by instance | `prometheus` | `bytes` | `[{{instance}}]-[{{ pod }}]` |
+| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)`<br/>`max(greptime_cpu_limit_in_millicores{app="greptime-datanode"})` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
+| Frontend Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)`<br/>`max(greptime_memory_limit_in_bytes{app="greptime-frontend"})` | `timeseries` | Current memory usage by instance | `prometheus` | `bytes` | `[{{ instance }}]-[{{ pod }}]` |
+| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)`<br/>`max(greptime_cpu_limit_in_millicores{app="greptime-frontend"})` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]-cpu` |
+| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)`<br/>`max(greptime_memory_limit_in_bytes{app="greptime-metasrv"})` | `timeseries` | Current memory usage by instance | `prometheus` | `bytes` | `[{{ instance }}]-[{{ pod }}]-resident` |
+| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)`<br/>`max(greptime_cpu_limit_in_millicores{app="greptime-metasrv"})` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
+| Flownode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)`<br/>`max(greptime_memory_limit_in_bytes{app="greptime-flownode"})` | `timeseries` | Current memory usage by instance | `prometheus` | `bytes` | `[{{ instance }}]-[{{ pod }}]` |
+| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)`<br/>`max(greptime_cpu_limit_in_millicores{app="greptime-flownode"})` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
 # Frontend Requests
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
 | --- | --- | --- | --- | --- | --- | --- |
@@ -72,6 +72,7 @@
 | Region Worker Handle Bulk Insert Requests | `histogram_quantile(0.95, sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_bucket[$__rate_interval])))`<br/>`sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to handle bulk insert region requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
 | Active Series and Field Builders Count | `sum by(instance, pod) (greptime_mito_memtable_active_series_count)`<br/>`sum by(instance, pod) (greptime_mito_memtable_field_builder_count)` | `timeseries` | Compaction oinput output bytes | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]-series` |
 | Region Worker Convert Requests | `histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))`<br/>`sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to decode requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
+| Cache Miss | `sum by (instance,pod, type) (rate(greptime_mito_cache_miss{}[$__rate_interval]))` | `timeseries` | The local cache miss of the datanode. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
 # OpenDAL
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
 | --- | --- | --- | --- | --- | --- | --- |
--- a/grafana/dashboards/metrics/standalone/dashboard.yaml
+++ b/grafana/dashboards/metrics/standalone/dashboard.yaml
@@ -180,13 +180,18 @@ groups:
        - title: Datanode Memory per Instance
          type: timeseries
          description: Current memory usage by instance
-          unit: decbytes
+          unit: bytes
          queries:
            - expr: sum(process_resident_memory_bytes{}) by (instance, pod)
              datasource:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{instance}}]-[{{ pod }}]'
+            - expr: max(greptime_memory_limit_in_bytes{app="greptime-datanode"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: limit
        - title: Datanode CPU Usage per Instance
          type: timeseries
          description: Current cpu usage by instance
@@ -197,16 +202,26 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+            - expr: max(greptime_cpu_limit_in_millicores{app="greptime-datanode"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: limit
        - title: Frontend Memory per Instance
          type: timeseries
          description: Current memory usage by instance
-          unit: decbytes
+          unit: bytes
          queries:
            - expr: sum(process_resident_memory_bytes{}) by (instance, pod)
              datasource:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+            - expr: max(greptime_memory_limit_in_bytes{app="greptime-frontend"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: limit
        - title: Frontend CPU Usage per Instance
          type: timeseries
          description: Current cpu usage by instance
@@ -217,16 +232,26 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{ instance }}]-[{{ pod }}]-cpu'
+            - expr: max(greptime_cpu_limit_in_millicores{app="greptime-frontend"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: limit
        - title: Metasrv Memory per Instance
          type: timeseries
          description: Current memory usage by instance
-          unit: decbytes
+          unit: bytes
          queries:
            - expr: sum(process_resident_memory_bytes{}) by (instance, pod)
              datasource:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{ instance }}]-[{{ pod }}]-resident'
+            - expr: max(greptime_memory_limit_in_bytes{app="greptime-metasrv"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: limit
        - title: Metasrv CPU Usage per Instance
          type: timeseries
          description: Current cpu usage by instance
@@ -237,16 +262,26 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+            - expr: max(greptime_cpu_limit_in_millicores{app="greptime-metasrv"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: limit
        - title: Flownode Memory per Instance
          type: timeseries
          description: Current memory usage by instance
-          unit: decbytes
+          unit: bytes
          queries:
            - expr: sum(process_resident_memory_bytes{}) by (instance, pod)
              datasource:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+            - expr: max(greptime_memory_limit_in_bytes{app="greptime-flownode"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: limit
        - title: Flownode CPU Usage per Instance
          type: timeseries
          description: Current cpu usage by instance
@@ -257,6 +292,11 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+            - expr: max(greptime_cpu_limit_in_millicores{app="greptime-flownode"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: limit
    - title: Frontend Requests
      panels:
        - title: HTTP QPS per Instance
@@ -642,6 +682,15 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG'
+        - title: Cache Miss
+          type: timeseries
+          description: The local cache miss of the datanode.
+          queries:
+            - expr: sum by (instance,pod, type) (rate(greptime_mito_cache_miss{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
    - title: OpenDAL
      panels:
        - title: QPS per Instance
--- a/grafana/scripts/check.sh
+++ b/grafana/scripts/check.sh
@@ -26,7 +26,7 @@ check_dashboards_generation() {
  ./grafana/scripts/gen-dashboards.sh

  if [[ -n "$(git diff --name-only grafana/dashboards/metrics)" ]]; then
-    echo "Error: The dashboards are not generated correctly. You should execute the `make dashboards` command."
+    echo "Error: The dashboards are not generated correctly. You should execute the 'make dashboards' command."
    exit 1
  fi
 }
--- a/src/api/src/error.rs
+++ b/src/api/src/error.rs
@@ -17,6 +17,7 @@ use std::any::Any;
 use common_error::ext::ErrorExt;
 use common_error::status_code::StatusCode;
 use common_macro::stack_trace_debug;
+use common_time::timestamp::TimeUnit;
 use datatypes::prelude::ConcreteDataType;
 use snafu::prelude::*;
 use snafu::Location;
@@ -66,12 +67,28 @@ pub enum Error {
        #[snafu(implicit)]
        location: Location,
    },
+
+    #[snafu(display("Invalid time unit: {time_unit}"))]
+    InvalidTimeUnit {
+        time_unit: i32,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Inconsistent time unit: {:?}", units))]
+    InconsistentTimeUnit {
+        units: Vec<TimeUnit>,
+        #[snafu(implicit)]
+        location: Location,
+    },
 }

 impl ErrorExt for Error {
    fn status_code(&self) -> StatusCode {
        match self {
-            Error::UnknownColumnDataType { .. } => StatusCode::InvalidArguments,
+            Error::UnknownColumnDataType { .. }
+            | Error::InvalidTimeUnit { .. }
+            | Error::InconsistentTimeUnit { .. } => StatusCode::InvalidArguments,
            Error::IntoColumnDataType { .. } | Error::SerializeJson { .. } => {
                StatusCode::Unexpected
            }
--- a/src/api/src/helper.rs
+++ b/src/api/src/helper.rs
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::collections::HashSet;
 use std::sync::Arc;

 use common_base::BitVec;
@@ -46,7 +47,7 @@ use greptime_proto::v1::{
 use paste::paste;
 use snafu::prelude::*;

-use crate::error::{self, Result};
+use crate::error::{self, InconsistentTimeUnitSnafu, InvalidTimeUnitSnafu, Result};
 use crate::v1::column::Values;
 use crate::v1::{Column, ColumnDataType, Value as GrpcValue};

@@ -1079,6 +1080,89 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
    }
 }

+/// Converts a protobuf [`v1::TimeUnit`] into the corresponding [`TimeUnit`].
+pub fn from_pb_time_unit(unit: v1::TimeUnit) -> TimeUnit {
+    match unit {
+        v1::TimeUnit::Second => TimeUnit::Second,
+        v1::TimeUnit::Millisecond => TimeUnit::Millisecond,
+        v1::TimeUnit::Microsecond => TimeUnit::Microsecond,
+        v1::TimeUnit::Nanosecond => TimeUnit::Nanosecond,
+    }
+}
+
+/// Converts a [`TimeUnit`] into the corresponding protobuf [`v1::TimeUnit`].
+pub fn to_pb_time_unit(unit: TimeUnit) -> v1::TimeUnit {
+    match unit {
+        TimeUnit::Second => v1::TimeUnit::Second,
+        TimeUnit::Millisecond => v1::TimeUnit::Millisecond,
+        TimeUnit::Microsecond => v1::TimeUnit::Microsecond,
+        TimeUnit::Nanosecond => v1::TimeUnit::Nanosecond,
+    }
+}
+
+/// Converts protobuf time ranges into `(start, end)` timestamp pairs.
+/// Empty input yields an empty vector; an unknown time unit is an `InvalidTimeUnit` error.
+pub fn from_pb_time_ranges(time_ranges: v1::TimeRanges) -> Result<Vec<(Timestamp, Timestamp)>> {
+    let v1::TimeRanges {
+        time_unit: raw_unit,
+        time_ranges: ranges,
+    } = time_ranges;
+    if ranges.is_empty() {
+        return Ok(Vec::new());
+    }
+
+    let Ok(pb_unit) = v1::TimeUnit::try_from(raw_unit) else {
+        return InvalidTimeUnitSnafu { time_unit: raw_unit }.fail();
+    };
+    let unit = from_pb_time_unit(pb_unit);
+    Ok(ranges
+        .into_iter()
+        .map(|r| (Timestamp::new(r.start, unit), Timestamp::new(r.end, unit)))
+        .collect())
+}
+
+/// Converts timestamp ranges into a protobuf [`v1::TimeRanges`].
+///
+/// Every start and end timestamp must share one time unit, otherwise an
+/// `InconsistentTimeUnit` error listing the distinct units is returned.
+/// An empty `time_ranges` yields an empty message with `Millisecond` as the time unit.
+pub fn to_pb_time_ranges(time_ranges: &[(Timestamp, Timestamp)]) -> Result<v1::TimeRanges> {
+    // The first start timestamp defines the unit; default to `Millisecond` for empty input.
+    let unit = time_ranges
+        .first()
+        .map_or(TimeUnit::Millisecond, |(start, _)| start.unit());
+
+    // Compare both endpoints of every range against that unit. Checking each range on its
+    // own also covers a single range whose start and end units differ.
+    let is_same_time_unit = time_ranges
+        .iter()
+        .all(|(start, end)| start.unit() == unit && end.unit() == unit);
+
+    if !is_same_time_unit {
+        let all_time_units: Vec<_> = time_ranges
+            .iter()
+            .flat_map(|(s, e)| [s.unit(), e.unit()])
+            .collect::<HashSet<_>>()
+            .into_iter()
+            .collect();
+        return InconsistentTimeUnitSnafu {
+            units: all_time_units,
+        }
+        .fail();
+    }
+
+    Ok(v1::TimeRanges {
+        time_unit: to_pb_time_unit(unit) as i32,
+        time_ranges: time_ranges
+            .iter()
+            .map(|(start, end)| v1::TimeRange {
+                start: start.value(),
+                end: end.value(),
+            })
+            .collect(),
+    })
+}
+
 #[cfg(test)]
 mod tests {
    use std::sync::Arc;
--- a/src/catalog/Cargo.toml
+++ b/src/catalog/Cargo.toml
@@ -44,6 +44,8 @@ moka = { workspace = true, features = ["future", "sync"] }
 partition.workspace = true
 paste.workspace = true
 prometheus.workspace = true
+promql-parser.workspace = true
+rand.workspace = true
 rustc-hash.workspace = true
 serde_json.workspace = true
 session.workspace = true
--- a/src/catalog/src/information_extension.rs
+++ b/src/catalog/src/information_extension.rs
@@ -16,8 +16,8 @@ use api::v1::meta::ProcedureStatus;
 use common_error::ext::BoxedError;
 use common_meta::cluster::{ClusterInfo, NodeInfo};
 use common_meta::datanode::RegionStat;
-use common_meta::ddl::{ExecutorContext, ProcedureExecutor};
 use common_meta::key::flow::flow_state::FlowStat;
+use common_meta::procedure_executor::{ExecutorContext, ProcedureExecutor};
 use common_meta::rpc::procedure;
 use common_procedure::{ProcedureInfo, ProcedureState};
 use meta_client::MetaClientRef;
--- a/src/catalog/src/kvbackend/manager.rs
+++ b/src/catalog/src/kvbackend/manager.rs
@@ -23,7 +23,8 @@ use common_catalog::consts::{
 };
 use common_error::ext::BoxedError;
 use common_meta::cache::{
-    LayeredCacheRegistryRef, TableRoute, TableRouteCacheRef, ViewInfoCacheRef,
+    LayeredCacheRegistryRef, TableInfoCacheRef, TableNameCacheRef, TableRoute, TableRouteCacheRef,
+    ViewInfoCacheRef,
 };
 use common_meta::key::catalog_name::CatalogNameKey;
 use common_meta::key::flow::FlowMetadataManager;
@@ -41,7 +42,7 @@ use session::context::{Channel, QueryContext};
 use snafu::prelude::*;
 use store_api::metric_engine_consts::METRIC_ENGINE_NAME;
 use table::dist_table::DistTable;
-use table::metadata::TableId;
+use table::metadata::{TableId, TableInfoRef};
 use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
 use table::table_name::TableName;
 use table::TableRef;
@@ -325,6 +326,63 @@ impl CatalogManager for KvBackendCatalogManager {
        Ok(None)
    }

+    async fn table_id(
+        &self,
+        catalog_name: &str,
+        schema_name: &str,
+        table_name: &str,
+        query_ctx: Option<&QueryContext>,
+    ) -> Result<Option<TableId>> {
+        // Tables known to the system catalog are answered without touching the cache.
+        let system_table =
+            self.system_catalog
+                .table(catalog_name, schema_name, table_name, query_ctx);
+        if let Some(table) = system_table {
+            return Ok(Some(table.table_info().table_id()));
+        }
+
+        // Everything else is looked up by name in the table name cache.
+        let name_cache: TableNameCacheRef =
+            self.cache_registry.get().context(CacheNotFoundSnafu {
+                name: "table_name_cache",
+            })?;
+        let key = TableName {
+            catalog_name: catalog_name.to_string(),
+            schema_name: schema_name.to_string(),
+            table_name: table_name.to_string(),
+        };
+        let cached_id = name_cache
+            .get_by_ref(&key)
+            .await
+            .context(GetTableCacheSnafu)?;
+        if cached_id.is_some() {
+            return Ok(cached_id);
+        }
+
+        // Fall back to `pg_catalog` for the Postgres channel; a missing query context
+        // counts as `Channel::Unknown` and skips the fallback.
+        let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
+        if channel != Channel::Postgres {
+            return Ok(None);
+        }
+
+        let pg_table =
+            self.system_catalog
+                .table(catalog_name, PG_CATALOG_NAME, table_name, query_ctx);
+        Ok(pg_table.map(|table| table.table_info().table_id()))
+    }
+
+    async fn table_info_by_id(&self, table_id: TableId) -> Result<Option<TableInfoRef>> {
+        // Served by the table info cache; a missing cache is reported as an error.
+        let info_cache: TableInfoCacheRef = self
+            .cache_registry
+            .get()
+            .context(CacheNotFoundSnafu {
+                name: "table_info_cache",
+            })?;
+        info_cache.get_by_ref(&table_id).await.context(GetTableCacheSnafu)
+    }
+
    async fn tables_by_ids(
        &self,
        catalog: &str,
--- a/src/catalog/src/lib.rs
+++ b/src/catalog/src/lib.rs
@@ -25,7 +25,7 @@ use common_catalog::consts::{INFORMATION_SCHEMA_NAME, PG_CATALOG_NAME};
 use futures::future::BoxFuture;
 use futures_util::stream::BoxStream;
 use session::context::QueryContext;
-use table::metadata::TableId;
+use table::metadata::{TableId, TableInfoRef};
 use table::TableRef;

 use crate::error::Result;
@@ -89,6 +89,23 @@ pub trait CatalogManager: Send + Sync {
        query_ctx: Option<&QueryContext>,
    ) -> Result<Option<TableRef>>;

+    /// Returns the id of the table identified by `catalog`, `schema` and `table_name`.
+    ///
+    /// The default implementation resolves the table via `table` and reads its id.
+    async fn table_id(
+        &self,
+        catalog: &str,
+        schema: &str,
+        table_name: &str,
+        query_ctx: Option<&QueryContext>,
+    ) -> Result<Option<TableId>> {
+        let table = self.table(catalog, schema, table_name, query_ctx).await?;
+        Ok(table.map(|t| t.table_info().ident.table_id))
+    }
+
+    /// Returns the table info of the table with the provided id.
+    async fn table_info_by_id(&self, table_id: TableId) -> Result<Option<TableInfoRef>>;
+
    /// Returns the tables by table ids.
    async fn tables_by_ids(
        &self,
--- a/src/catalog/src/memory/manager.rs
+++ b/src/catalog/src/memory/manager.rs
@@ -28,7 +28,7 @@ use common_meta::kv_backend::memory::MemoryKvBackend;
 use futures_util::stream::BoxStream;
 use session::context::QueryContext;
 use snafu::OptionExt;
-use table::metadata::TableId;
+use table::metadata::{TableId, TableInfoRef};
 use table::TableRef;

 use crate::error::{CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu};
@@ -144,6 +144,18 @@ impl CatalogManager for MemoryCatalogManager {
        Ok(result)
    }

+    async fn table_info_by_id(&self, table_id: TableId) -> Result<Option<TableInfoRef>> {
+        // Scan every catalog -> schema -> table entry for a table with a matching id.
+        // The read lock on `catalogs` is held for the duration of the scan.
+        let catalogs = self.catalogs.read().unwrap();
+        let matching_table = catalogs
+            .iter()
+            .flat_map(|(_, schema_entries)| schema_entries.values())
+            .flat_map(|tables| tables.values())
+            .find(|table| table.table_info().ident.table_id == table_id);
+        Ok(matching_table.map(|table| table.table_info()))
+    }
+
    async fn tables_by_ids(
        &self,
        catalog: &str,
--- a/src/catalog/src/process_manager.rs
+++ b/src/catalog/src/process_manager.rs
@@ -14,17 +14,24 @@

 use std::collections::hash_map::Entry;
 use std::collections::HashMap;
-use std::fmt::{Debug, Formatter};
+use std::fmt::{Debug, Display, Formatter};
 use std::sync::atomic::{AtomicU32, Ordering};
 use std::sync::{Arc, RwLock};
+use std::time::{Duration, Instant, UNIX_EPOCH};

 use api::v1::frontend::{KillProcessRequest, ListProcessRequest, ProcessInfo};
 use common_base::cancellation::CancellationHandle;
 use common_frontend::selector::{FrontendSelector, MetaClientSelector};
-use common_telemetry::{debug, info, warn};
+use common_frontend::slow_query_event::SlowQueryEvent;
+use common_telemetry::{debug, error, info, warn};
 use common_time::util::current_time_millis;
 use meta_client::MetaClientRef;
+use promql_parser::parser::EvalStmt;
+use rand::random;
+use session::context::QueryContextRef;
 use snafu::{ensure, OptionExt, ResultExt};
+use sql::statements::statement::Statement;
+use tokio::sync::mpsc::Sender;

 use crate::error;
 use crate::metrics::{PROCESS_KILL_COUNT, PROCESS_LIST_COUNT};
@@ -44,6 +51,23 @@ pub struct ProcessManager {
    frontend_selector: Option<MetaClientSelector>,
 }

+/// A parsed query statement, functionally equivalent to `query::parser::QueryStatement`.
+/// It is defined here to avoid a cyclic dependency on the query parser module.
+#[derive(Debug, Clone)]
+pub enum QueryStatement {
+    Sql(Statement),
+    Promql(EvalStmt),
+}
+
+impl Display for QueryStatement {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Sql(statement) => write!(f, "{statement}"),
+            Self::Promql(eval_stmt) => write!(f, "{eval_stmt}"),
+        }
+    }
+}
+
 impl ProcessManager {
    /// Create a [ProcessManager] instance with server address and kv client.
    pub fn new(server_addr: String, meta_client: Option<MetaClientRef>) -> Self {
@@ -67,6 +91,7 @@ impl ProcessManager {
        query: String,
        client: String,
        query_id: Option<ProcessId>,
+        _slow_query_timer: Option<SlowQueryTimer>,
    ) -> Ticket {
        let id = query_id.unwrap_or_else(|| self.next_id.fetch_add(1, Ordering::Relaxed));
        let process = ProcessInfo {
@@ -93,6 +118,7 @@ impl ProcessManager {
            manager: self.clone(),
            id,
            cancellation_handle,
+            _slow_query_timer,
        }
    }

@@ -223,6 +249,7 @@ pub struct Ticket {
    pub(crate) manager: ProcessManagerRef,
    pub(crate) id: ProcessId,
    pub cancellation_handle: Arc<CancellationHandle>,
+    _slow_query_timer: Option<SlowQueryTimer>,
 }

 impl Drop for Ticket {
@@ -263,6 +290,107 @@ impl Debug for CancellableProcess {
    }
 }

+/// Reports a slow query when it is dropped.
+///
+/// On drop, the time elapsed since creation is compared with `threshold`; a slower query is
+/// sent (subject to `sample_ratio`) to the handler as a [SlowQueryEvent].
+pub struct SlowQueryTimer {
+    start: Instant,
+    stmt: QueryStatement,
+    query_ctx: QueryContextRef,
+    /// Minimum duration for a query to count as slow; `None` disables reporting.
+    threshold: Option<Duration>,
+    /// Fraction of slow queries to report; `None` reports all of them.
+    sample_ratio: Option<f64>,
+    tx: Sender<SlowQueryEvent>,
+}
+
+impl SlowQueryTimer {
+    /// Creates a timer whose measurement starts immediately.
+    pub fn new(
+        stmt: QueryStatement,
+        query_ctx: QueryContextRef,
+        threshold: Option<Duration>,
+        sample_ratio: Option<f64>,
+        tx: Sender<SlowQueryEvent>,
+    ) -> Self {
+        Self {
+            start: Instant::now(),
+            stmt,
+            query_ctx,
+            threshold,
+            sample_ratio,
+            tx,
+        }
+    }
+
+    /// Builds a [SlowQueryEvent] for the timed statement and sends it to the handler.
+    fn send_slow_query_event(&self, elapsed: Duration, threshold: Duration) {
+        let mut slow_query_event = SlowQueryEvent {
+            cost: elapsed.as_millis() as u64,
+            threshold: threshold.as_millis() as u64,
+            query: "".to_string(),
+            query_ctx: self.query_ctx.clone(),
+
+            // The following fields are only used for PromQL queries.
+            is_promql: false,
+            promql_range: None,
+            promql_step: None,
+            promql_start: None,
+            promql_end: None,
+        };
+
+        match &self.stmt {
+            QueryStatement::Promql(stmt) => {
+                slow_query_event.is_promql = true;
+                slow_query_event.query = stmt.expr.to_string();
+                slow_query_event.promql_step = Some(stmt.interval.as_millis() as u64);
+
+                // Timestamps before the Unix epoch are reported as 0.
+                let start = stmt
+                    .start
+                    .duration_since(UNIX_EPOCH)
+                    .unwrap_or_default()
+                    .as_millis() as i64;
+                let end = stmt
+                    .end
+                    .duration_since(UNIX_EPOCH)
+                    .unwrap_or_default()
+                    .as_millis() as i64;
+
+                // Clamp at 0 so that `end < start` cannot wrap into a huge unsigned range.
+                slow_query_event.promql_range = Some(end.saturating_sub(start).max(0) as u64);
+                slow_query_event.promql_start = Some(start);
+                slow_query_event.promql_end = Some(end);
+            }
+            QueryStatement::Sql(stmt) => {
+                slow_query_event.query = stmt.to_string();
+            }
+        }
+
+        // `try_send` does not wait for capacity; a failed send is only logged.
+        if let Err(e) = self.tx.try_send(slow_query_event) {
+            error!(e; "Failed to send slow query event");
+        }
+    }
+}
+
+impl Drop for SlowQueryTimer {
+    fn drop(&mut self) {
+        let Some(threshold) = self.threshold else {
+            return;
+        };
+        // Calculate the elapsed duration since the timer was created.
+        let elapsed = self.start.elapsed();
+        // Capture only a `sample_ratio` portion of the slow queries (all when unset): a random
+        // number in [0, 1) must be strictly below the ratio, so a ratio of 0.0 never captures.
+        let sampled = |ratio: f64| ratio >= 1.0 || random::<f64>() < ratio;
+        if elapsed > threshold && self.sample_ratio.map_or(true, sampled) {
+            self.send_slow_query_event(elapsed, threshold);
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use std::sync::Arc;
@@ -278,6 +406,7 @@ mod tests {
            "SELECT * FROM table".to_string(),
            "".to_string(),
            None,
+            None,
        );

        let running_processes = process_manager.local_processes(None).unwrap();
@@ -301,6 +430,7 @@ mod tests {
            "SELECT * FROM table".to_string(),
            "client1".to_string(),
            Some(custom_id),
+            None,
        );

        assert_eq!(ticket.id, custom_id);
@@ -321,6 +451,7 @@ mod tests {
            "SELECT * FROM table1".to_string(),
            "client1".to_string(),
            None,
+            None,
        );

        let ticket2 = process_manager.clone().register_query(
@@ -329,6 +460,7 @@ mod tests {
            "SELECT * FROM table2".to_string(),
            "client2".to_string(),
            None,
+            None,
        );

        let running_processes = process_manager.local_processes(Some("public")).unwrap();
@@ -350,6 +482,7 @@ mod tests {
            "SELECT * FROM table1".to_string(),
            "client1".to_string(),
            None,
+            None,
        );

        let _ticket2 = process_manager.clone().register_query(
@@ -358,6 +491,7 @@ mod tests {
            "SELECT * FROM table2".to_string(),
            "client2".to_string(),
            None,
+            None,
        );

        // Test listing processes for specific catalog
@@ -384,6 +518,7 @@ mod tests {
            "SELECT * FROM table".to_string(),
            "client1".to_string(),
            None,
+            None,
        );
        assert_eq!(process_manager.local_processes(None).unwrap().len(), 1);
        process_manager.deregister_query("public".to_string(), ticket.id);
@@ -400,6 +535,7 @@ mod tests {
            "SELECT * FROM table".to_string(),
            "client1".to_string(),
            None,
+            None,
        );

        assert!(!ticket.cancellation_handle.is_cancelled());
@@ -417,6 +553,7 @@ mod tests {
            "SELECT * FROM table".to_string(),
            "client1".to_string(),
            None,
+            None,
        );
        assert!(!ticket.cancellation_handle.is_cancelled());
        let killed = process_manager
@@ -462,6 +599,7 @@ mod tests {
            "SELECT COUNT(*) FROM users WHERE age > 18".to_string(),
            "test_client".to_string(),
            Some(42),
+            None,
        );

        let processes = process_manager.local_processes(None).unwrap();
@@ -488,6 +626,7 @@ mod tests {
                "SELECT * FROM table".to_string(),
                "client1".to_string(),
                None,
+                None,
            );

            // Process should be registered
--- a/src/cli/src/metadata/common.rs
+++ b/src/cli/src/metadata/common.rs
@@ -75,7 +75,7 @@ impl StoreConfig {
                #[cfg(feature = "pg_kvbackend")]
                BackendImpl::PostgresStore => {
                    let table_name = &self.meta_table_name;
-                    let pool = meta_srv::bootstrap::create_postgres_pool(store_addrs)
+                    let pool = meta_srv::bootstrap::create_postgres_pool(store_addrs, None)
                        .await
                        .map_err(BoxedError::new)?;
                    Ok(common_meta::kv_backend::rds::PgStore::with_pg_pool(
--- a/src/cmd/Cargo.toml
+++ b/src/cmd/Cargo.toml
@@ -38,6 +38,7 @@ common-config.workspace = true
 common-error.workspace = true
 common-grpc.workspace = true
 common-macro.workspace = true
+common-mem-prof.workspace = true
 common-meta.workspace = true
 common-options.workspace = true
 common-procedure.workspace = true
--- a/src/cmd/src/datanode/builder.rs
+++ b/src/cmd/src/datanode/builder.rs
@@ -28,7 +28,7 @@ use tracing_appender::non_blocking::WorkerGuard;

 use crate::datanode::{DatanodeOptions, Instance, APP_NAME};
 use crate::error::{MetaClientInitSnafu, MissingConfigSnafu, Result, StartDatanodeSnafu};
-use crate::{create_resource_limit_metrics, log_versions};
+use crate::{create_resource_limit_metrics, log_versions, maybe_activate_heap_profile};

 /// Builder for Datanode instance.
 pub struct InstanceBuilder {
@@ -68,6 +68,7 @@ impl InstanceBuilder {
        );

        log_versions(verbose_version(), short_version(), APP_NAME);
+        maybe_activate_heap_profile(&dn_opts.memory);
        create_resource_limit_metrics(APP_NAME);

        plugins::setup_datanode_plugins(plugins, &opts.plugins, dn_opts)
--- a/src/cmd/src/flownode.rs
+++ b/src/cmd/src/flownode.rs
@@ -46,7 +46,7 @@ use crate::error::{
    MissingConfigSnafu, Result, ShutdownFlownodeSnafu, StartFlownodeSnafu,
 };
 use crate::options::{GlobalOptions, GreptimeOptions};
-use crate::{create_resource_limit_metrics, log_versions, App};
+use crate::{create_resource_limit_metrics, log_versions, maybe_activate_heap_profile, App};

 pub const APP_NAME: &str = "greptime-flownode";

@@ -280,6 +280,7 @@ impl StartCommand {
        );

        log_versions(verbose_version(), short_version(), APP_NAME);
+        maybe_activate_heap_profile(&opts.component.memory);
        create_resource_limit_metrics(APP_NAME);

        info!("Flownode start command: {:#?}", self);
--- a/src/cmd/src/frontend.rs
+++ b/src/cmd/src/frontend.rs
@@ -47,7 +47,7 @@ use tracing_appender::non_blocking::WorkerGuard;

 use crate::error::{self, Result};
 use crate::options::{GlobalOptions, GreptimeOptions};
-use crate::{create_resource_limit_metrics, log_versions, App};
+use crate::{create_resource_limit_metrics, log_versions, maybe_activate_heap_profile, App};

 type FrontendOptions = GreptimeOptions<frontend::frontend::FrontendOptions>;

@@ -283,6 +283,7 @@ impl StartCommand {
        );

        log_versions(verbose_version(), short_version(), APP_NAME);
+        maybe_activate_heap_profile(&opts.component.memory);
        create_resource_limit_metrics(APP_NAME);

        info!("Frontend start command: {:#?}", self);
--- a/src/cmd/src/lib.rs
+++ b/src/cmd/src/lib.rs
@@ -15,7 +15,10 @@
 #![feature(assert_matches, let_chains)]

 use async_trait::async_trait;
-use common_telemetry::{error, info};
+use common_error::ext::ErrorExt;
+use common_error::status_code::StatusCode;
+use common_mem_prof::activate_heap_profile;
+use common_telemetry::{error, info, warn};
 use stat::{get_cpu_limit, get_memory_limit};

 use crate::error::Result;
@@ -145,3 +148,20 @@ fn log_env_flags() {
        info!("argument: {}", argument);
    }
 }
+
+/// Activates heap profiling when `enable_heap_profiling` is set in `memory_options`.
+///
+/// Activation failures are only logged: at info level for `StatusCode::Unsupported`,
+/// as a warning otherwise.
+pub fn maybe_activate_heap_profile(memory_options: &common_options::memory::MemoryOptions) {
+    if !memory_options.enable_heap_profiling {
+        return;
+    }
+    match activate_heap_profile() {
+        Ok(()) => info!("Heap profile is active"),
+        Err(err) if err.status_code() == StatusCode::Unsupported => {
+            info!("Heap profile is not supported")
+        }
+        Err(err) => warn!(err; "Failed to activate heap profile"),
+    }
+}
--- a/src/cmd/src/metasrv.rs
+++ b/src/cmd/src/metasrv.rs
@@ -30,7 +30,7 @@ use tracing_appender::non_blocking::WorkerGuard;

 use crate::error::{self, LoadLayeredConfigSnafu, Result, StartMetaServerSnafu};
 use crate::options::{GlobalOptions, GreptimeOptions};
-use crate::{create_resource_limit_metrics, log_versions, App};
+use crate::{create_resource_limit_metrics, log_versions, maybe_activate_heap_profile, App};

 type MetasrvOptions = GreptimeOptions<meta_srv::metasrv::MetasrvOptions>;

@@ -325,6 +325,7 @@ impl StartCommand {
        );

        log_versions(verbose_version(), short_version(), APP_NAME);
+        maybe_activate_heap_profile(&opts.component.memory);
        create_resource_limit_metrics(APP_NAME);

        info!("Metasrv start command: {:#?}", self);
--- a/src/cmd/src/standalone.rs
+++ b/src/cmd/src/standalone.rs
@@ -34,17 +34,19 @@ use common_meta::cluster::{NodeInfo, NodeStatus};
 use common_meta::datanode::RegionStat;
 use common_meta::ddl::flow_meta::FlowMetadataAllocator;
 use common_meta::ddl::table_meta::TableMetadataAllocator;
-use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl, ProcedureExecutorRef};
+use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl};
 use common_meta::ddl_manager::DdlManager;
 use common_meta::key::flow::flow_state::FlowStat;
 use common_meta::key::flow::FlowMetadataManager;
 use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
 use common_meta::kv_backend::KvBackendRef;
 use common_meta::peer::Peer;
+use common_meta::procedure_executor::LocalProcedureExecutor;
 use common_meta::region_keeper::MemoryRegionKeeper;
 use common_meta::region_registry::LeaderRegionRegistry;
 use common_meta::sequence::SequenceBuilder;
 use common_meta::wal_options_allocator::{build_wal_options_allocator, WalOptionsAllocatorRef};
+use common_options::memory::MemoryOptions;
 use common_procedure::{ProcedureInfo, ProcedureManagerRef};
 use common_telemetry::info;
 use common_telemetry::logging::{
@@ -83,7 +85,7 @@ use tracing_appender::non_blocking::WorkerGuard;

 use crate::error::{Result, StartFlownodeSnafu};
 use crate::options::{GlobalOptions, GreptimeOptions};
-use crate::{create_resource_limit_metrics, error, log_versions, App};
+use crate::{create_resource_limit_metrics, error, log_versions, maybe_activate_heap_profile, App};

 pub const APP_NAME: &str = "greptime-standalone";

@@ -157,6 +159,7 @@ pub struct StandaloneOptions {
    pub max_in_flight_write_bytes: Option<ReadableSize>,
    pub slow_query: Option<SlowQueryOptions>,
    pub query: QueryOptions,
+    pub memory: MemoryOptions,
 }

 impl Default for StandaloneOptions {
@@ -190,6 +193,7 @@ impl Default for StandaloneOptions {
            max_in_flight_write_bytes: None,
            slow_query: Some(SlowQueryOptions::default()),
            query: QueryOptions::default(),
+            memory: MemoryOptions::default(),
        }
    }
 }
@@ -486,6 +490,7 @@ impl StartCommand {
        );

        log_versions(verbose_version(), short_version(), APP_NAME);
+        maybe_activate_heap_profile(&opts.component.memory);
        create_resource_limit_metrics(APP_NAME);

        info!("Standalone start command: {:#?}", self);
@@ -636,9 +641,8 @@ impl StartCommand {
            flow_metadata_allocator: flow_metadata_allocator.clone(),
            region_failure_detector_controller: Arc::new(NoopRegionFailureDetectorControl),
        };
-        let procedure_manager_c = procedure_manager.clone();

-        let ddl_manager = DdlManager::try_new(ddl_context, procedure_manager_c, true)
+        let ddl_manager = DdlManager::try_new(ddl_context, procedure_manager.clone(), true)
            .context(error::InitDdlManagerSnafu)?;
        #[cfg(feature = "enterprise")]
        let ddl_manager = {
@@ -646,7 +650,11 @@ impl StartCommand {
                plugins.get();
            ddl_manager.with_trigger_ddl_manager(trigger_ddl_manager)
        };
-        let ddl_task_executor: ProcedureExecutorRef = Arc::new(ddl_manager);
+
+        let procedure_executor = Arc::new(LocalProcedureExecutor::new(
+            Arc::new(ddl_manager),
+            procedure_manager.clone(),
+        ));

        let fe_instance = FrontendBuilder::new(
            fe_opts.clone(),
@@ -654,7 +662,7 @@ impl StartCommand {
            layered_cache_registry.clone(),
            catalog_manager.clone(),
            node_manager.clone(),
-            ddl_task_executor.clone(),
+            procedure_executor.clone(),
            process_manager,
        )
        .with_plugin(plugins.clone())
@@ -679,7 +687,7 @@ impl StartCommand {
            catalog_manager.clone(),
            kv_backend.clone(),
            layered_cache_registry.clone(),
-            ddl_task_executor.clone(),
+            procedure_executor,
            node_manager,
        )
        .await
--- a/src/cmd/tests/load_config_test.rs
+++ b/src/cmd/tests/load_config_test.rs
@@ -34,6 +34,7 @@ use query::options::QueryOptions;
 use servers::export_metrics::ExportMetricsOption;
 use servers::grpc::GrpcOptions;
 use servers::http::HttpOptions;
+use servers::tls::{TlsMode, TlsOption};
 use store_api::path_utils::WAL_DIR;

 #[allow(deprecated)]
@@ -190,6 +191,13 @@ fn test_load_metasrv_example_config() {
                remote_write: Some(Default::default()),
                ..Default::default()
            },
+            backend_tls: Some(TlsOption {
+                mode: TlsMode::Prefer,
+                cert_path: String::new(),
+                key_path: String::new(),
+                ca_cert_path: String::new(),
+                watch: false,
+            }),
            ..Default::default()
        },
        ..Default::default()
@@ -245,6 +253,7 @@ fn test_load_flownode_example_config() {
                ..Default::default()
            },
            user_provider: None,
+            memory: Default::default(),
        },
        ..Default::default()
    };
@@ -298,6 +307,7 @@ fn test_load_standalone_example_config() {
                cors_allowed_origins: vec!["https://example.com".to_string()],
                ..Default::default()
            },
+
            ..Default::default()
        },
        ..Default::default()
--- a/src/common/event-recorder/Cargo.toml
+++ b/src/common/event-recorder/Cargo.toml
@@ -8,10 +8,8 @@ license.workspace = true
 api.workspace = true
 async-trait.workspace = true
 backon.workspace = true
-client.workspace = true
 common-error.workspace = true
 common-macro.workspace = true
-common-meta.workspace = true
 common-telemetry.workspace = true
 common-time.workspace = true
 serde.workspace = true
--- a/src/common/event-recorder/src/error.rs
+++ b/src/common/event-recorder/src/error.rs
@@ -13,7 +13,7 @@
 // limitations under the License.

 use api::v1::ColumnSchema;
-use common_error::ext::ErrorExt;
+use common_error::ext::{BoxedError, ErrorExt};
 use common_error::status_code::StatusCode;
 use common_macro::stack_trace_debug;
 use snafu::{Location, Snafu};
@@ -35,6 +35,30 @@ pub enum Error {
        expected: Vec<ColumnSchema>,
        actual: Vec<ColumnSchema>,
    },
+
+    #[snafu(display("Failed to serialize event"))]
+    SerializeEvent {
+        #[snafu(source)]
+        error: serde_json::error::Error,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Failed to insert events"))]
+    InsertEvents {
+        // BoxedError is utilized here to prevent introducing a circular dependency that would arise from directly referencing `client::error::Error`.
+        source: BoxedError,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Keyvalue backend error"))]
+    KvBackend {
+        // BoxedError is utilized here to prevent introducing a circular dependency that would arise from directly referencing `common_meta::error::Error`.
+        source: BoxedError,
+        #[snafu(implicit)]
+        location: Location,
+    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -42,8 +66,12 @@ pub type Result<T> = std::result::Result<T, Error>;
 impl ErrorExt for Error {
    fn status_code(&self) -> StatusCode {
        match self {
-            Error::MismatchedSchema { .. } => StatusCode::InvalidArguments,
-            Error::NoAvailableFrontend { .. } => StatusCode::Internal,
+            Error::MismatchedSchema { .. } | Error::SerializeEvent { .. } => {
+                StatusCode::InvalidArguments
+            }
+            Error::NoAvailableFrontend { .. }
+            | Error::InsertEvents { .. }
+            | Error::KvBackend { .. } => StatusCode::Internal,
        }
    }

--- a/src/common/event-recorder/src/recorder.rs
+++ b/src/common/event-recorder/src/recorder.rs
@@ -81,7 +81,9 @@ pub trait Event: Send + Sync + Debug {
    }

    /// Returns the JSON bytes of the event as the payload. It will use JSON type to store the payload.
-    fn json_payload(&self) -> Result<String>;
+    fn json_payload(&self) -> Result<String> {
+        Ok("".to_string())
+    }

    /// Add the extra schema to the event with the default schema.
    fn extra_schema(&self) -> Vec<ColumnSchema> {
@@ -97,6 +99,14 @@ pub trait Event: Send + Sync + Debug {
    fn as_any(&self) -> &dyn Any;
 }

+/// Eventable trait defines the interface for objects that can be converted to [Event].
+pub trait Eventable: Send + Sync + Debug {
+    /// Converts the object to an [Event].
+    fn to_event(&self) -> Option<Box<dyn Event>> {
+        None
+    }
+}
+
 /// Returns the hints for the insert operation.
 pub fn insert_hints() -> Vec<(&'static str, &'static str)> {
    vec![
@@ -199,7 +209,7 @@ fn validate_events(events: &[&Box<dyn Event>]) -> Result<()> {
 }

 /// EventRecorder trait defines the interface for recording events.
-pub trait EventRecorder: Send + Sync + 'static {
+pub trait EventRecorder: Send + Sync + Debug + 'static {
    /// Records an event for persistence and processing by [EventHandler].
    fn record(&self, event: Box<dyn Event>);

@@ -231,6 +241,7 @@ impl Default for EventRecorderOptions {
 }

 /// Implementation of [EventRecorder] that records the events and processes them in the background by the [EventHandler].
+#[derive(Debug)]
 pub struct EventRecorderImpl {
    // The channel to send the events to the background processor.
    tx: Sender<Box<dyn Event>>,
--- a/src/common/frontend/Cargo.toml
+++ b/src/common/frontend/Cargo.toml
@@ -12,6 +12,7 @@ common-macro.workspace = true
 common-meta.workspace = true
 greptime-proto.workspace = true
 meta-client.workspace = true
+session.workspace = true
 snafu.workspace = true
 tonic.workspace = true

--- a/src/common/frontend/src/lib.rs
+++ b/src/common/frontend/src/lib.rs
@@ -19,6 +19,7 @@ use snafu::OptionExt;

 pub mod error;
 pub mod selector;
+pub mod slow_query_event;

 #[derive(Debug, Clone, Eq, PartialEq)]
 pub struct DisplayProcessId {
--- a/src/common/frontend/src/slow_query_event.rs
+++ b/src/common/frontend/src/slow_query_event.rs
@@ -12,16 +12,17 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-//! Format to store in parquet.
-//!
-//! We store two additional internal columns at last:
-//! - `__sequence`, the sequence number of a row. Type: uint64
-//! - `__op_type`, the op type of the row. Type: uint8
-//!
-//! We store other columns in the same order as [RegionMetadata::field_columns()](store_api::metadata::RegionMetadata::field_columns()).
-//!
+use session::context::QueryContextRef;

-/// Number of columns that have fixed positions.
-///
-/// Contains all internal columns.
-pub(crate) const PLAIN_FIXED_POS_COLUMN_NUM: usize = 2;
+#[derive(Debug)]
+pub struct SlowQueryEvent {
+    pub cost: u64,
+    pub threshold: u64,
+    pub query: String,
+    pub is_promql: bool,
+    pub query_ctx: QueryContextRef,
+    pub promql_range: Option<u64>,
+    pub promql_step: Option<u64>,
+    pub promql_start: Option<i64>,
+    pub promql_end: Option<i64>,
+}
--- a/src/common/function/Cargo.toml
+++ b/src/common/function/Cargo.toml
@@ -66,6 +66,6 @@ wkt = { version = "0.11", optional = true }
 [dev-dependencies]
 approx = "0.5"
 futures.workspace = true
-pretty_assertions = "1.4.0"
+pretty_assertions.workspace = true
 serde = { version = "1.0", features = ["derive"] }
 tokio.workspace = true
--- a/src/common/function/src/admin.rs
+++ b/src/common/function/src/admin.rs
@@ -16,6 +16,9 @@ mod add_region_follower;
 mod flush_compact_region;
 mod flush_compact_table;
 mod migrate_region;
+mod reconcile_catalog;
+mod reconcile_database;
+mod reconcile_table;
 mod remove_region_follower;

 use std::sync::Arc;
@@ -24,6 +27,9 @@ use add_region_follower::AddRegionFollowerFunction;
 use flush_compact_region::{CompactRegionFunction, FlushRegionFunction};
 use flush_compact_table::{CompactTableFunction, FlushTableFunction};
 use migrate_region::MigrateRegionFunction;
+use reconcile_catalog::ReconcileCatalogFunction;
+use reconcile_database::ReconcileDatabaseFunction;
+use reconcile_table::ReconcileTableFunction;
 use remove_region_follower::RemoveRegionFollowerFunction;

 use crate::flush_flow::FlushFlowFunction;
@@ -43,5 +49,8 @@ impl AdminFunction {
        registry.register_async(Arc::new(FlushTableFunction));
        registry.register_async(Arc::new(CompactTableFunction));
        registry.register_async(Arc::new(FlushFlowFunction));
+        registry.register_async(Arc::new(ReconcileCatalogFunction));
+        registry.register_async(Arc::new(ReconcileDatabaseFunction));
+        registry.register_async(Arc::new(ReconcileTableFunction));
    }
 }
--- a/src/common/function/src/admin/reconcile_catalog.rs
+++ b/src/common/function/src/admin/reconcile_catalog.rs
@@ -0,0 +1,179 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use api::v1::meta::reconcile_request::Target;
+use api::v1::meta::{ReconcileCatalog, ReconcileRequest};
+use common_macro::admin_fn;
+use common_query::error::{
+    InvalidFuncArgsSnafu, MissingProcedureServiceHandlerSnafu, Result,
+    UnsupportedInputDataTypeSnafu,
+};
+use common_query::prelude::{Signature, TypeSignature, Volatility};
+use common_telemetry::info;
+use datatypes::prelude::*;
+use session::context::QueryContextRef;
+
+use crate::handlers::ProcedureServiceHandlerRef;
+use crate::helper::{
+    cast_u32, default_parallelism, default_resolve_strategy, get_string_from_params,
+    parse_resolve_strategy,
+};
+
+const FN_NAME: &str = "reconcile_catalog";
+
+/// A function to reconcile a catalog.
+/// Returns the procedure id on success.
+///
+/// - `reconcile_catalog(resolve_strategy)`.
+/// - `reconcile_catalog(resolve_strategy, parallelism)`.
+///
+/// - `reconcile_catalog()`.
+#[admin_fn(
+    name = ReconcileCatalogFunction,
+    display_name = reconcile_catalog,
+    sig_fn = signature,
+    ret = string
+)]
+pub(crate) async fn reconcile_catalog(
+    procedure_service_handler: &ProcedureServiceHandlerRef,
+    query_ctx: &QueryContextRef,
+    params: &[ValueRef<'_>],
+) -> Result<Value> {
+    let (resolve_strategy, parallelism) = match params.len() {
+        0 => (default_resolve_strategy(), default_parallelism()),
+        1 => (
+            parse_resolve_strategy(get_string_from_params(params, 0, FN_NAME)?)?,
+            default_parallelism(),
+        ),
+        2 => {
+            let Some(parallelism) = cast_u32(&params[1])? else {
+                return UnsupportedInputDataTypeSnafu {
+                    function: FN_NAME,
+                    datatypes: params.iter().map(|v| v.data_type()).collect::<Vec<_>>(),
+                }
+                .fail();
+            };
+            (
+                parse_resolve_strategy(get_string_from_params(params, 0, FN_NAME)?)?,
+                parallelism,
+            )
+        }
+        size => {
+            return InvalidFuncArgsSnafu {
+                err_msg: format!(
+                    "The length of the args is not correct, expect 0, 1 or 2, have: {}",
+                    size
+                ),
+            }
+            .fail();
+        }
+    };
+    info!(
+        "Reconciling catalog with resolve_strategy: {:?}, parallelism: {}",
+        resolve_strategy, parallelism
+    );
+    let pid = procedure_service_handler
+        .reconcile(ReconcileRequest {
+            target: Some(Target::ReconcileCatalog(ReconcileCatalog {
+                catalog_name: query_ctx.current_catalog().to_string(),
+                parallelism,
+                resolve_strategy: resolve_strategy as i32,
+            })),
+            ..Default::default()
+        })
+        .await?;
+    match pid {
+        Some(pid) => Ok(Value::from(pid)),
+        None => Ok(Value::Null),
+    }
+}
+
+fn signature() -> Signature {
+    let nums = ConcreteDataType::numerics();
+    let mut signs = Vec::with_capacity(2 + nums.len());
+    signs.extend([
+        // reconcile_catalog()
+        TypeSignature::NullAry,
+        // reconcile_catalog(resolve_strategy)
+        TypeSignature::Exact(vec![ConcreteDataType::string_datatype()]),
+    ]);
+    for sign in nums {
+        // reconcile_catalog(resolve_strategy, parallelism)
+        signs.push(TypeSignature::Exact(vec![
+            ConcreteDataType::string_datatype(),
+            sign,
+        ]));
+    }
+    Signature::one_of(signs, Volatility::Immutable)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::assert_matches::assert_matches;
+    use std::sync::Arc;
+
+    use common_query::error::Error;
+    use datatypes::vectors::{StringVector, UInt64Vector, VectorRef};
+
+    use crate::admin::reconcile_catalog::ReconcileCatalogFunction;
+    use crate::function::{AsyncFunction, FunctionContext};
+
+    #[tokio::test]
+    async fn test_reconcile_catalog() {
+        common_telemetry::init_default_ut_logging();
+
+        // reconcile_catalog()
+        let f = ReconcileCatalogFunction;
+        let args = vec![];
+        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
+        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
+        assert_eq!(expect, result);
+
+        // reconcile_catalog(resolve_strategy)
+        let f = ReconcileCatalogFunction;
+        let args = vec![Arc::new(StringVector::from(vec!["UseMetasrv"])) as _];
+        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
+        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
+        assert_eq!(expect, result);
+
+        // reconcile_catalog(resolve_strategy, parallelism)
+        let f = ReconcileCatalogFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["UseLatest"])) as _,
+            Arc::new(UInt64Vector::from_slice([10])) as _,
+        ];
+        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
+        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
+        assert_eq!(expect, result);
+
+        // unsupported input data type
+        let f = ReconcileCatalogFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["UseLatest"])) as _,
+            Arc::new(StringVector::from(vec!["test"])) as _,
+        ];
+        let err = f.eval(FunctionContext::mock(), &args).await.unwrap_err();
+        assert_matches!(err, Error::UnsupportedInputDataType { .. });
+
+        // invalid function args
+        let f = ReconcileCatalogFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["UseLatest"])) as _,
+            Arc::new(UInt64Vector::from_slice([10])) as _,
+            Arc::new(StringVector::from(vec!["10"])) as _,
+        ];
+        let err = f.eval(FunctionContext::mock(), &args).await.unwrap_err();
+        assert_matches!(err, Error::InvalidFuncArgs { .. });
+    }
+}
--- a/src/common/function/src/admin/reconcile_database.rs
+++ b/src/common/function/src/admin/reconcile_database.rs
@@ -0,0 +1,198 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use api::v1::meta::reconcile_request::Target;
+use api::v1::meta::{ReconcileDatabase, ReconcileRequest};
+use common_macro::admin_fn;
+use common_query::error::{
+    InvalidFuncArgsSnafu, MissingProcedureServiceHandlerSnafu, Result,
+    UnsupportedInputDataTypeSnafu,
+};
+use common_query::prelude::{Signature, TypeSignature, Volatility};
+use common_telemetry::info;
+use datatypes::prelude::*;
+use session::context::QueryContextRef;
+
+use crate::handlers::ProcedureServiceHandlerRef;
+use crate::helper::{
+    cast_u32, default_parallelism, default_resolve_strategy, get_string_from_params,
+    parse_resolve_strategy,
+};
+
+const FN_NAME: &str = "reconcile_database";
+
+/// A function to reconcile a database.
+/// Returns the procedure id if success.
+///
+/// - `reconcile_database(database_name)`.
+/// - `reconcile_database(database_name, resolve_strategy)`.
+/// - `reconcile_database(database_name, resolve_strategy, parallelism)`.
+///
+/// The parameters:
+/// - `database_name`:  the database name
+#[admin_fn(
+    name = ReconcileDatabaseFunction,
+    display_name = reconcile_database,
+    sig_fn = signature,
+    ret = string
+)]
+pub(crate) async fn reconcile_database(
+    procedure_service_handler: &ProcedureServiceHandlerRef,
+    query_ctx: &QueryContextRef,
+    params: &[ValueRef<'_>],
+) -> Result<Value> {
+    let (database_name, resolve_strategy, parallelism) = match params.len() {
+        1 => (
+            get_string_from_params(params, 0, FN_NAME)?,
+            default_resolve_strategy(),
+            default_parallelism(),
+        ),
+        2 => (
+            get_string_from_params(params, 0, FN_NAME)?,
+            parse_resolve_strategy(get_string_from_params(params, 1, FN_NAME)?)?,
+            default_parallelism(),
+        ),
+        3 => {
+            let Some(parallelism) = cast_u32(&params[2])? else {
+                return UnsupportedInputDataTypeSnafu {
+                    function: FN_NAME,
+                    datatypes: params.iter().map(|v| v.data_type()).collect::<Vec<_>>(),
+                }
+                .fail();
+            };
+            (
+                get_string_from_params(params, 0, FN_NAME)?,
+                parse_resolve_strategy(get_string_from_params(params, 1, FN_NAME)?)?,
+                parallelism,
+            )
+        }
+        size => {
+            return InvalidFuncArgsSnafu {
+                err_msg: format!(
+                    "The length of the args is not correct, expect 1, 2 or 3, have: {}",
+                    size
+                ),
+            }
+            .fail();
+        }
+    };
+    info!(
+        "Reconciling database: {}, resolve_strategy: {:?}, parallelism: {}",
+        database_name, resolve_strategy, parallelism
+    );
+    let pid = procedure_service_handler
+        .reconcile(ReconcileRequest {
+            target: Some(Target::ReconcileDatabase(ReconcileDatabase {
+                catalog_name: query_ctx.current_catalog().to_string(),
+                database_name: database_name.to_string(),
+                parallelism,
+                resolve_strategy: resolve_strategy as i32,
+            })),
+            ..Default::default()
+        })
+        .await?;
+    match pid {
+        Some(pid) => Ok(Value::from(pid)),
+        None => Ok(Value::Null),
+    }
+}
+
+fn signature() -> Signature {
+    let nums = ConcreteDataType::numerics();
+    let mut signs = Vec::with_capacity(2 + nums.len());
+    signs.extend([
+        // reconcile_database(database_name)
+        TypeSignature::Exact(vec![ConcreteDataType::string_datatype()]),
+        // reconcile_database(database_name, resolve_strategy)
+        TypeSignature::Exact(vec![
+            ConcreteDataType::string_datatype(),
+            ConcreteDataType::string_datatype(),
+        ]),
+    ]);
+    for sign in nums {
+        // reconcile_database(database_name, resolve_strategy, parallelism)
+        signs.push(TypeSignature::Exact(vec![
+            ConcreteDataType::string_datatype(),
+            ConcreteDataType::string_datatype(),
+            sign,
+        ]));
+    }
+    Signature::one_of(signs, Volatility::Immutable)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::assert_matches::assert_matches;
+    use std::sync::Arc;
+
+    use common_query::error::Error;
+    use datatypes::vectors::{StringVector, UInt32Vector, VectorRef};
+
+    use crate::admin::reconcile_database::ReconcileDatabaseFunction;
+    use crate::function::{AsyncFunction, FunctionContext};
+
+    #[tokio::test]
+    async fn test_reconcile_database() {
+        common_telemetry::init_default_ut_logging();
+
+        // reconcile_database(database_name)
+        let f = ReconcileDatabaseFunction;
+        let args = vec![Arc::new(StringVector::from(vec!["test"])) as _];
+        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
+        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
+        assert_eq!(expect, result);
+
+        // reconcile_database(database_name, resolve_strategy)
+        let f = ReconcileDatabaseFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["test"])) as _,
+            Arc::new(StringVector::from(vec!["UseLatest"])) as _,
+        ];
+        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
+        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
+        assert_eq!(expect, result);
+
+        // reconcile_database(database_name, resolve_strategy, parallelism)
+        let f = ReconcileDatabaseFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["test"])) as _,
+            Arc::new(StringVector::from(vec!["UseLatest"])) as _,
+            Arc::new(UInt32Vector::from_slice([10])) as _,
+        ];
+        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
+        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
+        assert_eq!(expect, result);
+
+        // invalid function args
+        let f = ReconcileDatabaseFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["UseLatest"])) as _,
+            Arc::new(UInt32Vector::from_slice([10])) as _,
+            Arc::new(StringVector::from(vec!["v1"])) as _,
+            Arc::new(StringVector::from(vec!["v2"])) as _,
+        ];
+        let err = f.eval(FunctionContext::mock(), &args).await.unwrap_err();
+        assert_matches!(err, Error::InvalidFuncArgs { .. });
+
+        // unsupported input data type
+        let f = ReconcileDatabaseFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["UseLatest"])) as _,
+            Arc::new(UInt32Vector::from_slice([10])) as _,
+            Arc::new(StringVector::from(vec!["v1"])) as _,
+        ];
+        let err = f.eval(FunctionContext::mock(), &args).await.unwrap_err();
+        assert_matches!(err, Error::UnsupportedInputDataType { .. });
+    }
+}
--- a/src/common/function/src/admin/reconcile_table.rs
+++ b/src/common/function/src/admin/reconcile_table.rs
@@ -0,0 +1,149 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use api::v1::meta::reconcile_request::Target;
+use api::v1::meta::{ReconcileRequest, ReconcileTable, ResolveStrategy};
+use common_catalog::format_full_table_name;
+use common_error::ext::BoxedError;
+use common_macro::admin_fn;
+use common_query::error::{
+    MissingProcedureServiceHandlerSnafu, Result, TableMutationSnafu, UnsupportedInputDataTypeSnafu,
+};
+use common_query::prelude::{Signature, TypeSignature, Volatility};
+use common_telemetry::info;
+use datatypes::prelude::*;
+use session::context::QueryContextRef;
+use session::table_name::table_name_to_full_name;
+use snafu::ResultExt;
+
+use crate::handlers::ProcedureServiceHandlerRef;
+use crate::helper::parse_resolve_strategy;
+
+const FN_NAME: &str = "reconcile_table";
+
+/// A function to reconcile a table.
+/// Returns the procedure id on success.
+///
+/// - `reconcile_table(table_name)`.
+/// - `reconcile_table(table_name, resolve_strategy)`.
+///
+/// The parameters:
+/// - `table_name`:  the table name
+#[admin_fn(
+    name = ReconcileTableFunction,
+    display_name = reconcile_table,
+    sig_fn = signature,
+    ret = string
+)]
+pub(crate) async fn reconcile_table(
+    procedure_service_handler: &ProcedureServiceHandlerRef,
+    query_ctx: &QueryContextRef,
+    params: &[ValueRef<'_>],
+) -> Result<Value> {
+    let (table_name, resolve_strategy) = match params {
+        [ValueRef::String(table_name)] => (table_name, ResolveStrategy::UseLatest),
+        [ValueRef::String(table_name), ValueRef::String(resolve_strategy)] => {
+            (table_name, parse_resolve_strategy(resolve_strategy)?)
+        }
+        _ => {
+            return UnsupportedInputDataTypeSnafu {
+                function: FN_NAME,
+                datatypes: params.iter().map(|v| v.data_type()).collect::<Vec<_>>(),
+            }
+            .fail()
+        }
+    };
+    let (catalog_name, schema_name, table_name) = table_name_to_full_name(table_name, query_ctx)
+        .map_err(BoxedError::new)
+        .context(TableMutationSnafu)?;
+    info!(
+        "Reconciling table: {} with resolve_strategy: {:?}",
+        format_full_table_name(&catalog_name, &schema_name, &table_name),
+        resolve_strategy
+    );
+    let pid = procedure_service_handler
+        .reconcile(ReconcileRequest {
+            target: Some(Target::ReconcileTable(ReconcileTable {
+                catalog_name,
+                schema_name,
+                table_name,
+                resolve_strategy: resolve_strategy as i32,
+            })),
+            ..Default::default()
+        })
+        .await?;
+    match pid {
+        Some(pid) => Ok(Value::from(pid)),
+        None => Ok(Value::Null),
+    }
+}
+
+fn signature() -> Signature {
+    Signature::one_of(
+        vec![
+            // reconcile_table(table_name)
+            TypeSignature::Exact(vec![ConcreteDataType::string_datatype()]),
+            // reconcile_table(table_name, resolve_strategy)
+            TypeSignature::Exact(vec![
+                ConcreteDataType::string_datatype(),
+                ConcreteDataType::string_datatype(),
+            ]),
+        ],
+        Volatility::Immutable,
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    use std::assert_matches::assert_matches;
+    use std::sync::Arc;
+
+    use common_query::error::Error;
+    use datatypes::vectors::{StringVector, VectorRef};
+
+    use crate::admin::reconcile_table::ReconcileTableFunction;
+    use crate::function::{AsyncFunction, FunctionContext};
+
+    #[tokio::test]
+    async fn test_reconcile_table() {
+        common_telemetry::init_default_ut_logging();
+
+        // reconcile_table(table_name)
+        let f = ReconcileTableFunction;
+        let args = vec![Arc::new(StringVector::from(vec!["test"])) as _];
+        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
+        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
+        assert_eq!(expect, result);
+
+        // reconcile_table(table_name, resolve_strategy)
+        let f = ReconcileTableFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["test"])) as _,
+            Arc::new(StringVector::from(vec!["UseMetasrv"])) as _,
+        ];
+        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
+        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
+        assert_eq!(expect, result);
+
+        // unsupported input data type
+        let f = ReconcileTableFunction;
+        let args = vec![
+            Arc::new(StringVector::from(vec!["test"])) as _,
+            Arc::new(StringVector::from(vec!["UseMetasrv"])) as _,
+            Arc::new(StringVector::from(vec!["10"])) as _,
+        ];
+        let err = f.eval(FunctionContext::mock(), &args).await.unwrap_err();
+        assert_matches!(err, Error::UnsupportedInputDataType { .. });
+    }
+}
--- a/src/common/function/src/aggrs/aggr_wrapper.rs
+++ b/src/common/function/src/aggrs/aggr_wrapper.rs
@@ -26,6 +26,8 @@ use std::sync::Arc;

 use arrow::array::StructArray;
 use arrow_schema::Fields;
+use common_telemetry::debug;
+use datafusion::functions_aggregate::all_default_aggregate_functions;
 use datafusion::optimizer::analyzer::type_coercion::TypeCoercion;
 use datafusion::optimizer::AnalyzerRule;
 use datafusion::physical_planner::create_aggregate_expr_and_maybe_filter;
@@ -39,6 +41,8 @@ use datafusion_expr::{
 use datafusion_physical_expr::aggregate::AggregateFunctionExpr;
 use datatypes::arrow::datatypes::{DataType, Field};

+use crate::function_registry::FunctionRegistry;
+
 /// Returns the name of the state function for the given aggregate function name.
 /// The state function is used to compute the state of the aggregate function.
 /// The state function's name is in the format `__<aggr_name>_state
@@ -65,9 +69,9 @@ pub struct StateMergeHelper;
 #[derive(Debug, Clone)]
 pub struct StepAggrPlan {
    /// Upper merge plan, which is the aggregate plan that merges the states of the state function.
-    pub upper_merge: Arc<LogicalPlan>,
+    pub upper_merge: LogicalPlan,
    /// Lower state plan, which is the aggregate plan that computes the state of the aggregate function.
-    pub lower_state: Arc<LogicalPlan>,
+    pub lower_state: LogicalPlan,
 }

 pub fn get_aggr_func(expr: &Expr) -> Option<&datafusion_expr::expr::AggregateFunction> {
@@ -83,6 +87,36 @@ pub fn get_aggr_func(expr: &Expr) -> Option<&datafusion_expr::expr::AggregateFun
 }

 impl StateMergeHelper {
+    /// Registers the `state` function of every supported aggregate function.
+    /// Note that the `merge` functions can't be registered here, as each one needs to be created from the original aggregate function with the given input types.
+    pub fn register(registry: &FunctionRegistry) {
+        let all_default = all_default_aggregate_functions();
+        let greptime_custom_aggr_functions = registry.aggregate_functions();
+
+        // If a custom aggregate function has the same name as a default aggregate function, the custom one overrides it.
+        let supported = all_default
+            .into_iter()
+            .chain(greptime_custom_aggr_functions.into_iter().map(Arc::new))
+            .collect::<Vec<_>>();
+        debug!(
+            "Registering state functions for supported: {:?}",
+            supported.iter().map(|f| f.name()).collect::<Vec<_>>()
+        );
+
+        let state_func = supported.into_iter().filter_map(|f| {
+            StateWrapper::new((*f).clone())
+                .inspect_err(
+                    |e| common_telemetry::error!(e; "Failed to register state function for {:?}", f),
+                )
+                .ok()
+                .map(AggregateUDF::new_from_impl)
+        });
+
+        for func in state_func {
+            registry.register_aggr(func);
+        }
+    }
+
    /// Split an aggregate plan into two aggregate plans, one for the state function and one for the merge function.
    pub fn split_aggr_node(aggr_plan: Aggregate) -> datafusion_common::Result<StepAggrPlan> {
        let aggr = {
@@ -166,18 +200,18 @@ impl StateMergeHelper {
        let lower_plan = LogicalPlan::Aggregate(lower);

        // update aggregate's output schema
-        let lower_plan = Arc::new(lower_plan.recompute_schema()?);
+        let lower_plan = lower_plan.recompute_schema()?;

        let mut upper = aggr.clone();
        let aggr_plan = LogicalPlan::Aggregate(aggr);
        upper.aggr_expr = upper_aggr_exprs;
-        upper.input = lower_plan.clone();
+        upper.input = Arc::new(lower_plan.clone());
        // upper schema's output schema should be the same as the original aggregate plan's output schema
-        let upper_check = upper.clone();
-        let upper_plan = Arc::new(LogicalPlan::Aggregate(upper_check).recompute_schema()?);
+        let upper_check = upper;
+        let upper_plan = LogicalPlan::Aggregate(upper_check).recompute_schema()?;
        if *upper_plan.schema() != *aggr_plan.schema() {
            return Err(datafusion_common::DataFusionError::Internal(format!(
-                 "Upper aggregate plan's schema is not the same as the original aggregate plan's schema: \n[transformed]:{}\n[   original]{}",
+                 "Upper aggregate plan's schema is not the same as the original aggregate plan's schema: \n[transformed]:{}\n[original]:{}",
                upper_plan.schema(), aggr_plan.schema()
            )));
        }
@@ -407,15 +441,18 @@ impl AggregateUDFImpl for MergeWrapper {
        &'a self,
        acc_args: datafusion_expr::function::AccumulatorArgs<'b>,
    ) -> datafusion_common::Result<Box<dyn Accumulator>> {
-        if acc_args.schema.fields().len() != 1
-            || !matches!(acc_args.schema.field(0).data_type(), DataType::Struct(_))
+        if acc_args.exprs.len() != 1
+            || !matches!(
+                acc_args.exprs[0].data_type(acc_args.schema)?,
+                DataType::Struct(_)
+            )
        {
            return Err(datafusion_common::DataFusionError::Internal(format!(
                "Expected one struct type as input, got: {:?}",
                acc_args.schema
            )));
        }
-        let input_type = acc_args.schema.field(0).data_type();
+        let input_type = acc_args.exprs[0].data_type(acc_args.schema)?;
        let DataType::Struct(fields) = input_type else {
            return Err(datafusion_common::DataFusionError::Internal(format!(
                "Expected a struct type for input, got: {:?}",
@@ -424,7 +461,7 @@ impl AggregateUDFImpl for MergeWrapper {
        };

        let inner_accum = self.original_phy_expr.create_accumulator()?;
-        Ok(Box::new(MergeAccum::new(inner_accum, fields)))
+        Ok(Box::new(MergeAccum::new(inner_accum, &fields)))
    }

    fn as_any(&self) -> &dyn std::any::Any {
--- a/src/common/function/src/aggrs/aggr_wrapper/tests.rs
+++ b/src/common/function/src/aggrs/aggr_wrapper/tests.rs
@@ -258,7 +258,7 @@ async fn test_sum_udaf() {
    )
    .recompute_schema()
    .unwrap();
-    assert_eq!(res.lower_state.as_ref(), &expected_lower_plan);
+    assert_eq!(&res.lower_state, &expected_lower_plan);

    let expected_merge_plan = LogicalPlan::Aggregate(
        Aggregate::try_new(
@@ -297,7 +297,7 @@ async fn test_sum_udaf() {
        )
        .unwrap(),
    );
-    assert_eq!(res.upper_merge.as_ref(), &expected_merge_plan);
+    assert_eq!(&res.upper_merge, &expected_merge_plan);

    let phy_aggr_state_plan = DefaultPhysicalPlanner::default()
        .create_physical_plan(&res.lower_state, &ctx.state())
@@ -405,7 +405,7 @@ async fn test_avg_udaf() {
    let coerced_aggr_state_plan = TypeCoercion::new()
        .analyze(expected_aggr_state_plan.clone(), &Default::default())
        .unwrap();
-    assert_eq!(res.lower_state.as_ref(), &coerced_aggr_state_plan);
+    assert_eq!(&res.lower_state, &coerced_aggr_state_plan);
    assert_eq!(
        res.lower_state.schema().as_arrow(),
        &arrow_schema::Schema::new(vec![Field::new(
@@ -456,7 +456,7 @@ async fn test_avg_udaf() {
        )
        .unwrap(),
    );
-    assert_eq!(res.upper_merge.as_ref(), &expected_merge_plan);
+    assert_eq!(&res.upper_merge, &expected_merge_plan);

    let phy_aggr_state_plan = DefaultPhysicalPlanner::default()
        .create_physical_plan(&coerced_aggr_state_plan, &ctx.state())
--- a/src/common/function/src/function_registry.rs
+++ b/src/common/function/src/function_registry.rs
@@ -20,6 +20,7 @@ use datafusion_expr::AggregateUDF;
 use once_cell::sync::Lazy;

 use crate::admin::AdminFunction;
+use crate::aggrs::aggr_wrapper::StateMergeHelper;
 use crate::aggrs::approximate::ApproximateFunction;
 use crate::aggrs::count_hash::CountHash;
 use crate::aggrs::vector::VectorFunction as VectorAggrFunction;
@@ -105,6 +106,10 @@ impl FunctionRegistry {
            .cloned()
            .collect()
    }
+
+    pub fn is_aggr_func_exist(&self, name: &str) -> bool {
+        self.aggregate_functions.read().unwrap().contains_key(name)
+    }
 }

 pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
@@ -148,6 +153,9 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
    // CountHash function
    CountHash::register(&function_registry);

+    // state function of supported aggregate functions
+    StateMergeHelper::register(&function_registry);
+
    Arc::new(function_registry)
 });

--- a/src/common/function/src/handlers.rs
+++ b/src/common/function/src/handlers.rs
@@ -14,6 +14,7 @@

 use std::sync::Arc;

+use api::v1::meta::ReconcileRequest;
 use async_trait::async_trait;
 use catalog::CatalogManagerRef;
 use common_base::AffectedRows;
@@ -65,6 +66,9 @@ pub trait ProcedureServiceHandler: Send + Sync {
    /// Migrate a region from source peer to target peer, returns the procedure id if success.
    async fn migrate_region(&self, request: MigrateRegionRequest) -> Result<Option<String>>;

+    /// Reconcile a table, database or catalog, returns the procedure id if success.
+    async fn reconcile(&self, request: ReconcileRequest) -> Result<Option<String>>;
+
    /// Query the procedure' state by its id
    async fn query_procedure_state(&self, pid: &str) -> Result<ProcedureStateResponse>;

--- a/src/common/function/src/helper.rs
+++ b/src/common/function/src/helper.rs
@@ -12,12 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use common_query::error::{InvalidInputTypeSnafu, Result};
+use api::v1::meta::ResolveStrategy;
+use common_query::error::{
+    InvalidFuncArgsSnafu, InvalidInputTypeSnafu, Result, UnsupportedInputDataTypeSnafu,
+};
 use common_query::prelude::{Signature, TypeSignature, Volatility};
 use datatypes::prelude::ConcreteDataType;
 use datatypes::types::cast::cast;
 use datatypes::value::ValueRef;
-use snafu::ResultExt;
+use snafu::{OptionExt, ResultExt};

 /// Create a function signature with oneof signatures of interleaving two arguments.
 pub fn one_of_sigs2(args1: Vec<ConcreteDataType>, args2: Vec<ConcreteDataType>) -> Signature {
@@ -43,3 +46,64 @@ pub fn cast_u64(value: &ValueRef) -> Result<Option<u64>> {
        })
        .map(|v| v.as_u64())
 }
+
+/// Cast a [`ValueRef`] to u32: `Err` if the cast fails, `None` if the result holds no `u32`.
+pub fn cast_u32(value: &ValueRef) -> Result<Option<u32>> {
+    cast((*value).into(), &ConcreteDataType::uint32_datatype())
+        .with_context(|_| InvalidInputTypeSnafu {
+            err_msg: format!(
+                "Failed to cast input into uint32, actual type: {:#?}",
+                value.data_type(),
+            ),
+        })
+        .map(|v| v.as_u64().and_then(|v| u32::try_from(v).ok()))
+}
+
+/// Parse a [`ResolveStrategy`] from its string name (e.g. `"UseLatest"`); unknown names are errors.
+pub fn parse_resolve_strategy(strategy: &str) -> Result<ResolveStrategy> {
+    ResolveStrategy::from_str_name(strategy).with_context(|| InvalidFuncArgsSnafu {
+        err_msg: format!("Invalid resolve strategy: {}", strategy),
+    })
+}
+
+/// Default parallelism for reconcile operations.
+pub fn default_parallelism() -> u32 {
+    64
+}
+
+/// Default resolve strategy for reconcile operations.
+pub fn default_resolve_strategy() -> ResolveStrategy {
+    ResolveStrategy::UseLatest
+}
+
+/// Get the string value at position `index` of the params.
+///
+/// # Errors
+/// Returns an error if `index` is out of range or the value at `index` is not a string.
+pub fn get_string_from_params<'a>(
+    params: &'a [ValueRef<'a>],
+    index: usize,
+    fn_name: &'a str,
+) -> Result<&'a str> {
+    let Some(ValueRef::String(s)) = params.get(index) else {
+        return UnsupportedInputDataTypeSnafu {
+            function: fn_name,
+            datatypes: params.iter().map(|v| v.data_type()).collect::<Vec<_>>(),
+        }
+        .fail();
+    };
+    Ok(s)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_resolve_strategy() {
+        assert_eq!(
+            parse_resolve_strategy("UseLatest").unwrap(),
+            ResolveStrategy::UseLatest
+        );
+    }
+}
--- a/src/common/function/src/lib.rs
+++ b/src/common/function/src/lib.rs
@@ -14,6 +14,7 @@

 #![feature(let_chains)]
 #![feature(try_blocks)]
+#![feature(assert_matches)]

 mod admin;
 mod flush_flow;
--- a/src/common/function/src/state.rs
+++ b/src/common/function/src/state.rs
@@ -32,7 +32,7 @@ impl FunctionState {
    pub fn mock() -> Self {
        use std::sync::Arc;

-        use api::v1::meta::ProcedureStatus;
+        use api::v1::meta::{ProcedureStatus, ReconcileRequest};
        use async_trait::async_trait;
        use catalog::CatalogManagerRef;
        use common_base::AffectedRows;
@@ -63,6 +63,10 @@ impl FunctionState {
                Ok(Some("test_pid".to_string()))
            }

+            async fn reconcile(&self, _request: ReconcileRequest) -> Result<Option<String>> {
+                Ok(Some("test_pid".to_string()))
+            }
+
            async fn query_procedure_state(&self, _pid: &str) -> Result<ProcedureStateResponse> {
                Ok(ProcedureStateResponse {
                    status: ProcedureStatus::Done.into(),
--- a/src/common/mem-prof/src/jemalloc.rs
+++ b/src/common/mem-prof/src/jemalloc.rs
@@ -19,8 +19,8 @@ use std::io::BufReader;
 use std::path::PathBuf;

 use error::{
-    BuildTempPathSnafu, DumpProfileDataSnafu, OpenTempFileSnafu, ProfilingNotEnabledSnafu,
-    ReadOptProfSnafu,
+    ActivateProfSnafu, BuildTempPathSnafu, DeactivateProfSnafu, DumpProfileDataSnafu,
+    OpenTempFileSnafu, ProfilingNotEnabledSnafu, ReadOptProfSnafu, ReadProfActiveSnafu,
 };
 use jemalloc_pprof_mappings::MAPPINGS;
 use jemalloc_pprof_utils::{parse_jeheap, FlamegraphOptions, StackProfile};
@@ -31,6 +31,7 @@ use crate::error::{FlamegraphSnafu, ParseJeHeapSnafu, Result};

 const PROF_DUMP: &[u8] = b"prof.dump\0";
 const OPT_PROF: &[u8] = b"opt.prof\0";
+const PROF_ACTIVE: &[u8] = b"prof.active\0";

 pub async fn dump_profile() -> Result<Vec<u8>> {
    ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu);
@@ -93,6 +94,27 @@ pub async fn dump_flamegraph() -> Result<Vec<u8>> {
    let flamegraph = profile.to_flamegraph(&mut opts).context(FlamegraphSnafu)?;
    Ok(flamegraph)
 }
+
+pub fn activate_heap_profile() -> Result<()> {
+    ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu);
+    // safety: PROF_ACTIVE is NUL-terminated and jemalloc documents `prof.active` as a bool.
+    let outcome = unsafe { tikv_jemalloc_ctl::raw::update(PROF_ACTIVE, true) };
+    // Only success or failure matters; the value handed back by the update is dropped.
+    outcome.map(drop).context(ActivateProfSnafu)
+}
+
+pub fn deactivate_heap_profile() -> Result<()> {
+    ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu);
+    // safety: PROF_ACTIVE is NUL-terminated and jemalloc documents `prof.active` as a bool.
+    let outcome = unsafe { tikv_jemalloc_ctl::raw::update(PROF_ACTIVE, false) };
+    // Only success or failure matters; the value handed back by the update is dropped.
+    outcome.map(drop).context(DeactivateProfSnafu)
+}
+
+pub fn is_heap_profile_active() -> Result<bool> {
+    unsafe { tikv_jemalloc_ctl::raw::read::<bool>(PROF_ACTIVE).context(ReadProfActiveSnafu) }
+}
+
 fn is_prof_enabled() -> Result<bool> {
    // safety: OPT_PROF variable, if present, is always a boolean value.
    Ok(unsafe { tikv_jemalloc_ctl::raw::read::<bool>(OPT_PROF).context(ReadOptProfSnafu)? })
--- a/src/common/mem-prof/src/jemalloc/error.rs
+++ b/src/common/mem-prof/src/jemalloc/error.rs
@@ -53,6 +53,24 @@ pub enum Error {
        #[snafu(source)]
        error: tikv_jemalloc_ctl::Error,
    },
+
+    #[snafu(display("Failed to activate heap profiling"))]
+    ActivateProf {
+        #[snafu(source)]
+        error: tikv_jemalloc_ctl::Error,
+    },
+
+    #[snafu(display("Failed to deactivate heap profiling"))]
+    DeactivateProf {
+        #[snafu(source)]
+        error: tikv_jemalloc_ctl::Error,
+    },
+
+    #[snafu(display("Failed to read heap profiling status"))]
+    ReadProfActive {
+        #[snafu(source)]
+        error: tikv_jemalloc_ctl::Error,
+    },
 }

 impl ErrorExt for Error {
@@ -63,6 +81,9 @@ impl ErrorExt for Error {
            Error::BuildTempPath { .. } => StatusCode::Internal,
            Error::OpenTempFile { .. } => StatusCode::StorageUnavailable,
            Error::DumpProfileData { .. } => StatusCode::StorageUnavailable,
+            Error::ActivateProf { .. } => StatusCode::Internal,
+            Error::DeactivateProf { .. } => StatusCode::Internal,
+            Error::ReadProfActive { .. } => StatusCode::Internal,
        }
    }

--- a/src/common/mem-prof/src/lib.rs
+++ b/src/common/mem-prof/src/lib.rs
@@ -17,7 +17,10 @@ pub mod error;
 #[cfg(not(windows))]
 mod jemalloc;
 #[cfg(not(windows))]
-pub use jemalloc::{dump_flamegraph, dump_pprof, dump_profile};
+pub use jemalloc::{
+    activate_heap_profile, deactivate_heap_profile, dump_flamegraph, dump_pprof, dump_profile,
+    is_heap_profile_active,
+};

 #[cfg(windows)]
 pub async fn dump_profile() -> error::Result<Vec<u8>> {
@@ -33,3 +36,18 @@ pub async fn dump_pprof() -> error::Result<Vec<u8>> {
 pub async fn dump_flamegraph() -> error::Result<Vec<u8>> {
    error::ProfilingNotSupportedSnafu.fail()
 }
+
+#[cfg(windows)]
+pub fn activate_heap_profile() -> error::Result<()> {
+    error::ProfilingNotSupportedSnafu.fail()
+}
+
+#[cfg(windows)]
+pub fn deactivate_heap_profile() -> error::Result<()> {
+    error::ProfilingNotSupportedSnafu.fail()
+}
+
+#[cfg(windows)]
+pub fn is_heap_profile_active() -> error::Result<bool> {
+    error::ProfilingNotSupportedSnafu.fail()
+}
--- a/src/common/meta/Cargo.toml
+++ b/src/common/meta/Cargo.toml
@@ -6,7 +6,16 @@ license.workspace = true

 [features]
 testing = []
-pg_kvbackend = ["dep:tokio-postgres", "dep:backon", "dep:deadpool-postgres", "dep:deadpool"]
+pg_kvbackend = [
+    "dep:tokio-postgres",
+    "dep:backon",
+    "dep:deadpool-postgres",
+    "dep:deadpool",
+    "dep:tokio-postgres-rustls",
+    "dep:rustls-pemfile",
+    "dep:rustls-native-certs",
+    "dep:rustls",
+]
 mysql_kvbackend = ["dep:sqlx", "dep:backon"]
 enterprise = []

@@ -32,6 +41,7 @@ common-procedure.workspace = true
 common-procedure-test.workspace = true
 common-query.workspace = true
 common-recordbatch.workspace = true
+common-runtime.workspace = true
 common-telemetry.workspace = true
 common-time.workspace = true
 common-wal.workspace = true
@@ -57,6 +67,9 @@ prost.workspace = true
 rand.workspace = true
 regex.workspace = true
 rskafka.workspace = true
+rustls = { workspace = true, default-features = false, features = ["ring", "logging", "std", "tls12"], optional = true }
+rustls-native-certs = { version = "0.7", optional = true }
+rustls-pemfile = { version = "2.0", optional = true }
 serde.workspace = true
 serde_json.workspace = true
 serde_with.workspace = true
@@ -68,6 +81,7 @@ strum.workspace = true
 table = { workspace = true, features = ["testing"] }
 tokio.workspace = true
 tokio-postgres = { workspace = true, optional = true }
+tokio-postgres-rustls = { version = "0.12", optional = true }
 tonic.workspace = true
 tracing.workspace = true
 typetag.workspace = true
--- a/src/common/meta/src/ddl.rs
+++ b/src/common/meta/src/ddl.rs
@@ -15,25 +15,17 @@
 use std::collections::HashMap;
 use std::sync::Arc;

-use api::v1::meta::ProcedureDetailResponse;
-use common_telemetry::tracing_context::W3cTrace;
 use store_api::storage::{RegionId, RegionNumber, TableId};

 use crate::cache_invalidator::CacheInvalidatorRef;
 use crate::ddl::flow_meta::FlowMetadataAllocatorRef;
 use crate::ddl::table_meta::TableMetadataAllocatorRef;
-use crate::error::{Result, UnsupportedSnafu};
 use crate::key::flow::FlowMetadataManagerRef;
 use crate::key::table_route::PhysicalTableRouteValue;
 use crate::key::TableMetadataManagerRef;
 use crate::node_manager::NodeManagerRef;
 use crate::region_keeper::MemoryRegionKeeperRef;
 use crate::region_registry::LeaderRegionRegistryRef;
-use crate::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
-use crate::rpc::procedure::{
-    AddRegionFollowerRequest, MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse,
-    RemoveRegionFollowerRequest,
-};
 use crate::DatanodeId;

 pub mod alter_database;
@@ -59,64 +51,6 @@ pub(crate) mod tests;
 pub mod truncate_table;
 pub mod utils;

-#[derive(Debug, Default)]
-pub struct ExecutorContext {
-    pub tracing_context: Option<W3cTrace>,
-}
-
-/// The procedure executor that accepts ddl, region migration task etc.
-#[async_trait::async_trait]
-pub trait ProcedureExecutor: Send + Sync {
-    /// Submit a ddl task
-    async fn submit_ddl_task(
-        &self,
-        ctx: &ExecutorContext,
-        request: SubmitDdlTaskRequest,
-    ) -> Result<SubmitDdlTaskResponse>;
-
-    /// Add a region follower
-    async fn add_region_follower(
-        &self,
-        _ctx: &ExecutorContext,
-        _request: AddRegionFollowerRequest,
-    ) -> Result<()> {
-        UnsupportedSnafu {
-            operation: "add_region_follower",
-        }
-        .fail()
-    }
-
-    /// Remove a region follower
-    async fn remove_region_follower(
-        &self,
-        _ctx: &ExecutorContext,
-        _request: RemoveRegionFollowerRequest,
-    ) -> Result<()> {
-        UnsupportedSnafu {
-            operation: "remove_region_follower",
-        }
-        .fail()
-    }
-
-    /// Submit a region migration task
-    async fn migrate_region(
-        &self,
-        ctx: &ExecutorContext,
-        request: MigrateRegionRequest,
-    ) -> Result<MigrateRegionResponse>;
-
-    /// Query the procedure state by its id
-    async fn query_procedure_state(
-        &self,
-        ctx: &ExecutorContext,
-        pid: &str,
-    ) -> Result<ProcedureStateResponse>;
-
-    async fn list_procedures(&self, ctx: &ExecutorContext) -> Result<ProcedureDetailResponse>;
-}
-
-pub type ProcedureExecutorRef = Arc<dyn ProcedureExecutor>;
-
 /// Metadata allocated to a table.
 #[derive(Default)]
 pub struct TableMetadata {
--- a/src/common/meta/src/ddl/truncate_table.rs
+++ b/src/common/meta/src/ddl/truncate_table.rs
@@ -12,8 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use api::helper::to_pb_time_ranges;
 use api::v1::region::{
-    region_request, RegionRequest, RegionRequestHeader, TruncateRequest as PbTruncateRegionRequest,
+    region_request, truncate_request, RegionRequest, RegionRequestHeader,
+    TruncateRequest as PbTruncateRegionRequest,
 };
 use async_trait::async_trait;
 use common_procedure::error::{FromJsonSnafu, ToJsonSnafu};
@@ -33,7 +35,7 @@ use table::table_reference::TableReference;

 use crate::ddl::utils::{add_peer_context_if_needed, map_to_procedure_error};
 use crate::ddl::DdlContext;
-use crate::error::{Result, TableNotFoundSnafu};
+use crate::error::{ConvertTimeRangesSnafu, Result, TableNotFoundSnafu};
 use crate::key::table_info::TableInfoValue;
 use crate::key::table_name::TableNameKey;
 use crate::key::DeserializedValueWithBytes;
@@ -153,6 +155,15 @@ impl TruncateTableProcedure {
                    datanode
                );

+                let time_ranges = &self.data.task.time_ranges;
+                let kind = if time_ranges.is_empty() {
+                    truncate_request::Kind::All(api::v1::region::All {})
+                } else {
+                    let pb_time_ranges =
+                        to_pb_time_ranges(time_ranges).context(ConvertTimeRangesSnafu)?;
+                    truncate_request::Kind::TimeRanges(pb_time_ranges)
+                };
+
                let request = RegionRequest {
                    header: Some(RegionRequestHeader {
                        tracing_context: TracingContext::from_current_span().to_w3c(),
@@ -160,6 +171,7 @@ impl TruncateTableProcedure {
                    }),
                    body: Some(region_request::Body::Truncate(PbTruncateRegionRequest {
                        region_id: region_id.as_u64(),
+                        kind: Some(kind),
                    })),
                };

--- a/src/common/meta/src/ddl_manager.rs
+++ b/src/common/meta/src/ddl_manager.rs
@@ -14,7 +14,6 @@

 use std::sync::Arc;

-use api::v1::meta::ProcedureDetailResponse;
 use common_procedure::{
    watcher, BoxedProcedureLoader, Output, ProcedureId, ProcedureManagerRef, ProcedureWithId,
 };
@@ -37,16 +36,16 @@ use crate::ddl::drop_flow::DropFlowProcedure;
 use crate::ddl::drop_table::DropTableProcedure;
 use crate::ddl::drop_view::DropViewProcedure;
 use crate::ddl::truncate_table::TruncateTableProcedure;
-use crate::ddl::{utils, DdlContext, ExecutorContext, ProcedureExecutor};
+use crate::ddl::{utils, DdlContext};
 use crate::error::{
-    EmptyDdlTasksSnafu, ParseProcedureIdSnafu, ProcedureNotFoundSnafu, ProcedureOutputSnafu,
-    QueryProcedureSnafu, RegisterProcedureLoaderSnafu, Result, SubmitProcedureSnafu,
-    TableInfoNotFoundSnafu, TableNotFoundSnafu, TableRouteNotFoundSnafu,
-    UnexpectedLogicalRouteTableSnafu, UnsupportedSnafu, WaitProcedureSnafu,
+    EmptyDdlTasksSnafu, ProcedureOutputSnafu, RegisterProcedureLoaderSnafu, Result,
+    SubmitProcedureSnafu, TableInfoNotFoundSnafu, TableNotFoundSnafu, TableRouteNotFoundSnafu,
+    UnexpectedLogicalRouteTableSnafu, WaitProcedureSnafu,
 };
 use crate::key::table_info::TableInfoValue;
 use crate::key::table_name::TableNameKey;
 use crate::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
+use crate::procedure_executor::ExecutorContext;
 #[cfg(feature = "enterprise")]
 use crate::rpc::ddl::trigger::CreateTriggerTask;
 #[cfg(feature = "enterprise")]
@@ -65,8 +64,6 @@ use crate::rpc::ddl::{
    CreateViewTask, DropDatabaseTask, DropFlowTask, DropTableTask, DropViewTask, QueryContext,
    SubmitDdlTaskRequest, SubmitDdlTaskResponse, TruncateTableTask,
 };
-use crate::rpc::procedure;
-use crate::rpc::procedure::{MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse};
 use crate::rpc::router::RegionRoute;

 pub type DdlManagerRef = Arc<DdlManager>;
@@ -418,6 +415,75 @@ impl DdlManager {

        Ok((procedure_id, output))
    }
+
+    pub async fn submit_ddl_task(
+        &self,
+        ctx: &ExecutorContext,
+        request: SubmitDdlTaskRequest,
+    ) -> Result<SubmitDdlTaskResponse> {
+        let span = ctx
+            .tracing_context
+            .as_ref()
+            .map(TracingContext::from_w3c)
+            .unwrap_or_else(TracingContext::from_current_span)
+            .attach(tracing::info_span!("DdlManager::submit_ddl_task"));
+        async move {
+            debug!("Submitting Ddl task: {:?}", request.task);
+            match request.task {
+                CreateTable(create_table_task) => {
+                    handle_create_table_task(self, create_table_task).await
+                }
+                DropTable(drop_table_task) => handle_drop_table_task(self, drop_table_task).await,
+                AlterTable(alter_table_task) => {
+                    handle_alter_table_task(self, alter_table_task).await
+                }
+                TruncateTable(truncate_table_task) => {
+                    handle_truncate_table_task(self, truncate_table_task).await
+                }
+                CreateLogicalTables(create_table_tasks) => {
+                    handle_create_logical_table_tasks(self, create_table_tasks).await
+                }
+                AlterLogicalTables(alter_table_tasks) => {
+                    handle_alter_logical_table_tasks(self, alter_table_tasks).await
+                }
+                DropLogicalTables(_) => todo!(),
+                CreateDatabase(create_database_task) => {
+                    handle_create_database_task(self, create_database_task).await
+                }
+                DropDatabase(drop_database_task) => {
+                    handle_drop_database_task(self, drop_database_task).await
+                }
+                AlterDatabase(alter_database_task) => {
+                    handle_alter_database_task(self, alter_database_task).await
+                }
+                CreateFlow(create_flow_task) => {
+                    handle_create_flow_task(self, create_flow_task, request.query_context.into())
+                        .await
+                }
+                DropFlow(drop_flow_task) => handle_drop_flow_task(self, drop_flow_task).await,
+                CreateView(create_view_task) => {
+                    handle_create_view_task(self, create_view_task).await
+                }
+                DropView(drop_view_task) => handle_drop_view_task(self, drop_view_task).await,
+                #[cfg(feature = "enterprise")]
+                CreateTrigger(create_trigger_task) => {
+                    handle_create_trigger_task(
+                        self,
+                        create_trigger_task,
+                        request.query_context.into(),
+                    )
+                    .await
+                }
+                #[cfg(feature = "enterprise")]
+                DropTrigger(drop_trigger_task) => {
+                    handle_drop_trigger_task(self, drop_trigger_task, request.query_context.into())
+                        .await
+                }
+            }
+        }
+        .trace(span)
+        .await
+    }
 }

 async fn handle_truncate_table_task(
@@ -667,6 +733,8 @@ async fn handle_drop_trigger_task(
    query_context: QueryContext,
 ) -> Result<SubmitDdlTaskResponse> {
    let Some(m) = ddl_manager.trigger_ddl_manager.as_ref() else {
+        use crate::error::UnsupportedSnafu;
+
        return UnsupportedSnafu {
            operation: "drop trigger",
        }
@@ -746,6 +814,8 @@ async fn handle_create_trigger_task(
    query_context: QueryContext,
 ) -> Result<SubmitDdlTaskResponse> {
    let Some(m) = ddl_manager.trigger_ddl_manager.as_ref() else {
+        use crate::error::UnsupportedSnafu;
+
        return UnsupportedSnafu {
            operation: "create trigger",
        }
@@ -822,119 +892,6 @@ async fn handle_create_view_task(
    })
 }

-/// TODO(dennis): let [`DdlManager`] implement [`ProcedureExecutor`] looks weird, find some way to refactor it.
-#[async_trait::async_trait]
-impl ProcedureExecutor for DdlManager {
-    async fn submit_ddl_task(
-        &self,
-        ctx: &ExecutorContext,
-        request: SubmitDdlTaskRequest,
-    ) -> Result<SubmitDdlTaskResponse> {
-        let span = ctx
-            .tracing_context
-            .as_ref()
-            .map(TracingContext::from_w3c)
-            .unwrap_or(TracingContext::from_current_span())
-            .attach(tracing::info_span!("DdlManager::submit_ddl_task"));
-        async move {
-            debug!("Submitting Ddl task: {:?}", request.task);
-            match request.task {
-                CreateTable(create_table_task) => {
-                    handle_create_table_task(self, create_table_task).await
-                }
-                DropTable(drop_table_task) => handle_drop_table_task(self, drop_table_task).await,
-                AlterTable(alter_table_task) => {
-                    handle_alter_table_task(self, alter_table_task).await
-                }
-                TruncateTable(truncate_table_task) => {
-                    handle_truncate_table_task(self, truncate_table_task).await
-                }
-                CreateLogicalTables(create_table_tasks) => {
-                    handle_create_logical_table_tasks(self, create_table_tasks).await
-                }
-                AlterLogicalTables(alter_table_tasks) => {
-                    handle_alter_logical_table_tasks(self, alter_table_tasks).await
-                }
-                DropLogicalTables(_) => todo!(),
-                CreateDatabase(create_database_task) => {
-                    handle_create_database_task(self, create_database_task).await
-                }
-                DropDatabase(drop_database_task) => {
-                    handle_drop_database_task(self, drop_database_task).await
-                }
-                AlterDatabase(alter_database_task) => {
-                    handle_alter_database_task(self, alter_database_task).await
-                }
-                CreateFlow(create_flow_task) => {
-                    handle_create_flow_task(self, create_flow_task, request.query_context.into())
-                        .await
-                }
-                DropFlow(drop_flow_task) => handle_drop_flow_task(self, drop_flow_task).await,
-                CreateView(create_view_task) => {
-                    handle_create_view_task(self, create_view_task).await
-                }
-                DropView(drop_view_task) => handle_drop_view_task(self, drop_view_task).await,
-                #[cfg(feature = "enterprise")]
-                CreateTrigger(create_trigger_task) => {
-                    handle_create_trigger_task(
-                        self,
-                        create_trigger_task,
-                        request.query_context.into(),
-                    )
-                    .await
-                }
-                #[cfg(feature = "enterprise")]
-                DropTrigger(drop_trigger_task) => {
-                    handle_drop_trigger_task(self, drop_trigger_task, request.query_context.into())
-                        .await
-                }
-            }
-        }
-        .trace(span)
-        .await
-    }
-
-    async fn migrate_region(
-        &self,
-        _ctx: &ExecutorContext,
-        _request: MigrateRegionRequest,
-    ) -> Result<MigrateRegionResponse> {
-        UnsupportedSnafu {
-            operation: "migrate_region",
-        }
-        .fail()
-    }
-
-    async fn query_procedure_state(
-        &self,
-        _ctx: &ExecutorContext,
-        pid: &str,
-    ) -> Result<ProcedureStateResponse> {
-        let pid =
-            ProcedureId::parse_str(pid).with_context(|_| ParseProcedureIdSnafu { key: pid })?;
-
-        let state = self
-            .procedure_manager
-            .procedure_state(pid)
-            .await
-            .context(QueryProcedureSnafu)?
-            .context(ProcedureNotFoundSnafu {
-                pid: pid.to_string(),
-            })?;
-
-        Ok(procedure::procedure_state_to_pb_response(&state))
-    }
-
-    async fn list_procedures(&self, _ctx: &ExecutorContext) -> Result<ProcedureDetailResponse> {
-        let metas = self
-            .procedure_manager
-            .list_procedures()
-            .await
-            .context(QueryProcedureSnafu)?;
-        Ok(procedure::procedure_details_to_pb_response(metas))
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use std::sync::Arc;
@@ -996,6 +953,7 @@ mod tests {
            state_store,
            poison_manager,
            None,
+            None,
        ));

        let _ = DdlManager::try_new(
--- a/src/common/meta/src/error.rs
+++ b/src/common/meta/src/error.rs
@@ -403,6 +403,13 @@ pub enum Error {
        location: Location,
    },

+    #[snafu(display("Catalog not found, catalog: {}", catalog))]
+    CatalogNotFound {
+        catalog: String,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
    #[snafu(display("Invalid metadata, err: {}", err_msg))]
    InvalidMetadata {
        err_msg: String,
@@ -733,6 +740,32 @@ pub enum Error {
        operation: String,
    },

+    #[cfg(feature = "pg_kvbackend")]
+    #[snafu(display("Failed to setup PostgreSQL TLS configuration: {}", reason))]
+    PostgresTlsConfig {
+        reason: String,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[cfg(feature = "pg_kvbackend")]
+    #[snafu(display("Failed to load TLS certificate from path: {}", path))]
+    LoadTlsCertificate {
+        path: String,
+        #[snafu(source)]
+        error: std::io::Error,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[cfg(feature = "pg_kvbackend")]
+    #[snafu(display("Invalid TLS configuration: {}", reason))]
+    InvalidTlsConfig {
+        reason: String,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
    #[cfg(feature = "mysql_kvbackend")]
    #[snafu(display("Failed to execute via MySql, sql: {}", sql))]
    MySqlExecution {
@@ -938,6 +971,13 @@ pub enum Error {
        source: api::error::Error,
    },

+    #[snafu(display("Failed to convert time ranges"))]
+    ConvertTimeRanges {
+        #[snafu(implicit)]
+        location: Location,
+        source: api::error::Error,
+    },
+
    #[snafu(display(
        "Column metadata inconsistencies found in table: {}, table_id: {}",
        table_name,
@@ -947,6 +987,39 @@ pub enum Error {
        table_name: String,
        table_id: TableId,
    },
+
+    #[snafu(display(
+        "Column not found in column metadata, column_name: {}, column_id: {}",
+        column_name,
+        column_id
+    ))]
+    ColumnNotFound { column_name: String, column_id: u32 },
+
+    #[snafu(display(
+        "Column id mismatch, column_name: {}, expected column_id: {}, actual column_id: {}",
+        column_name,
+        expected_column_id,
+        actual_column_id
+    ))]
+    ColumnIdMismatch {
+        column_name: String,
+        expected_column_id: u32,
+        actual_column_id: u32,
+    },
+
+    #[snafu(display(
+        "Timestamp column mismatch, expected column_name: {}, expected column_id: {}, actual column_name: {}, actual column_id: {}",
+        expected_column_name,
+        expected_column_id,
+        actual_column_name,
+        actual_column_id,
+    ))]
+    TimestampMismatch {
+        expected_column_name: String,
+        expected_column_id: u32,
+        actual_column_name: String,
+        actual_column_id: u32,
+    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -972,7 +1045,10 @@ impl ErrorExt for Error {
            | MissingColumnIds { .. }
            | MissingColumnInColumnMetadata { .. }
            | MismatchColumnId { .. }
-            | ColumnMetadataConflicts { .. } => StatusCode::Unexpected,
+            | ColumnMetadataConflicts { .. }
+            | ColumnNotFound { .. }
+            | ColumnIdMismatch { .. }
+            | TimestampMismatch { .. } => StatusCode::Unexpected,

            Unsupported { .. } => StatusCode::Unsupported,
            WriteObject { .. } | ReadObject { .. } => StatusCode::StorageUnavailable,
@@ -1012,7 +1088,8 @@ impl ErrorExt for Error {
            | KafkaGetOffset { .. }
            | ReadFlexbuffers { .. }
            | SerializeFlexbuffers { .. }
-            | DeserializeFlexbuffers { .. } => StatusCode::Unexpected,
+            | DeserializeFlexbuffers { .. }
+            | ConvertTimeRanges { .. } => StatusCode::Unexpected,

            SendMessage { .. } | GetKvCache { .. } | CacheNotGet { .. } => StatusCode::Internal,

@@ -1062,6 +1139,7 @@ impl ErrorExt for Error {
            ParseProcedureId { .. }
            | InvalidNumTopics { .. }
            | SchemaNotFound { .. }
+            | CatalogNotFound { .. }
            | InvalidNodeInfoKey { .. }
            | InvalidStatKey { .. }
            | ParseNum { .. }
@@ -1072,7 +1150,10 @@ impl ErrorExt for Error {
            PostgresExecution { .. }
            | CreatePostgresPool { .. }
            | GetPostgresConnection { .. }
-            | PostgresTransaction { .. } => StatusCode::Internal,
+            | PostgresTransaction { .. }
+            | PostgresTlsConfig { .. }
+            | LoadTlsCertificate { .. }
+            | InvalidTlsConfig { .. } => StatusCode::Internal,
            #[cfg(feature = "mysql_kvbackend")]
            MySqlExecution { .. } | CreateMySqlPool { .. } | MySqlTransaction { .. } => {
                StatusCode::Internal
--- a/src/common/meta/src/kv_backend/rds.rs
+++ b/src/common/meta/src/kv_backend/rds.rs
@@ -40,7 +40,7 @@ const RDS_STORE_OP_RANGE_DELETE: &str = "range_delete";
 const RDS_STORE_OP_BATCH_DELETE: &str = "batch_delete";

 #[cfg(feature = "pg_kvbackend")]
-mod postgres;
+pub mod postgres;
 #[cfg(feature = "pg_kvbackend")]
 pub use postgres::PgStore;

@@ -118,7 +118,7 @@ impl<T: Executor> ExecutorImpl<'_, T> {
        }
    }

-    #[warn(dead_code)] // Used in #[cfg(feature = "mysql_kvbackend")]
+    #[allow(dead_code)] // Used in #[cfg(feature = "mysql_kvbackend")]
    async fn execute(&mut self, query: &str, params: &Vec<&Vec<u8>>) -> Result<()> {
        match self {
            Self::Default(executor) => executor.execute(query, params).await,
--- a/src/common/meta/src/kv_backend/rds/postgres.rs
+++ b/src/common/meta/src/kv_backend/rds/postgres.rs
@@ -12,19 +12,29 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::fs::File;
+use std::io::BufReader;
 use std::marker::PhantomData;
 use std::sync::Arc;

 use common_telemetry::debug;
 use deadpool_postgres::{Config, Pool, Runtime};
+use rustls::client::danger::{HandshakeSignatureValid, ServerCertVerified, ServerCertVerifier};
+use rustls::pki_types::{CertificateDer, ServerName, UnixTime};
+use rustls::server::ParsedCertificate;
+// TLS-related imports (feature-gated)
+use rustls::ClientConfig;
+use rustls::{DigitallySignedStruct, Error as TlsError, SignatureScheme};
+use rustls_pemfile::{certs, private_key};
 use snafu::ResultExt;
 use strum::AsRefStr;
 use tokio_postgres::types::ToSql;
 use tokio_postgres::{IsolationLevel, NoTls, Row};
+use tokio_postgres_rustls::MakeRustlsConnect;

 use crate::error::{
-    CreatePostgresPoolSnafu, GetPostgresConnectionSnafu, PostgresExecutionSnafu,
-    PostgresTransactionSnafu, Result,
+    CreatePostgresPoolSnafu, GetPostgresConnectionSnafu, LoadTlsCertificateSnafu,
+    PostgresExecutionSnafu, PostgresTlsConfigSnafu, PostgresTransactionSnafu, Result,
 };
 use crate::kv_backend::rds::{
    Executor, ExecutorFactory, ExecutorImpl, KvQueryExecutor, RdsStore, Transaction,
@@ -38,6 +48,41 @@ use crate::rpc::store::{
 };
 use crate::rpc::KeyValue;

+/// TLS mode configuration for PostgreSQL connections.
+/// This mirrors the TlsMode from servers::tls to avoid circular dependencies.
+///
+/// The variant names follow PostgreSQL's `sslmode` values. How each mode is
+/// enforced is decided by `create_postgres_tls_connector` and
+/// `PgStore::with_url_and_tls`.
+#[derive(Debug, Clone, PartialEq, Eq, Default)]
+pub enum TlsMode {
+    /// No TLS. A TLS connector cannot be created for this mode and the pool is
+    /// built with `NoTls`.
+    Disable,
+    /// Default mode. The server certificate is accepted without verification;
+    /// if the TLS connector cannot be created, the pool falls back to `NoTls`.
+    #[default]
+    Prefer,
+    /// The server certificate is accepted without verification; a failure to
+    /// create the TLS connector is returned as an error.
+    Require,
+    /// The server certificate chain is verified against the CA store, but the
+    /// server host name is not checked.
+    VerifyCa,
+    /// The server certificate is verified by the rustls client configuration
+    /// built from the CA store (per the connector docs this includes the host name).
+    VerifyFull,
+}
+
+/// TLS configuration for PostgreSQL connections.
+/// This mirrors the TlsOption from servers::tls to avoid circular dependencies.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct TlsOption {
+    /// Which TLS mode to use for the connection.
+    pub mode: TlsMode,
+    /// Path to the PEM client certificate chain. Client authentication is only
+    /// configured when both this and `key_path` are non-empty.
+    pub cert_path: String,
+    /// Path to the PEM private key that belongs to `cert_path`.
+    pub key_path: String,
+    /// Path to a PEM file with extra CA certificates, used by the `VerifyCa`
+    /// and `VerifyFull` modes. An empty string means no custom CA.
+    pub ca_cert_path: String,
+    /// NOTE(review): not read by the PostgreSQL connector code in this module;
+    /// presumably kept only to mirror `servers::tls::TlsOption` — confirm.
+    pub watch: bool,
+}
+
+impl Default for TlsOption {
+    /// Returns the baseline option set: the default [`TlsMode`] (`Prefer`),
+    /// empty certificate, key and CA paths, and `watch` turned off.
+    fn default() -> Self {
+        Self {
+            mode: TlsMode::default(),
+            cert_path: String::default(),
+            key_path: String::default(),
+            ca_cert_path: String::default(),
+            watch: bool::default(),
+        }
+    }
+}
+
 const PG_STORE_NAME: &str = "pg_store";

 pub struct PgClient(deadpool::managed::Object<deadpool_postgres::Manager>);
@@ -348,6 +393,265 @@ impl ExecutorFactory<PgClient> for PgExecutorFactory {
 /// It uses [deadpool_postgres::Pool] as the connection pool for [RdsStore].
 pub type PgStore = RdsStore<PgClient, PgExecutorFactory, PgSqlTemplateSet>;

+/// Creates a PostgreSQL TLS connector based on the provided configuration.
+///
+/// This function creates a rustls-based TLS connector for PostgreSQL connections,
+/// following PostgreSQL's TLS mode specifications:
+///
+/// # TLS Modes (PostgreSQL Specification)
+///
+/// - `Disable`: No TLS connection attempted; this function returns an error
+/// - `Prefer`: Try TLS first, fallback to plaintext if TLS fails (handled by connection logic)
+/// - `Require`: Only TLS connections, but NO certificate verification (accept any cert)
+/// - `VerifyCa`: TLS + verify certificate is signed by trusted CA (no hostname verification)
+/// - `VerifyFull`: TLS + verify CA + verify hostname matches certificate SAN
+///
+/// # Client authentication
+///
+/// A client certificate is presented only when both `cert_path` and `key_path`
+/// are non-empty. If exactly one of them is set, a warning is logged and the
+/// connector is built without client authentication.
+///
+/// # Errors
+///
+/// Returns an error when the mode is `Disable`, when the CA store cannot be
+/// loaded (`VerifyCa`/`VerifyFull`), or when the client certificate or private
+/// key cannot be loaded or is rejected by rustls.
+pub fn create_postgres_tls_connector(tls_config: &TlsOption) -> Result<MakeRustlsConnect> {
+    common_telemetry::info!(
+        "Creating PostgreSQL TLS connector with mode: {:?}",
+        tls_config.mode
+    );
+
+    let config_builder = match tls_config.mode {
+        TlsMode::Disable => {
+            return PostgresTlsConfigSnafu {
+                reason: "Cannot create TLS connector for Disable mode".to_string(),
+            }
+            .fail();
+        }
+        TlsMode::Prefer | TlsMode::Require => {
+            // For Prefer/Require: Accept any certificate (no verification)
+            let verifier = Arc::new(AcceptAnyVerifier);
+            ClientConfig::builder()
+                .dangerous()
+                .with_custom_certificate_verifier(verifier)
+        }
+        TlsMode::VerifyCa => {
+            // For VerifyCa: Verify server cert against CA store, but skip hostname verification
+            let ca_store = load_ca(&tls_config.ca_cert_path)?;
+            let verifier = Arc::new(NoHostnameVerification { roots: ca_store });
+            ClientConfig::builder()
+                .dangerous()
+                .with_custom_certificate_verifier(verifier)
+        }
+        TlsMode::VerifyFull => {
+            // For VerifyFull: use the rustls client config built from the CA store
+            let ca_store = load_ca(&tls_config.ca_cert_path)?;
+            ClientConfig::builder().with_root_certificates(ca_store)
+        }
+    };
+
+    // Client authentication needs both halves of the key pair.
+    let has_cert = !tls_config.cert_path.is_empty();
+    let has_key = !tls_config.key_path.is_empty();
+
+    // Create the TLS client configuration based on the mode and client cert requirements
+    let client_config = if has_cert && has_key {
+        // Client certificate authentication required
+        common_telemetry::info!("Loading client certificate for mutual TLS");
+        let cert_chain = load_certs(&tls_config.cert_path)?;
+        let private_key = load_private_key(&tls_config.key_path)?;
+
+        config_builder
+            .with_client_auth_cert(cert_chain, private_key)
+            .map_err(|e| {
+                PostgresTlsConfigSnafu {
+                    reason: format!("Failed to configure client authentication: {}", e),
+                }
+                .build()
+            })?
+    } else {
+        if has_cert != has_key {
+            // A half-configured key pair is almost certainly a mistake; make it
+            // visible instead of reporting that no certificate was provided.
+            common_telemetry::warn!(
+                "Only one of client certificate path and private key path is set, skip client authentication"
+            );
+        } else {
+            common_telemetry::info!("No client certificate provided, skip client authentication");
+        }
+        config_builder.with_no_client_auth()
+    };
+
+    common_telemetry::info!("Successfully created PostgreSQL TLS connector");
+    Ok(MakeRustlsConnect::new(client_config))
+}
+
+/// For Prefer/Require mode, we accept any server certificate without verification.
+///
+/// Every check unconditionally succeeds: neither the certificate chain, the
+/// server name, nor the handshake signatures are validated. The connection is
+/// encrypted, but the peer is not authenticated.
+#[derive(Debug)]
+struct AcceptAnyVerifier;
+
+impl ServerCertVerifier for AcceptAnyVerifier {
+    /// Accepts the presented certificate; all arguments are ignored.
+    fn verify_server_cert(
+        &self,
+        _end_entity: &CertificateDer<'_>,
+        _intermediates: &[CertificateDer<'_>],
+        _server_name: &ServerName<'_>,
+        _ocsp_response: &[u8],
+        _now: UnixTime,
+    ) -> std::result::Result<ServerCertVerified, TlsError> {
+        common_telemetry::debug!(
+            "Accepting server certificate without verification (Prefer/Require mode)"
+        );
+        Ok(ServerCertVerified::assertion())
+    }
+
+    /// Accepts any TLS 1.2 handshake signature; all arguments are ignored.
+    fn verify_tls12_signature(
+        &self,
+        _message: &[u8],
+        _cert: &CertificateDer<'_>,
+        _dss: &DigitallySignedStruct,
+    ) -> std::result::Result<HandshakeSignatureValid, TlsError> {
+        // Accept any signature without verification
+        Ok(HandshakeSignatureValid::assertion())
+    }
+
+    /// Accepts any TLS 1.3 handshake signature; all arguments are ignored.
+    fn verify_tls13_signature(
+        &self,
+        _message: &[u8],
+        _cert: &CertificateDer<'_>,
+        _dss: &DigitallySignedStruct,
+    ) -> std::result::Result<HandshakeSignatureValid, TlsError> {
+        // Accept any signature without verification
+        Ok(HandshakeSignatureValid::assertion())
+    }
+
+    /// Advertises every signature scheme supported by the `ring` crypto provider.
+    fn supported_verify_schemes(&self) -> Vec<SignatureScheme> {
+        // Support all signature schemes
+        rustls::crypto::ring::default_provider()
+            .signature_verification_algorithms
+            .supported_schemes()
+    }
+}
+
+/// For VerifyCa mode, we verify the server certificate against our CA store
+/// and skip verification of the server's host name.
+#[derive(Debug)]
+struct NoHostnameVerification {
+    /// Trust anchors the server certificate chain must lead to.
+    roots: Arc<rustls::RootCertStore>,
+}
+
+impl ServerCertVerifier for NoHostnameVerification {
+    /// Checks that `end_entity` chains to one of `roots` (through
+    /// `intermediates`, evaluated at `now`) using the `ring` provider's
+    /// signature algorithms. The server name and OCSP response are ignored.
+    fn verify_server_cert(
+        &self,
+        end_entity: &CertificateDer<'_>,
+        intermediates: &[CertificateDer<'_>],
+        _server_name: &ServerName<'_>,
+        _ocsp_response: &[u8],
+        now: UnixTime,
+    ) -> std::result::Result<ServerCertVerified, TlsError> {
+        let cert = ParsedCertificate::try_from(end_entity)?;
+        rustls::client::verify_server_cert_signed_by_trust_anchor(
+            &cert,
+            &self.roots,
+            intermediates,
+            now,
+            rustls::crypto::ring::default_provider()
+                .signature_verification_algorithms
+                .all,
+        )?;
+
+        Ok(ServerCertVerified::assertion())
+    }
+
+    /// Verifies a TLS 1.2 handshake signature with rustls' helper and the
+    /// `ring` provider's algorithms.
+    fn verify_tls12_signature(
+        &self,
+        message: &[u8],
+        cert: &CertificateDer<'_>,
+        dss: &DigitallySignedStruct,
+    ) -> std::result::Result<HandshakeSignatureValid, TlsError> {
+        rustls::crypto::verify_tls12_signature(
+            message,
+            cert,
+            dss,
+            &rustls::crypto::ring::default_provider().signature_verification_algorithms,
+        )
+    }
+
+    /// Verifies a TLS 1.3 handshake signature with rustls' helper and the
+    /// `ring` provider's algorithms.
+    fn verify_tls13_signature(
+        &self,
+        message: &[u8],
+        cert: &CertificateDer<'_>,
+        dss: &DigitallySignedStruct,
+    ) -> std::result::Result<HandshakeSignatureValid, TlsError> {
+        rustls::crypto::verify_tls13_signature(
+            message,
+            cert,
+            dss,
+            &rustls::crypto::ring::default_provider().signature_verification_algorithms,
+        )
+    }
+
+    /// Advertises every signature scheme supported by the `ring` crypto provider.
+    fn supported_verify_schemes(&self) -> Vec<SignatureScheme> {
+        // Support all signature schemes
+        rustls::crypto::ring::default_provider()
+            .signature_verification_algorithms
+            .supported_schemes()
+    }
+}
+
+/// Reads every PEM-encoded certificate found in the file at `path`.
+///
+/// Fails if the file cannot be opened or if any certificate entry cannot be
+/// parsed; parsing stops at the first bad entry.
+fn load_certs(path: &str) -> Result<Vec<rustls::pki_types::CertificateDer<'static>>> {
+    let pem_file = File::open(path).context(LoadTlsCertificateSnafu { path })?;
+    let mut pem_reader = BufReader::new(pem_file);
+
+    let mut chain = Vec::new();
+    for entry in certs(&mut pem_reader) {
+        match entry {
+            Ok(cert) => chain.push(cert),
+            Err(e) => {
+                return PostgresTlsConfigSnafu {
+                    reason: format!("Failed to parse certificates from {}: {}", path, e),
+                }
+                .fail();
+            }
+        }
+    }
+    Ok(chain)
+}
+
+/// Reads the first PEM-encoded private key found in the file at `path`.
+///
+/// Fails if the file cannot be opened, cannot be parsed, or holds no private key.
+fn load_private_key(path: &str) -> Result<rustls::pki_types::PrivateKeyDer<'static>> {
+    let pem_file = File::open(path).context(LoadTlsCertificateSnafu { path })?;
+    let mut pem_reader = BufReader::new(pem_file);
+
+    match private_key(&mut pem_reader) {
+        Ok(Some(key)) => Ok(key),
+        Ok(None) => PostgresTlsConfigSnafu {
+            reason: format!("No private key found in {}", path),
+        }
+        .fail(),
+        Err(e) => PostgresTlsConfigSnafu {
+            reason: format!("Failed to parse private key from {}: {}", path, e),
+        }
+        .fail(),
+    }
+}
+
+/// Builds the root certificate store used by the `VerifyCa` and `VerifyFull` modes.
+///
+/// The store always contains the system root certificates. When `path` is
+/// non-empty, the certificates in that PEM file are added as well.
+///
+/// System roots are added on a best-effort basis: entries that cannot be parsed
+/// are skipped (and counted in a warning) rather than failing the whole setup,
+/// because platform trust stores can contain invalid certificates. The
+/// explicitly configured custom CA is still added strictly.
+///
+/// NOTE(review): because system roots are always trusted in addition to the
+/// custom CA, `VerifyCa` (which skips the host name check) accepts any
+/// certificate issued by any system-trusted CA — confirm this is intended.
+///
+/// # Errors
+///
+/// Fails if the system roots cannot be loaded, or if the custom CA file cannot
+/// be read, parsed, or added to the store.
+fn load_ca(path: &str) -> Result<Arc<rustls::RootCertStore>> {
+    let mut root_store = rustls::RootCertStore::empty();
+
+    // Add system root certificates
+    match rustls_native_certs::load_native_certs() {
+        Ok(certs) => {
+            let (num_certs, num_ignored) = root_store.add_parsable_certificates(certs);
+            if num_ignored > 0 {
+                common_telemetry::warn!(
+                    "Ignored {num_ignored} system root certificates that could not be parsed"
+                );
+            }
+            common_telemetry::info!("Loaded {num_certs} system root certificates successfully");
+        }
+        Err(e) => {
+            return PostgresTlsConfigSnafu {
+                reason: format!("Failed to load system root certificates: {}", e),
+            }
+            .fail();
+        }
+    }
+
+    // Try add custom CA certificate if provided
+    if !path.is_empty() {
+        let ca_certs = load_certs(path)?;
+        if ca_certs.is_empty() {
+            // The file parsed cleanly but held no certificate; verification would
+            // then silently rely on the system roots alone.
+            common_telemetry::warn!("No CA certificate found in {}", path);
+        }
+        for cert in ca_certs {
+            if let Err(e) = root_store.add(cert) {
+                return PostgresTlsConfigSnafu {
+                    reason: format!("Failed to add custom CA certificate: {}", e),
+                }
+                .fail();
+            }
+        }
+        common_telemetry::info!("Added custom CA certificate from {}", path);
+    }
+
+    Ok(Arc::new(root_store))
+}
+
 #[async_trait::async_trait]
 impl KvQueryExecutor<PgClient> for PgStore {
    async fn range_with_query_executor(
@@ -491,17 +795,54 @@ impl KvQueryExecutor<PgClient> for PgStore {
 }

 impl PgStore {
-    /// Create [PgStore] impl of [KvBackendRef] from url.
-    pub async fn with_url(url: &str, table_name: &str, max_txn_ops: usize) -> Result<KvBackendRef> {
+    /// Create [PgStore] impl of [KvBackendRef] from url with optional TLS support.
+    ///
+    /// # Arguments
+    ///
+    /// * `url` - PostgreSQL connection URL
+    /// * `table_name` - Name of the table to use for key-value storage
+    /// * `max_txn_ops` - Maximum number of operations per transaction
+    /// * `tls_config` - Optional TLS configuration. If None, uses plaintext connection.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the connection pool cannot be created, or if the TLS
+    /// connector cannot be built for any mode other than `Prefer`. In `Prefer`
+    /// mode a connector failure is logged as a warning and the pool is created
+    /// without TLS.
+    ///
+    /// NOTE(review): only the connector is configured here; whether a plaintext
+    /// fallback is forbidden for `Require`/`VerifyCa`/`VerifyFull` presumably
+    /// also depends on the `sslmode` carried by `url` — confirm.
+    pub async fn with_url_and_tls(
+        url: &str,
+        table_name: &str,
+        max_txn_ops: usize,
+        tls_config: Option<TlsOption>,
+    ) -> Result<KvBackendRef> {
        let mut cfg = Config::new();
        cfg.url = Some(url.to_string());
-        // TODO(weny, CookiePie): add tls support
-        let pool = cfg
-            .create_pool(Some(Runtime::Tokio1), NoTls)
-            .context(CreatePostgresPoolSnafu)?;
+
+        let pool = match tls_config {
+            Some(tls_config) if tls_config.mode != TlsMode::Disable => {
+                match create_postgres_tls_connector(&tls_config) {
+                    Ok(tls_connector) => cfg
+                        .create_pool(Some(Runtime::Tokio1), tls_connector)
+                        .context(CreatePostgresPoolSnafu)?,
+                    Err(e) => {
+                        if tls_config.mode == TlsMode::Prefer {
+                            // Fallback to insecure connection if TLS fails, but report the
+                            // cause so that a broken TLS setup is not silently masked.
+                            common_telemetry::warn!("Failed to create TLS connector, falling back to insecure connection: {:?}", e);
+                            cfg.create_pool(Some(Runtime::Tokio1), NoTls)
+                                .context(CreatePostgresPoolSnafu)?
+                        } else {
+                            return Err(e);
+                        }
+                    }
+                }
+            }
+            // No TLS configuration, or TLS explicitly disabled.
+            _ => cfg
+                .create_pool(Some(Runtime::Tokio1), NoTls)
+                .context(CreatePostgresPoolSnafu)?,
+        };
+
        Self::with_pg_pool(pool, table_name, max_txn_ops).await
    }

+    /// Create [PgStore] impl of [KvBackendRef] from url (backward compatibility).
+    ///
+    /// Equivalent to calling `with_url_and_tls` with `tls_config` set to `None`,
+    /// which creates the connection pool without TLS.
+    pub async fn with_url(url: &str, table_name: &str, max_txn_ops: usize) -> Result<KvBackendRef> {
+        Self::with_url_and_tls(url, table_name, max_txn_ops, None).await
+    }
+
    /// Create [PgStore] impl of [KvBackendRef] from [deadpool_postgres::Pool].
    pub async fn with_pg_pool(
        pool: Pool,
--- a/src/common/meta/src/lib.rs
+++ b/src/common/meta/src/lib.rs
@@ -37,6 +37,7 @@ pub mod node_expiry_listener;
 pub mod node_manager;
 pub mod peer;
 pub mod poison_key;
+pub mod procedure_executor;
 pub mod range_stream;
 pub mod reconciliation;
 pub mod region_keeper;
--- a/src/common/meta/src/metrics.rs
+++ b/src/common/meta/src/metrics.rs
@@ -15,6 +15,13 @@
 use lazy_static::lazy_static;
 use prometheus::*;

+// String constants that are presumably used as metric label values.
+// NOTE(review): which label each group belongs to (`table_type`, `error_type`,
+// `type`) is inferred from the names — confirm against the call sites.
+pub const TABLE_TYPE_PHYSICAL: &str = "physical";
+pub const TABLE_TYPE_LOGICAL: &str = "logical";
+pub const ERROR_TYPE_RETRYABLE: &str = "retryable";
+pub const ERROR_TYPE_EXTERNAL: &str = "external";
+pub const STATS_TYPE_NO_REGION_METADATA: &str = "no_region_metadata";
+pub const STATS_TYPE_REGION_NOT_OPEN: &str = "region_not_open";
+
 lazy_static! {
    pub static ref METRIC_META_TXN_REQUEST: HistogramVec = register_histogram_vec!(
        "greptime_meta_txn_request",
@@ -114,4 +121,39 @@ lazy_static! {
        &["backend", "result", "op", "type"]
    )
    .unwrap();
+    pub static ref METRIC_META_RECONCILIATION_LIST_REGION_METADATA_DURATION: HistogramVec =
+        register_histogram_vec!(
+            "greptime_meta_reconciliation_list_region_metadata_duration",
+            "reconciliation list region metadata duration",
+            &["table_type"]
+        )
+        .unwrap();
+    pub static ref METRIC_META_RECONCILIATION_RESOLVED_COLUMN_METADATA: IntCounterVec =
+        register_int_counter_vec!(
+            "greptime_meta_reconciliation_resolved_column_metadata",
+            "reconciliation resolved column metadata",
+            &["strategy"]
+        )
+        .unwrap();
+    pub static ref METRIC_META_RECONCILIATION_STATS: IntCounterVec =
+        register_int_counter_vec!(
+            "greptime_meta_reconciliation_stats",
+            "reconciliation stats",
+            &["procedure_name", "table_type", "type"]
+        )
+        .unwrap();
+    pub static ref METRIC_META_RECONCILIATION_PROCEDURE: HistogramVec =
+        register_histogram_vec!(
+            "greptime_meta_reconciliation_procedure",
+            "reconcile table procedure",
+            &["procedure_name", "step"]
+        )
+        .unwrap();
+    pub static ref METRIC_META_RECONCILIATION_PROCEDURE_ERROR: IntCounterVec =
+        register_int_counter_vec!(
+            "greptime_meta_reconciliation_procedure_error",
+            "reconciliation procedure error",
+            &["procedure_name", "step", "error_type"]
+        )
+        .unwrap();
 }
--- a/src/common/meta/src/procedure_executor.rs
+++ b/src/common/meta/src/procedure_executor.rs
@@ -0,0 +1,173 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use api::v1::meta::{ProcedureDetailResponse, ReconcileRequest, ReconcileResponse};
+use common_procedure::{ProcedureId, ProcedureManagerRef};
+use common_telemetry::tracing_context::W3cTrace;
+use snafu::{OptionExt, ResultExt};
+
+use crate::ddl_manager::DdlManagerRef;
+use crate::error::{
+    ParseProcedureIdSnafu, ProcedureNotFoundSnafu, QueryProcedureSnafu, Result, UnsupportedSnafu,
+};
+use crate::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
+use crate::rpc::procedure::{
+    self, AddRegionFollowerRequest, MigrateRegionRequest, MigrateRegionResponse,
+    ProcedureStateResponse, RemoveRegionFollowerRequest,
+};
+
+/// The context of procedure executor.
+///
+/// Passed by reference to every [`ProcedureExecutor`] method.
+#[derive(Debug, Default)]
+pub struct ExecutorContext {
+    /// Optional [`W3cTrace`] tracing context; `None` by default.
+    pub tracing_context: Option<W3cTrace>,
+}
+
+/// The procedure executor that accepts ddl, region migration task etc.
+///
+/// `add_region_follower` and `remove_region_follower` have default
+/// implementations that report the operation as unsupported; every other
+/// method must be provided by the implementor.
+#[async_trait::async_trait]
+pub trait ProcedureExecutor: Send + Sync {
+    /// Submit a ddl task
+    async fn submit_ddl_task(
+        &self,
+        ctx: &ExecutorContext,
+        request: SubmitDdlTaskRequest,
+    ) -> Result<SubmitDdlTaskResponse>;
+
+    /// Add a region follower
+    ///
+    /// The default implementation returns an `Unsupported` error.
+    async fn add_region_follower(
+        &self,
+        _ctx: &ExecutorContext,
+        _request: AddRegionFollowerRequest,
+    ) -> Result<()> {
+        UnsupportedSnafu {
+            operation: "add_region_follower",
+        }
+        .fail()
+    }
+
+    /// Remove a region follower
+    ///
+    /// The default implementation returns an `Unsupported` error.
+    async fn remove_region_follower(
+        &self,
+        _ctx: &ExecutorContext,
+        _request: RemoveRegionFollowerRequest,
+    ) -> Result<()> {
+        UnsupportedSnafu {
+            operation: "remove_region_follower",
+        }
+        .fail()
+    }
+
+    /// Submit a region migration task
+    async fn migrate_region(
+        &self,
+        ctx: &ExecutorContext,
+        request: MigrateRegionRequest,
+    ) -> Result<MigrateRegionResponse>;
+
+    /// Submit a reconcile task.
+    async fn reconcile(
+        &self,
+        _ctx: &ExecutorContext,
+        request: ReconcileRequest,
+    ) -> Result<ReconcileResponse>;
+
+    /// Query the procedure state by its id
+    ///
+    /// `pid` is the textual form of the procedure id.
+    async fn query_procedure_state(
+        &self,
+        ctx: &ExecutorContext,
+        pid: &str,
+    ) -> Result<ProcedureStateResponse>;
+
+    /// List procedures and return their details.
+    async fn list_procedures(&self, ctx: &ExecutorContext) -> Result<ProcedureDetailResponse>;
+}
+
+/// Shared handle to a [`ProcedureExecutor`] trait object.
+pub type ProcedureExecutorRef = Arc<dyn ProcedureExecutor>;
+
+/// The local procedure executor that accepts ddl, region migration task etc.
+///
+/// DDL tasks are forwarded to `ddl_manager`; procedure state and listing
+/// queries are answered by `procedure_manager`. Region migration and
+/// reconciliation are reported as unsupported by this executor.
+pub struct LocalProcedureExecutor {
+    /// Receives the submitted DDL tasks.
+    pub ddl_manager: DdlManagerRef,
+    /// Queried for procedure states and the procedure list.
+    pub procedure_manager: ProcedureManagerRef,
+}
+
+impl LocalProcedureExecutor {
+    /// Creates an executor from the given DDL manager and procedure manager.
+    pub fn new(ddl_manager: DdlManagerRef, procedure_manager: ProcedureManagerRef) -> Self {
+        Self {
+            ddl_manager,
+            procedure_manager,
+        }
+    }
+}
+
+#[async_trait::async_trait]
+impl ProcedureExecutor for LocalProcedureExecutor {
+    /// Forwards the DDL task to the wrapped DDL manager.
+    async fn submit_ddl_task(
+        &self,
+        ctx: &ExecutorContext,
+        request: SubmitDdlTaskRequest,
+    ) -> Result<SubmitDdlTaskResponse> {
+        let ddl_manager = &self.ddl_manager;
+        ddl_manager.submit_ddl_task(ctx, request).await
+    }
+
+    /// Region migration is not available locally; always returns `Unsupported`.
+    async fn migrate_region(
+        &self,
+        _ctx: &ExecutorContext,
+        _request: MigrateRegionRequest,
+    ) -> Result<MigrateRegionResponse> {
+        let unsupported = UnsupportedSnafu {
+            operation: "migrate_region",
+        };
+        unsupported.fail()
+    }
+
+    /// Reconciliation is not available locally; always returns `Unsupported`.
+    async fn reconcile(
+        &self,
+        _ctx: &ExecutorContext,
+        _request: ReconcileRequest,
+    ) -> Result<ReconcileResponse> {
+        let unsupported = UnsupportedSnafu {
+            operation: "reconcile",
+        };
+        unsupported.fail()
+    }
+
+    /// Parses `pid`, looks the procedure up in the procedure manager and
+    /// converts its state into the protobuf response.
+    ///
+    /// Fails if `pid` is not a valid procedure id, if the lookup fails, or if
+    /// no procedure with that id exists.
+    async fn query_procedure_state(
+        &self,
+        _ctx: &ExecutorContext,
+        pid: &str,
+    ) -> Result<ProcedureStateResponse> {
+        let procedure_id =
+            ProcedureId::parse_str(pid).with_context(|_| ParseProcedureIdSnafu { key: pid })?;
+
+        let lookup = self.procedure_manager.procedure_state(procedure_id).await;
+        let Some(state) = lookup.context(QueryProcedureSnafu)? else {
+            return ProcedureNotFoundSnafu {
+                pid: procedure_id.to_string(),
+            }
+            .fail();
+        };
+
+        Ok(procedure::procedure_state_to_pb_response(&state))
+    }
+
+    /// Lists the procedures known to the procedure manager as a protobuf response.
+    async fn list_procedures(&self, _ctx: &ExecutorContext) -> Result<ProcedureDetailResponse> {
+        let listed = self.procedure_manager.list_procedures().await;
+        let metas = listed.context(QueryProcedureSnafu)?;
+        Ok(procedure::procedure_details_to_pb_response(metas))
+    }
+}
--- a/src/common/meta/src/reconciliation.rs
+++ b/src/common/meta/src/reconciliation.rs
@@ -12,15 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-// TODO(weny): Remove it
-#[allow(dead_code)]
+pub mod manager;
+pub(crate) mod reconcile_catalog;
 pub(crate) mod reconcile_database;
-// TODO(weny): Remove it
-#[allow(dead_code)]
-pub(crate) mod reconcile_table;
-// TODO(weny): Remove it
-#[allow(dead_code)]
 pub(crate) mod reconcile_logical_tables;
-// TODO(weny): Remove it
-#[allow(dead_code)]
+pub(crate) mod reconcile_table;
 pub(crate) mod utils;
--- a/src/common/meta/src/reconciliation/manager.rs
+++ b/src/common/meta/src/reconciliation/manager.rs
@@ -0,0 +1,246 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use common_procedure::{
+    watcher, BoxedProcedure, ProcedureId, ProcedureManagerRef, ProcedureWithId,
+};
+use common_telemetry::{error, info, warn};
+use snafu::{OptionExt, ResultExt};
+use store_api::storage::TableId;
+use table::table_name::TableName;
+use table::table_reference::TableReference;
+
+use crate::cache_invalidator::CacheInvalidatorRef;
+use crate::error::{self, Result, TableNotFoundSnafu};
+use crate::key::table_name::TableNameKey;
+use crate::key::TableMetadataManagerRef;
+use crate::node_manager::NodeManagerRef;
+use crate::reconciliation::reconcile_catalog::ReconcileCatalogProcedure;
+use crate::reconciliation::reconcile_database::{ReconcileDatabaseProcedure, DEFAULT_PARALLELISM};
+use crate::reconciliation::reconcile_logical_tables::ReconcileLogicalTablesProcedure;
+use crate::reconciliation::reconcile_table::resolve_column_metadata::ResolveStrategy;
+use crate::reconciliation::reconcile_table::ReconcileTableProcedure;
+use crate::reconciliation::utils::Context;
+
+/// Shared handle to a [`ReconciliationManager`].
+pub type ReconciliationManagerRef = Arc<ReconciliationManager>;
+
+/// The manager for reconciliation procedures.
+///
+/// It builds the reconciliation procedures and submits them to the procedure
+/// manager in the background.
+pub struct ReconciliationManager {
+    /// Used to register procedure loaders and to submit procedures.
+    procedure_manager: ProcedureManagerRef,
+    /// Shared context cloned into every procedure and procedure loader.
+    context: Context,
+}
+
+/// Registers a loader for `$procedure` with `$self.procedure_manager`.
+///
+/// The loader is keyed by `<$procedure>::TYPE_NAME` and rebuilds the procedure
+/// from its JSON form through `<$procedure>::from_json`, handing it a clone of
+/// `$self.context`. A registration failure is given the
+/// `RegisterProcedureLoaderSnafu` context and propagated with `?`, so the macro
+/// can only be expanded inside a function that returns this crate's `Result`.
+macro_rules! register_reconcile_loader {
+    ($self:ident, $procedure:ty) => {{
+        // Cloned once here and once per loader call, since the loader closure
+        // takes ownership of its captured context.
+        let context = $self.context.clone();
+        $self
+            .procedure_manager
+            .register_loader(
+                <$procedure>::TYPE_NAME,
+                Box::new(move |json| {
+                    let context = context.clone();
+                    let procedure = <$procedure>::from_json(context, json)?;
+                    Ok(Box::new(procedure))
+                }),
+            )
+            .context(error::RegisterProcedureLoaderSnafu {
+                type_name: <$procedure>::TYPE_NAME,
+            })?;
+    }};
+}
+
+impl ReconciliationManager {
+    /// Creates a manager whose procedures share the given node manager, table
+    /// metadata manager and cache invalidator.
+    pub fn new(
+        node_manager: NodeManagerRef,
+        table_metadata_manager: TableMetadataManagerRef,
+        cache_invalidator: CacheInvalidatorRef,
+        procedure_manager: ProcedureManagerRef,
+    ) -> Self {
+        let context = Context {
+            node_manager,
+            table_metadata_manager,
+            cache_invalidator,
+        };
+        Self {
+            procedure_manager,
+            context,
+        }
+    }
+
+    /// Try to start the reconciliation manager.
+    ///
+    /// Registers one procedure loader per reconciliation procedure type and
+    /// stops at the first registration that fails.
+    /// Returns an error if the procedure loaders are already registered.
+    pub fn try_start(&self) -> Result<()> {
+        register_reconcile_loader!(self, ReconcileLogicalTablesProcedure);
+        register_reconcile_loader!(self, ReconcileTableProcedure);
+        register_reconcile_loader!(self, ReconcileDatabaseProcedure);
+        register_reconcile_loader!(self, ReconcileCatalogProcedure);
+
+        Ok(())
+    }
+
+    /// Reconcile a table.
+    ///
+    /// The table is resolved by name. If it is its own physical table, a table
+    /// reconciliation is started; otherwise a logical-tables reconciliation is
+    /// started against its physical table.
+    ///
+    /// Returns the procedure id of the reconciliation procedure.
+    pub async fn reconcile_table(
+        &self,
+        table_ref: TableReference<'_>,
+        resolve_strategy: ResolveStrategy,
+    ) -> Result<ProcedureId> {
+        let metadata = &self.context.table_metadata_manager;
+
+        let name_key = TableNameKey::new(table_ref.catalog, table_ref.schema, table_ref.table);
+        let table_id = metadata
+            .table_name_manager()
+            .get(name_key)
+            .await?
+            .with_context(|| TableNotFoundSnafu {
+                table_name: table_ref.to_string(),
+            })?
+            .table_id();
+
+        let (physical_table_id, _) = metadata
+            .table_route_manager()
+            .get_physical_table_route(table_id)
+            .await?;
+
+        // The table is its own physical table.
+        if physical_table_id == table_id {
+            let table_name: TableName = table_ref.into();
+            let procedure_id =
+                self.reconcile_physical_table(table_id, table_name, resolve_strategy);
+            return Ok(procedure_id);
+        }
+
+        // Logical table: reconcile it against its physical table.
+        let physical_table_info = metadata
+            .table_info_manager()
+            .get(physical_table_id)
+            .await?
+            .with_context(|| TableNotFoundSnafu {
+                table_name: format!("table_id: {}", physical_table_id),
+            })?;
+        let logical_tables: Vec<(TableId, TableName)> = vec![(table_id, table_ref.into())];
+
+        Ok(self.reconcile_logical_tables(
+            physical_table_id,
+            physical_table_info.table_name(),
+            logical_tables,
+        ))
+    }
+
+    /// Reconcile a database.
+    ///
+    /// A `parallelism` of 0 is replaced by the default parallelism.
+    ///
+    /// Returns the procedure id of the reconciliation procedure.
+    pub fn reconcile_database(
+        &self,
+        catalog: String,
+        schema: String,
+        resolve_strategy: ResolveStrategy,
+        parallelism: usize,
+    ) -> ProcedureId {
+        let effective_parallelism = normalize_parallelism(parallelism);
+        let procedure: BoxedProcedure = Box::new(ReconcileDatabaseProcedure::new(
+            self.context.clone(),
+            catalog,
+            schema,
+            false,
+            effective_parallelism,
+            resolve_strategy,
+            false,
+        ));
+        self.spawn_procedure(procedure)
+    }
+
+    /// Starts a table reconciliation for a physical table and returns its
+    /// procedure id.
+    fn reconcile_physical_table(
+        &self,
+        table_id: TableId,
+        table_name: TableName,
+        resolve_strategy: ResolveStrategy,
+    ) -> ProcedureId {
+        let procedure: BoxedProcedure = Box::new(ReconcileTableProcedure::new(
+            self.context.clone(),
+            table_id,
+            table_name,
+            resolve_strategy,
+            false,
+        ));
+        self.spawn_procedure(procedure)
+    }
+
+    /// Starts a reconciliation of `logical_tables` against the given physical
+    /// table and returns its procedure id.
+    fn reconcile_logical_tables(
+        &self,
+        physical_table_id: TableId,
+        physical_table_name: TableName,
+        logical_tables: Vec<(TableId, TableName)>,
+    ) -> ProcedureId {
+        let procedure: BoxedProcedure = Box::new(ReconcileLogicalTablesProcedure::new(
+            self.context.clone(),
+            physical_table_id,
+            physical_table_name,
+            logical_tables,
+            false,
+        ));
+        self.spawn_procedure(procedure)
+    }
+
+    /// Reconcile a catalog.
+    ///
+    /// A `parallelism` of 0 is replaced by the default parallelism.
+    ///
+    /// Returns the procedure id of the reconciliation procedure.
+    pub fn reconcile_catalog(
+        &self,
+        catalog: String,
+        resolve_strategy: ResolveStrategy,
+        parallelism: usize,
+    ) -> ProcedureId {
+        let effective_parallelism = normalize_parallelism(parallelism);
+        let procedure: BoxedProcedure = Box::new(ReconcileCatalogProcedure::new(
+            self.context.clone(),
+            catalog,
+            false,
+            resolve_strategy,
+            effective_parallelism,
+        ));
+        self.spawn_procedure(procedure)
+    }
+
+    /// Submits `procedure` on the global runtime and waits for it in the background.
+    ///
+    /// The id is generated before submission and returned immediately; a failure
+    /// to submit or to finish the procedure is only logged.
+    fn spawn_procedure(&self, procedure: BoxedProcedure) -> ProcedureId {
+        let procedure_with_id = ProcedureWithId::with_random_id(procedure);
+        let procedure_id = procedure_with_id.id;
+        let procedure_manager = self.procedure_manager.clone();
+
+        common_runtime::spawn_global(async move {
+            let mut watcher = match procedure_manager.submit(procedure_with_id).await {
+                Ok(watcher) => watcher,
+                Err(e) => {
+                    error!(e; "Failed to submit reconciliation procedure {procedure_id}");
+                    return;
+                }
+            };
+
+            match watcher::wait(&mut watcher).await {
+                Ok(_) => {
+                    info!("Reconciliation procedure {procedure_id} is finished successfully!");
+                }
+                Err(e) => {
+                    error!(e; "Failed to wait reconciliation procedure {procedure_id}");
+                }
+            }
+        });
+
+        procedure_id
+    }
+}
+
+/// Returns `parallelism` unchanged unless it is 0, in which case a warning is
+/// logged and `DEFAULT_PARALLELISM` is returned instead.
+fn normalize_parallelism(parallelism: usize) -> usize {
+    match parallelism {
+        0 => {
+            warn!(
+                "Parallelism is 0, using default parallelism: {}",
+                DEFAULT_PARALLELISM
+            );
+            DEFAULT_PARALLELISM
+        }
+        requested => requested,
+    }
+}
--- a/src/common/meta/src/reconciliation/reconcile_catalog.rs
+++ b/src/common/meta/src/reconciliation/reconcile_catalog.rs
@@ -0,0 +1,237 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::any::Any;
+use std::fmt::Debug;
+use std::time::Instant;
+
+use common_procedure::error::FromJsonSnafu;
+use common_procedure::{
+    Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure,
+    Result as ProcedureResult, Status,
+};
+use futures::stream::BoxStream;
+use serde::{Deserialize, Serialize};
+use snafu::ResultExt;
+
+use crate::cache_invalidator::CacheInvalidatorRef;
+use crate::error::Result;
+use crate::key::TableMetadataManagerRef;
+use crate::lock_key::CatalogLock;
+use crate::metrics;
+use crate::node_manager::NodeManagerRef;
+use crate::reconciliation::reconcile_catalog::start::ReconcileCatalogStart;
+use crate::reconciliation::reconcile_table::resolve_column_metadata::ResolveStrategy;
+use crate::reconciliation::utils::{
+    wait_for_inflight_subprocedures, Context, ReconcileCatalogMetrics, SubprocedureMeta,
+};
+
+pub(crate) mod end;
+pub(crate) mod reconcile_databases;
+pub(crate) mod start;
+
+/// Service handles and state shared by the states of a catalog reconciliation.
+pub(crate) struct ReconcileCatalogContext {
+    pub node_manager: NodeManagerRef,
+    pub table_metadata_manager: TableMetadataManagerRef,
+    pub cache_invalidator: CacheInvalidatorRef,
+    persistent_ctx: PersistentContext,
+    volatile_ctx: VolatileContext,
+}
+
+impl ReconcileCatalogContext {
+    /// Takes the handles from `ctx` and starts with a default volatile context.
+    pub fn new(ctx: Context, persistent_ctx: PersistentContext) -> Self {
+        Self {
+            node_manager: ctx.node_manager,
+            table_metadata_manager: ctx.table_metadata_manager,
+            cache_invalidator: ctx.cache_invalidator,
+            persistent_ctx,
+            volatile_ctx: VolatileContext::default(),
+        }
+    }
+
+    /// Waits for the inflight subprocedure, if any, and adds its result to the metrics.
+    /// The inflight slot is emptied before waiting, so it stays empty if the wait fails.
+    pub(crate) async fn wait_for_inflight_subprocedure(
+        &mut self,
+        procedure_ctx: &ProcedureContext,
+    ) -> Result<()> {
+        let Some(inflight) = self.volatile_ctx.inflight_subprocedure.take() else {
+            return Ok(());
+        };
+        let fast_fail = self.persistent_ctx.fast_fail;
+        let result = wait_for_inflight_subprocedures(procedure_ctx, &[inflight], fast_fail).await?;
+        self.volatile_ctx.metrics += result.into();
+        Ok(())
+    }
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub(crate) struct PersistentContext {
+    catalog: String, // name of the catalog to reconcile
+    fast_fail: bool, // handed to the wait on, and the creation of, subprocedures
+    resolve_strategy: ResolveStrategy, // forwarded to each database subprocedure
+    parallelism: usize, // forwarded to each database subprocedure
+}
+
+impl PersistentContext {
+    pub fn new(
+        catalog: String,
+        fast_fail: bool,
+        resolve_strategy: ResolveStrategy,
+        parallelism: usize,
+    ) -> Self {
+        Self {
+            catalog,
+            fast_fail,
+            resolve_strategy,
+            parallelism,
+        }
+    }
+}
+
+pub(crate) struct VolatileContext {
+    /// Stores the stream of schema names of the catalog.
+    schemas: Option<BoxStream<'static, Result<String>>>,
+    /// Stores the inflight subprocedure.
+    inflight_subprocedure: Option<SubprocedureMeta>,
+    /// Stores the metrics of reconciling catalog.
+    metrics: ReconcileCatalogMetrics,
+    /// The start time of the reconciliation.
+    start_time: Instant,
+}
+
+impl Default for VolatileContext {
+    fn default() -> Self {
+        Self {
+            schemas: None,
+            inflight_subprocedure: None,
+            metrics: ReconcileCatalogMetrics::default(),
+            start_time: Instant::now(), // the elapsed time is measured from here
+        }
+    }
+}
+
+pub struct ReconcileCatalogProcedure {
+    pub context: ReconcileCatalogContext,
+    state: Box<dyn State>,
+}
+
+impl ReconcileCatalogProcedure {
+    pub const TYPE_NAME: &'static str = "metasrv-procedure::ReconcileCatalog";
+
+    pub fn new(
+        ctx: Context,
+        catalog: String,
+        fast_fail: bool,
+        resolve_strategy: ResolveStrategy,
+        parallelism: usize,
+    ) -> Self {
+        let persistent = PersistentContext::new(catalog, fast_fail, resolve_strategy, parallelism);
+        Self {
+            context: ReconcileCatalogContext::new(ctx, persistent),
+            state: Box::new(ReconcileCatalogStart),
+        }
+    }
+
+    /// Rebuilds the procedure from its dumped JSON; the volatile context starts at its default.
+    pub(crate) fn from_json(ctx: Context, json: &str) -> ProcedureResult<Self> {
+        let data: ProcedureDataOwned = serde_json::from_str(json).context(FromJsonSnafu)?;
+        Ok(Self {
+            context: ReconcileCatalogContext::new(ctx, data.persistent_ctx),
+            state: data.state,
+        })
+    }
+}
+
+#[derive(Debug, Serialize)]
+struct ProcedureData<'a> {
+    state: &'a dyn State, // current state of the procedure
+    persistent_ctx: &'a PersistentContext, // the only context that is dumped
+}
+
+#[derive(Debug, Deserialize)]
+struct ProcedureDataOwned {
+    state: Box<dyn State>, // state to resume from
+    persistent_ctx: PersistentContext, // restored persistent context
+}
+
+#[async_trait::async_trait]
+impl Procedure for ReconcileCatalogProcedure {
+    fn type_name(&self) -> &str {
+        Self::TYPE_NAME
+    }
+
+    /// Runs the current state once, timing the step and counting failures by error type.
+    async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
+        let state = &mut self.state;
+        let procedure_name = Self::TYPE_NAME;
+        let step = state.name();
+        let _timer = metrics::METRIC_META_RECONCILIATION_PROCEDURE
+            .with_label_values(&[procedure_name, step])
+            .start_timer();
+        match state.next(&mut self.context, _ctx).await {
+            Ok((next, status)) => {
+                *state = next;
+                Ok(status)
+            }
+            Err(e) => {
+                if e.is_retry_later() {
+                    metrics::METRIC_META_RECONCILIATION_PROCEDURE_ERROR
+                        .with_label_values(&[procedure_name, step, metrics::ERROR_TYPE_RETRYABLE])
+                        .inc();
+                    Err(ProcedureError::retry_later(e))
+                } else {
+                    metrics::METRIC_META_RECONCILIATION_PROCEDURE_ERROR
+                        .with_label_values(&[procedure_name, step, metrics::ERROR_TYPE_EXTERNAL])
+                        .inc();
+                    Err(ProcedureError::external(e))
+                }
+            }
+        }
+    }
+
+    fn dump(&self) -> ProcedureResult<String> {
+        let data = ProcedureData {
+            state: self.state.as_ref(),
+            persistent_ctx: &self.context.persistent_ctx,
+        };
+        // Serialization failures are `ToJson` errors; `FromJson` is for `from_json`.
+        serde_json::to_string(&data).context(common_procedure::error::ToJsonSnafu)
+    }
+
+    fn lock_key(&self) -> LockKey {
+        let catalog = &self.context.persistent_ctx.catalog;
+        LockKey::new(vec![CatalogLock::Write(catalog).into()])
+    }
+}
+
+#[async_trait::async_trait]
+#[typetag::serde(tag = "reconcile_catalog_state")]
+pub(crate) trait State: Sync + Send + Debug {
+    fn name(&self) -> &'static str {
+        let type_name = std::any::type_name::<Self>();
+        // Short name: the last `::`-separated segment of the type name.
+        type_name.split("::").last().unwrap_or(type_name)
+    }
+
+    async fn next(
+        &mut self,
+        ctx: &mut ReconcileCatalogContext,
+        procedure_ctx: &ProcedureContext,
+    ) -> Result<(Box<dyn State>, Status)>;
+
+    fn as_any(&self) -> &dyn Any;
+}
--- a/src/common/meta/src/reconciliation/reconcile_catalog/end.rs
+++ b/src/common/meta/src/reconciliation/reconcile_catalog/end.rs
@@ -0,0 +1,48 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::any::Any;
+
+use common_procedure::{Context as ProcedureContext, Status};
+use common_telemetry::info;
+use serde::{Deserialize, Serialize};
+
+use crate::error::Result;
+use crate::reconciliation::reconcile_catalog::{ReconcileCatalogContext, State};
+
+#[derive(Debug, Serialize, Deserialize)]
+pub(crate) struct ReconcileCatalogEnd;
+
+#[async_trait::async_trait]
+#[typetag::serde]
+impl State for ReconcileCatalogEnd {
+    async fn next(
+        &mut self,
+        ctx: &mut ReconcileCatalogContext,
+        procedure_ctx: &ProcedureContext,
+    ) -> Result<(Box<dyn State>, Status)> {
+        info!(
+            "Catalog reconciliation completed. catalog: {}, procedure_id: {}, metrics: {}, elapsed: {:?}",
+            ctx.persistent_ctx.catalog,
+            procedure_ctx.procedure_id,
+            ctx.volatile_ctx.metrics,
+            ctx.volatile_ctx.start_time.elapsed()
+        );
+        Ok((Box::new(ReconcileCatalogEnd), Status::done())) // final state: the procedure is done
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+}
--- a/src/common/meta/src/reconciliation/reconcile_catalog/reconcile_databases.rs
+++ b/src/common/meta/src/reconciliation/reconcile_catalog/reconcile_databases.rs
@@ -0,0 +1,104 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::any::Any;
+
+use common_procedure::{Context as ProcedureContext, ProcedureWithId, Status};
+use common_telemetry::info;
+use futures::TryStreamExt;
+use serde::{Deserialize, Serialize};
+
+use crate::error::Result;
+use crate::reconciliation::reconcile_catalog::end::ReconcileCatalogEnd;
+use crate::reconciliation::reconcile_catalog::{ReconcileCatalogContext, State};
+use crate::reconciliation::reconcile_database::ReconcileDatabaseProcedure;
+use crate::reconciliation::utils::{Context, SubprocedureMeta};
+
+/// State that walks the schemas of the catalog and reconciles them one at a time,
+/// each through a database reconciliation subprocedure.
+#[derive(Debug, Serialize, Deserialize)]
+pub(crate) struct ReconcileDatabases;
+
+#[async_trait::async_trait]
+#[typetag::serde]
+impl State for ReconcileDatabases {
+    /// Waits for the subprocedure scheduled by the previous call (if any).
+    ///
+    /// Then either schedules the next schema and stays in this state, or moves on to
+    /// [`ReconcileCatalogEnd`] when the schema stream has no more items.
+    async fn next(
+        &mut self,
+        ctx: &mut ReconcileCatalogContext,
+        procedure_ctx: &ProcedureContext,
+    ) -> Result<(Box<dyn State>, Status)> {
+        // Waits for inflight subprocedure first.
+        ctx.wait_for_inflight_subprocedure(procedure_ctx).await?;
+
+        // Opens the schema stream on first use. It lives in the volatile context, so
+        // a procedure restored from JSON starts with a new stream.
+        let schemas = ctx.volatile_ctx.schemas.get_or_insert_with(|| {
+            ctx.table_metadata_manager
+                .schema_manager()
+                .schema_names(&ctx.persistent_ctx.catalog)
+        });
+
+        // An `Err` item from the stream is propagated to the caller by `?`.
+        match schemas.try_next().await? {
+            Some(schema) => Self::schedule_reconcile_database(ctx, schema),
+            None => Ok((Box::new(ReconcileCatalogEnd), Status::executing(false))),
+        }
+    }
+
+    /// Returns `self` as [`Any`].
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+}
+
+impl ReconcileDatabases {
+    /// Creates a database reconciliation subprocedure for `schema`, records it as the
+    /// inflight subprocedure, and suspends on it while remaining in this state.
+    fn schedule_reconcile_database(
+        ctx: &mut ReconcileCatalogContext,
+        schema: String,
+    ) -> Result<(Box<dyn State>, Status)> {
+        let catalog = &ctx.persistent_ctx.catalog;
+        let context = Context {
+            node_manager: ctx.node_manager.clone(),
+            table_metadata_manager: ctx.table_metadata_manager.clone(),
+            cache_invalidator: ctx.cache_invalidator.clone(),
+        };
+        info!("Scheduling reconcile database: {}, catalog: {}", schema, catalog);
+        // `fast_fail`, `parallelism` and `resolve_strategy` are inherited from this procedure.
+        let procedure = ReconcileDatabaseProcedure::new(
+            context,
+            catalog.clone(),
+            schema.clone(),
+            ctx.persistent_ctx.fast_fail,
+            ctx.persistent_ctx.parallelism,
+            ctx.persistent_ctx.resolve_strategy,
+            true, // is_subprocedure
+        );
+        let subprocedure = ProcedureWithId::with_random_id(Box::new(procedure));
+        let meta =
+            SubprocedureMeta::new_reconcile_database(subprocedure.id, catalog.clone(), schema);
+        // Remembered so the next call to this state can wait for it.
+        ctx.volatile_ctx.inflight_subprocedure = Some(meta);
+
+        Ok((
+            Box::new(ReconcileDatabases),
+            Status::suspended(vec![subprocedure], false),
+        ))
+    }
+}
--- a/src/common/meta/src/reconciliation/reconcile_catalog/start.rs
+++ b/src/common/meta/src/reconciliation/reconcile_catalog/start.rs
@@ -0,0 +1,58 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::any::Any;
+
+use common_procedure::{Context as ProcedureContext, Status};
+use serde::{Deserialize, Serialize};
+use snafu::ensure;
+
+use crate::error::{self, Result};
+use crate::key::catalog_name::CatalogNameKey;
+use crate::reconciliation::reconcile_catalog::reconcile_databases::ReconcileDatabases;
+use crate::reconciliation::reconcile_catalog::{ReconcileCatalogContext, State};
+
+/// Initial state of the catalog reconciliation: verifies that the catalog exists
+/// before any schema is reconciled.
+#[derive(Debug, Serialize, Deserialize)]
+pub(crate) struct ReconcileCatalogStart;
+
+#[async_trait::async_trait]
+#[typetag::serde]
+impl State for ReconcileCatalogStart {
+    /// Checks the existence of the catalog.
+    ///
+    /// Fails with `CatalogNotFound` if it is missing; otherwise moves on to [`ReconcileDatabases`].
+    async fn next(
+        &mut self,
+        ctx: &mut ReconcileCatalogContext,
+        _procedure_ctx: &ProcedureContext,
+    ) -> Result<(Box<dyn State>, Status)> {
+        let catalog = &ctx.persistent_ctx.catalog;
+        let key = CatalogNameKey { catalog };
+        let exists = ctx
+            .table_metadata_manager
+            .catalog_manager()
+            .exists(key)
+            .await?;
+        ensure!(exists, error::CatalogNotFoundSnafu { catalog });
+
+        Ok((Box::new(ReconcileDatabases), Status::executing(true)))
+    }
+
+    /// Returns `self` as [`Any`].
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+}
--- a/src/common/meta/src/reconciliation/reconcile_database.rs
+++ b/src/common/meta/src/reconciliation/reconcile_database.rs
@@ -16,16 +16,16 @@ pub(crate) mod end;
 pub(crate) mod reconcile_logical_tables;
 pub(crate) mod reconcile_tables;
 pub(crate) mod start;
-pub(crate) mod utils;

 use std::any::Any;
 use std::collections::HashMap;
 use std::fmt::Debug;
+use std::time::Instant;

 use async_trait::async_trait;
 use common_procedure::error::{FromJsonSnafu, ToJsonSnafu};
 use common_procedure::{
-    Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure, ProcedureId,
+    Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure,
    Result as ProcedureResult, Status,
 };
 use futures::stream::BoxStream;
@@ -39,11 +39,14 @@ use crate::error::Result;
 use crate::key::table_name::TableNameValue;
 use crate::key::TableMetadataManagerRef;
 use crate::lock_key::{CatalogLock, SchemaLock};
+use crate::metrics;
 use crate::node_manager::NodeManagerRef;
 use crate::reconciliation::reconcile_database::start::ReconcileDatabaseStart;
-use crate::reconciliation::reconcile_database::utils::wait_for_inflight_subprocedures;
 use crate::reconciliation::reconcile_table::resolve_column_metadata::ResolveStrategy;
-use crate::reconciliation::utils::Context;
+use crate::reconciliation::utils::{
+    wait_for_inflight_subprocedures, Context, ReconcileDatabaseMetrics, SubprocedureMeta,
+};
+pub(crate) const DEFAULT_PARALLELISM: usize = 64;

 pub(crate) struct ReconcileDatabaseContext {
    pub node_manager: NodeManagerRef,
@@ -64,22 +67,32 @@ impl ReconcileDatabaseContext {
        }
    }

+    /// Waits for inflight subprocedures to complete.
    pub(crate) async fn wait_for_inflight_subprocedures(
        &mut self,
        procedure_ctx: &ProcedureContext,
    ) -> Result<()> {
        if !self.volatile_ctx.inflight_subprocedures.is_empty() {
-            wait_for_inflight_subprocedures(
+            let result = wait_for_inflight_subprocedures(
                procedure_ctx,
                &self.volatile_ctx.inflight_subprocedures,
                self.persistent_ctx.fail_fast,
            )
            .await?;
+
+            // Collects result into metrics
+            let metrics = result.into();
            self.volatile_ctx.inflight_subprocedures.clear();
+            self.volatile_ctx.metrics += metrics;
        }

        Ok(())
    }
+
+    /// Returns a shared reference to the metrics collected so far.
+    pub(crate) fn metrics(&self) -> &ReconcileDatabaseMetrics {
+        &self.volatile_ctx.metrics
+    }
 }

 #[derive(Debug, Serialize, Deserialize)]
@@ -89,6 +102,7 @@ pub(crate) struct PersistentContext {
    fail_fast: bool,
    parallelism: usize,
    resolve_strategy: ResolveStrategy,
+    is_subprocedure: bool,
 }

 impl PersistentContext {
@@ -98,6 +112,7 @@ impl PersistentContext {
        fail_fast: bool,
        parallelism: usize,
        resolve_strategy: ResolveStrategy,
+        is_subprocedure: bool,
    ) -> Self {
        Self {
            catalog,
@@ -105,11 +120,11 @@ impl PersistentContext {
            fail_fast,
            parallelism,
            resolve_strategy,
+            is_subprocedure,
        }
    }
 }

-#[derive(Default)]
 pub(crate) struct VolatileContext {
    /// Stores pending physical tables.
    pending_tables: Vec<(TableId, TableName)>,
@@ -119,9 +134,26 @@ pub(crate) struct VolatileContext {
    /// - Value: Vector of (TableId, TableName) tuples representing logical tables belonging to the physical table.
    pending_logical_tables: HashMap<TableId, Vec<(TableId, TableName)>>,
    /// Stores inflight subprocedures.
-    inflight_subprocedures: Vec<ProcedureId>,
+    inflight_subprocedures: Vec<SubprocedureMeta>,
    /// Stores the stream of tables.
    tables: Option<BoxStream<'static, Result<(String, TableNameValue)>>>,
+    /// The metrics of reconciling database.
+    metrics: ReconcileDatabaseMetrics,
+    /// The start time of the reconciliation.
+    start_time: Instant,
+}
+
+impl Default for VolatileContext {
+    fn default() -> Self {
+        Self {
+            pending_tables: Vec::new(),
+            pending_logical_tables: HashMap::default(),
+            inflight_subprocedures: Vec::new(),
+            tables: None,
+            metrics: Default::default(),
+            start_time: Instant::now(), // the elapsed time is measured from here
+        }
+    }
+}
 }

 pub struct ReconcileDatabaseProcedure {
@@ -139,9 +171,16 @@ impl ReconcileDatabaseProcedure {
        fail_fast: bool,
        parallelism: usize,
        resolve_strategy: ResolveStrategy,
+        is_subprocedure: bool,
    ) -> Self {
-        let persistent_ctx =
-            PersistentContext::new(catalog, schema, fail_fast, parallelism, resolve_strategy);
+        let persistent_ctx = PersistentContext::new(
+            catalog,
+            schema,
+            fail_fast,
+            parallelism,
+            resolve_strategy,
+            is_subprocedure,
+        );
        let context = ReconcileDatabaseContext::new(ctx, persistent_ctx);
        let state = Box::new(ReconcileDatabaseStart);
        Self { context, state }
@@ -178,6 +217,11 @@ impl Procedure for ReconcileDatabaseProcedure {
    async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
        let state = &mut self.state;

+        let procedure_name = Self::TYPE_NAME;
+        let step = state.name();
+        let _timer = metrics::METRIC_META_RECONCILIATION_PROCEDURE
+            .with_label_values(&[procedure_name, step])
+            .start_timer();
        match state.next(&mut self.context, _ctx).await {
            Ok((next, status)) => {
                *state = next;
@@ -185,8 +229,14 @@ impl Procedure for ReconcileDatabaseProcedure {
            }
            Err(e) => {
                if e.is_retry_later() {
+                    metrics::METRIC_META_RECONCILIATION_PROCEDURE_ERROR
+                        .with_label_values(&[procedure_name, step, metrics::ERROR_TYPE_RETRYABLE])
+                        .inc();
                    Err(ProcedureError::retry_later(e))
                } else {
+                    metrics::METRIC_META_RECONCILIATION_PROCEDURE_ERROR
+                        .with_label_values(&[procedure_name, step, metrics::ERROR_TYPE_EXTERNAL])
+                        .inc();
                    Err(ProcedureError::external(e))
                }
            }
@@ -204,6 +254,10 @@ impl Procedure for ReconcileDatabaseProcedure {
    fn lock_key(&self) -> LockKey {
        let catalog = &self.context.persistent_ctx.catalog;
        let schema = &self.context.persistent_ctx.schema;
+        // If the procedure is a subprocedure, only lock the schema.
+        if self.context.persistent_ctx.is_subprocedure {
+            return LockKey::new(vec![SchemaLock::write(catalog, schema).into()]);
+        }

        LockKey::new(vec![
            CatalogLock::Read(catalog).into(),
--- a/src/common/meta/src/reconciliation/reconcile_database/end.rs
+++ b/src/common/meta/src/reconciliation/reconcile_database/end.rs
@@ -15,6 +15,7 @@
 use std::any::Any;

 use common_procedure::{Context as ProcedureContext, Status};
+use common_telemetry::info;
 use serde::{Deserialize, Serialize};

 use crate::error::Result;
@@ -28,9 +29,17 @@ pub(crate) struct ReconcileDatabaseEnd;
 impl State for ReconcileDatabaseEnd {
    async fn next(
        &mut self,
-        _ctx: &mut ReconcileDatabaseContext,
-        _procedure_ctx: &ProcedureContext,
+        ctx: &mut ReconcileDatabaseContext,
+        procedure_ctx: &ProcedureContext,
    ) -> Result<(Box<dyn State>, Status)> {
+        info!(
+            "Database reconciliation completed. schema: {}, catalog: {}, procedure_id: {}, metrics: {}, elapsed: {:?}",
+            ctx.persistent_ctx.schema,
+            ctx.persistent_ctx.catalog,
+            procedure_ctx.procedure_id,
+            ctx.metrics(),
+            ctx.volatile_ctx.start_time.elapsed(),
+        );
        Ok((Box::new(ReconcileDatabaseEnd), Status::done()))
    }

--- a/src/common/meta/src/reconciliation/reconcile_database/reconcile_logical_tables.rs
+++ b/src/common/meta/src/reconciliation/reconcile_database/reconcile_logical_tables.rs
@@ -28,7 +28,8 @@ use crate::error::{Result, TableInfoNotFoundSnafu};
 use crate::key::table_route::TableRouteValue;
 use crate::reconciliation::reconcile_database::end::ReconcileDatabaseEnd;
 use crate::reconciliation::reconcile_database::{ReconcileDatabaseContext, State};
-use crate::reconciliation::utils::Context;
+use crate::reconciliation::reconcile_logical_tables::ReconcileLogicalTablesProcedure;
+use crate::reconciliation::utils::{Context, SubprocedureMeta};

 #[derive(Debug, Serialize, Deserialize)]
 pub(crate) struct ReconcileLogicalTables;
@@ -127,13 +128,12 @@ impl State for ReconcileLogicalTables {
 impl ReconcileLogicalTables {
    fn schedule_reconcile_logical_tables(
        ctx: &mut ReconcileDatabaseContext,
-        buffer: &mut Vec<ProcedureWithId>,
+        buffer: &mut Vec<(ProcedureWithId, SubprocedureMeta)>,
    ) -> Result<(Box<dyn State>, Status)> {
-        let procedures = std::mem::take(buffer);
-        ctx.volatile_ctx
-            .inflight_subprocedures
-            .extend(procedures.iter().map(|p| p.id));
+        let buffer = std::mem::take(buffer);
+        let (procedures, meta): (Vec<_>, Vec<_>) = buffer.into_iter().unzip();

+        ctx.volatile_ctx.inflight_subprocedures.extend(meta);
        Ok((
            Box::new(ReconcileLogicalTables),
            Status::suspended(procedures, false),
@@ -141,7 +141,7 @@ impl ReconcileLogicalTables {
    }

    fn should_schedule_reconcile_logical_tables(
-        buffer: &[ProcedureWithId],
+        buffer: &[(ProcedureWithId, SubprocedureMeta)],
        parallelism: usize,
    ) -> bool {
        buffer.len() >= parallelism
@@ -151,7 +151,7 @@ impl ReconcileLogicalTables {
        ctx: &Context,
        pending_logical_tables: &mut HashMap<TableId, Vec<(TableId, TableName)>>,
        parallelism: usize,
-    ) -> Result<Option<ProcedureWithId>> {
+    ) -> Result<Option<(ProcedureWithId, SubprocedureMeta)>> {
        let mut physical_table_id = None;
        for (table_id, tables) in pending_logical_tables.iter() {
            if tables.len() >= parallelism {
@@ -175,7 +175,7 @@ impl ReconcileLogicalTables {
    async fn build_remaining_procedures(
        ctx: &Context,
        pending_logical_tables: &mut HashMap<TableId, Vec<(TableId, TableName)>>,
-        pending_procedures: &mut Vec<ProcedureWithId>,
+        pending_procedures: &mut Vec<(ProcedureWithId, SubprocedureMeta)>,
        parallelism: usize,
    ) -> Result<()> {
        if pending_logical_tables.is_empty() {
@@ -201,8 +201,8 @@ impl ReconcileLogicalTables {
    async fn build_reconcile_logical_tables_procedure(
        ctx: &Context,
        physical_table_id: TableId,
-        _logical_tables: Vec<(TableId, TableName)>,
-    ) -> Result<ProcedureWithId> {
+        logical_tables: Vec<(TableId, TableName)>,
+    ) -> Result<(ProcedureWithId, SubprocedureMeta)> {
        let table_info = ctx
            .table_metadata_manager
            .table_info_manager()
@@ -212,8 +212,22 @@ impl ReconcileLogicalTables {
                table: format!("table_id: {}", physical_table_id),
            })?;

-        let _physical_table_name = table_info.table_name();
-        todo!()
+        let physical_table_name = table_info.table_name();
+        let procedure = ReconcileLogicalTablesProcedure::new(
+            ctx.clone(),
+            physical_table_id,
+            physical_table_name.clone(),
+            logical_tables.clone(),
+            true,
+        );
+        let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
+        let subprocedure_meta = SubprocedureMeta::new_logical_table(
+            procedure_with_id.id,
+            physical_table_id,
+            physical_table_name,
+            logical_tables,
+        );
+        Ok((procedure_with_id, subprocedure_meta))
    }

    fn enqueue_logical_table(
--- a/src/common/meta/src/reconciliation/reconcile_database/reconcile_tables.rs
+++ b/src/common/meta/src/reconciliation/reconcile_database/reconcile_tables.rs
@@ -27,7 +27,7 @@ use crate::key::table_route::TableRouteValue;
 use crate::reconciliation::reconcile_database::reconcile_logical_tables::ReconcileLogicalTables;
 use crate::reconciliation::reconcile_database::{ReconcileDatabaseContext, State};
 use crate::reconciliation::reconcile_table::ReconcileTableProcedure;
-use crate::reconciliation::utils::Context;
+use crate::reconciliation::utils::{Context, SubprocedureMeta};

 #[derive(Debug, Serialize, Deserialize)]
 pub(crate) struct ReconcileTables;
@@ -104,14 +104,14 @@ impl ReconcileTables {
        ctx: &mut ReconcileDatabaseContext,
    ) -> Result<(Box<dyn State>, Status)> {
        let tables = std::mem::take(&mut ctx.volatile_ctx.pending_tables);
-        let subprocedures = Self::build_reconcile_table_procedures(ctx, tables);
-        ctx.volatile_ctx
-            .inflight_subprocedures
-            .extend(subprocedures.iter().map(|p| p.id));
-
+        let (procedures, meta): (Vec<_>, Vec<_>) =
+            Self::build_reconcile_table_procedures(ctx, tables)
+                .into_iter()
+                .unzip();
+        ctx.volatile_ctx.inflight_subprocedures.extend(meta);
        Ok((
            Box::new(ReconcileTables),
-            Status::suspended(subprocedures, false),
+            Status::suspended(procedures, false),
        ))
    }

@@ -125,7 +125,7 @@ impl ReconcileTables {
    fn build_reconcile_table_procedures(
        ctx: &ReconcileDatabaseContext,
        tables: Vec<(TableId, TableName)>,
-    ) -> Vec<ProcedureWithId> {
+    ) -> Vec<(ProcedureWithId, SubprocedureMeta)> {
        let mut procedures = Vec::with_capacity(tables.len());
        for (table_id, table_name) in tables {
            let context = Context {
@@ -141,11 +141,13 @@ impl ReconcileTables {
                true,
            );
            let procedure = ProcedureWithId::with_random_id(Box::new(procedure));
+            let meta =
+                SubprocedureMeta::new_physical_table(procedure.id, table_id, table_name.clone());
            info!(
                "Reconcile table: {}, table_id: {}, procedure_id: {}",
                table_name, table_id, procedure.id
            );
-            procedures.push(procedure)
+            procedures.push((procedure, meta));
        }

        procedures
--- a/src/common/meta/src/reconciliation/reconcile_database/start.rs
+++ b/src/common/meta/src/reconciliation/reconcile_database/start.rs
@@ -33,7 +33,7 @@ impl State for ReconcileDatabaseStart {
    async fn next(
        &mut self,
        ctx: &mut ReconcileDatabaseContext,
-        _procedure_ctx: &ProcedureContext,
+        procedure_ctx: &ProcedureContext,
    ) -> Result<(Box<dyn State>, Status)> {
        let exists = ctx
            .table_metadata_manager
@@ -51,8 +51,8 @@ impl State for ReconcileDatabaseStart {
            },
        );
        info!(
-            "Reconcile database: {}, catalog: {}",
-            ctx.persistent_ctx.schema, ctx.persistent_ctx.catalog
+            "Reconcile database: {}, catalog: {}, procedure_id: {}",
+            ctx.persistent_ctx.schema, ctx.persistent_ctx.catalog, procedure_ctx.procedure_id,
        );
        Ok((Box::new(ReconcileTables), Status::executing(true)))
    }
--- a/src/common/meta/src/reconciliation/reconcile_database/utils.rs
+++ b/src/common/meta/src/reconciliation/reconcile_database/utils.rs
@@ -1,79 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use common_procedure::{watcher, Context as ProcedureContext, ProcedureId};
-use common_telemetry::{error, info, warn};
-use futures::future::{join_all, try_join_all};
-use snafu::{OptionExt, ResultExt};
-
-use crate::error::{
-    ProcedureStateReceiverNotFoundSnafu, ProcedureStateReceiverSnafu, Result, WaitProcedureSnafu,
-};
-
-/// Wait for inflight subprocedures.
-///
-/// If `fail_fast` is true, the function will return an error if any subprocedure fails.
-/// Otherwise, the function will continue waiting for all subprocedures to complete.
-pub(crate) async fn wait_for_inflight_subprocedures(
-    procedure_ctx: &ProcedureContext,
-    subprocedures: &[ProcedureId],
-    fail_fast: bool,
-) -> Result<()> {
-    let mut receivers = Vec::with_capacity(subprocedures.len());
-    for procedure_id in subprocedures {
-        let receiver = procedure_ctx
-            .provider
-            .procedure_state_receiver(*procedure_id)
-            .await
-            .context(ProcedureStateReceiverSnafu {
-                procedure_id: *procedure_id,
-            })?
-            .context(ProcedureStateReceiverNotFoundSnafu {
-                procedure_id: *procedure_id,
-            })?;
-        receivers.push(receiver);
-    }
-
-    let mut tasks = Vec::with_capacity(receivers.len());
-    for receiver in receivers.iter_mut() {
-        let fut = watcher::wait(receiver);
-        tasks.push(fut);
-    }
-
-    if fail_fast {
-        try_join_all(tasks).await.context(WaitProcedureSnafu)?;
-    } else {
-        let mut failed = 0;
-        let total = tasks.len();
-        for result in join_all(tasks).await {
-            if let Err(e) = result {
-                error!(e; "inflight subprocedure, procedure_id: {}", procedure_ctx.procedure_id);
-                failed += 1;
-            }
-        }
-        if failed > 0 {
-            warn!(
-                "{} inflight subprocedures failed, total: {}, procedure_id: {}",
-                failed, total, procedure_ctx.procedure_id
-            );
-        } else {
-            info!(
-                "{} inflight subprocedures completed, procedure_id: {}",
-                total, procedure_ctx.procedure_id
-            );
-        }
-    }
-
-    Ok(())
-}
--- a/src/common/meta/src/reconciliation/reconcile_logical_tables.rs
+++ b/src/common/meta/src/reconciliation/reconcile_logical_tables.rs
@@ -40,15 +40,17 @@ use crate::key::table_info::TableInfoValue;
 use crate::key::table_route::PhysicalTableRouteValue;
 use crate::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
 use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
+use crate::metrics;
 use crate::node_manager::NodeManagerRef;
 use crate::reconciliation::reconcile_logical_tables::reconciliation_start::ReconciliationStart;
-use crate::reconciliation::utils::Context;
+use crate::reconciliation::utils::{Context, ReconcileLogicalTableMetrics};

 pub struct ReconcileLogicalTablesContext {
    pub node_manager: NodeManagerRef,
    pub table_metadata_manager: TableMetadataManagerRef,
    pub cache_invalidator: CacheInvalidatorRef,
    pub persistent_ctx: PersistentContext,
+    pub volatile_ctx: VolatileContext,
 }

 impl ReconcileLogicalTablesContext {
@@ -59,16 +61,29 @@ impl ReconcileLogicalTablesContext {
            table_metadata_manager: ctx.table_metadata_manager,
            cache_invalidator: ctx.cache_invalidator,
            persistent_ctx,
+            volatile_ctx: VolatileContext::default(),
        }
    }

+    /// Returns the physical table name.
    pub(crate) fn table_name(&self) -> &TableName {
        &self.persistent_ctx.table_name
    }

+    /// Returns the physical table id.
    pub(crate) fn table_id(&self) -> TableId {
        self.persistent_ctx.table_id
    }
+
+    /// Returns a mutable reference to the metrics.
+    pub(crate) fn mut_metrics(&mut self) -> &mut ReconcileLogicalTableMetrics {
+        &mut self.volatile_ctx.metrics
+    }
+
+    /// Returns a reference to the metrics.
+    pub(crate) fn metrics(&self) -> &ReconcileLogicalTableMetrics {
+        &self.volatile_ctx.metrics
+    }
 }

 #[derive(Debug, Serialize, Deserialize)]
@@ -120,6 +135,11 @@ impl PersistentContext {
    }
 }

+#[derive(Default)]
+pub(crate) struct VolatileContext {
+    pub(crate) metrics: ReconcileLogicalTableMetrics,
+}
+
 pub struct ReconcileLogicalTablesProcedure {
    pub context: ReconcileLogicalTablesContext,
    state: Box<dyn State>,
@@ -173,6 +193,11 @@ impl Procedure for ReconcileLogicalTablesProcedure {
    async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
        let state = &mut self.state;

+        let procedure_name = Self::TYPE_NAME;
+        let step = state.name();
+        let _timer = metrics::METRIC_META_RECONCILIATION_PROCEDURE
+            .with_label_values(&[procedure_name, step])
+            .start_timer();
        match state.next(&mut self.context, _ctx).await {
            Ok((next, status)) => {
                *state = next;
@@ -180,8 +205,14 @@ impl Procedure for ReconcileLogicalTablesProcedure {
            }
            Err(e) => {
                if e.is_retry_later() {
+                    metrics::METRIC_META_RECONCILIATION_PROCEDURE_ERROR
+                        .with_label_values(&[procedure_name, step, metrics::ERROR_TYPE_RETRYABLE])
+                        .inc();
                    Err(ProcedureError::retry_later(e))
                } else {
+                    metrics::METRIC_META_RECONCILIATION_PROCEDURE_ERROR
+                        .with_label_values(&[procedure_name, step, metrics::ERROR_TYPE_EXTERNAL])
+                        .inc();
                    Err(ProcedureError::external(e))
                }
            }
--- a/src/common/meta/src/reconciliation/reconcile_logical_tables/reconciliation_end.rs
+++ b/src/common/meta/src/reconciliation/reconcile_logical_tables/reconciliation_end.rs
@@ -15,6 +15,7 @@
 use std::any::Any;

 use common_procedure::{Context as ProcedureContext, Status};
+use common_telemetry::info;
 use serde::{Deserialize, Serialize};

 use crate::error::Result;
@@ -28,9 +29,21 @@ pub struct ReconciliationEnd;
 impl State for ReconciliationEnd {
    async fn next(
        &mut self,
-        _ctx: &mut ReconcileLogicalTablesContext,
-        _procedure_ctx: &ProcedureContext,
+        ctx: &mut ReconcileLogicalTablesContext,
+        procedure_ctx: &ProcedureContext,
    ) -> Result<(Box<dyn State>, Status)> {
+        let table_id = ctx.table_id();
+        let table_name = ctx.table_name();
+        let metrics = ctx.metrics();
+
+        info!(
+            "Logical tables reconciliation completed. logical tables: {:?}, physical_table_id: {}, table_name: {}, procedure_id: {}, metrics: {}",
+            ctx.persistent_ctx.logical_table_ids,
+            table_id,
+            table_name,
+            procedure_ctx.procedure_id,
+            metrics
+        );
        Ok((Box::new(ReconciliationEnd), Status::done()))
    }

--- a/src/common/meta/src/reconciliation/reconcile_logical_tables/reconciliation_start.rs
+++ b/src/common/meta/src/reconciliation/reconcile_logical_tables/reconciliation_start.rs
@@ -25,8 +25,11 @@ use crate::ddl::utils::region_metadata_lister::RegionMetadataLister;
 use crate::ddl::utils::table_id::get_all_table_ids_by_names;
 use crate::ddl::utils::table_info::all_logical_table_routes_have_same_physical_id;
 use crate::error::{self, Result};
+use crate::metrics;
 use crate::reconciliation::reconcile_logical_tables::resolve_table_metadatas::ResolveTableMetadatas;
-use crate::reconciliation::reconcile_logical_tables::{ReconcileLogicalTablesContext, State};
+use crate::reconciliation::reconcile_logical_tables::{
+    ReconcileLogicalTablesContext, ReconcileLogicalTablesProcedure, State,
+};
 use crate::reconciliation::utils::check_column_metadatas_consistent;

 /// The start state of the reconciliation procedure.
@@ -39,7 +42,7 @@ impl State for ReconciliationStart {
    async fn next(
        &mut self,
        ctx: &mut ReconcileLogicalTablesContext,
-        _procedure_ctx: &ProcedureContext,
+        procedure_ctx: &ProcedureContext,
    ) -> Result<(Box<dyn State>, Status)> {
        let table_id = ctx.table_id();
        let table_name = ctx.table_name();
@@ -58,35 +61,48 @@ impl State for ReconciliationStart {
            }
        );

-        info!(
-            "Starting reconciliation for logical table: table_id: {}, table_name: {}",
-            table_id, table_name
-        );
-
        let region_metadata_lister = RegionMetadataLister::new(ctx.node_manager.clone());
-        let region_metadatas = region_metadata_lister
-            .list(physical_table_id, &physical_table_route.region_routes)
-            .await?;
+        let region_metadatas = {
+            let _timer = metrics::METRIC_META_RECONCILIATION_LIST_REGION_METADATA_DURATION
+                .with_label_values(&[metrics::TABLE_TYPE_PHYSICAL])
+                .start_timer();
+            region_metadata_lister
+                .list(physical_table_id, &physical_table_route.region_routes)
+                .await?
+        };
+
+        ensure!(!region_metadatas.is_empty(), {
+            metrics::METRIC_META_RECONCILIATION_STATS
+                .with_label_values(&[
+                    ReconcileLogicalTablesProcedure::TYPE_NAME,
+                    metrics::TABLE_TYPE_PHYSICAL,
+                    metrics::STATS_TYPE_NO_REGION_METADATA,
+                ])
+                .inc();

-        ensure!(
-            !region_metadatas.is_empty(),
            error::UnexpectedSnafu {
                err_msg: format!(
-                    "No region metadata found for table: {}, table_id: {}",
+                    "No region metadata found for physical table: {}, table_id: {}",
                    table_name, table_id
                ),
            }
-        );
+        });

-        if region_metadatas.iter().any(|r| r.is_none()) {
-            return error::UnexpectedSnafu {
+        ensure!(region_metadatas.iter().all(|r| r.is_some()), {
+            metrics::METRIC_META_RECONCILIATION_STATS
+                .with_label_values(&[
+                    ReconcileLogicalTablesProcedure::TYPE_NAME,
+                    metrics::TABLE_TYPE_PHYSICAL,
+                    metrics::STATS_TYPE_REGION_NOT_OPEN,
+                ])
+                .inc();
+            error::UnexpectedSnafu {
                err_msg: format!(
-                    "Some regions of the physical table are not open. Table: {}, table_id: {}",
+                    "Some regions of the physical table are not open. physical table: {}, table_id: {}",
                    table_name, table_id
                ),
            }
-            .fail();
-        }
+        });

        // Safety: checked above
        let region_metadatas = region_metadatas
@@ -96,14 +112,13 @@ impl State for ReconciliationStart {
        let _region_metadata = check_column_metadatas_consistent(&region_metadatas).context(
            error::UnexpectedSnafu {
                err_msg: format!(
-                    "Column metadatas are not consistent for table: {}, table_id: {}",
+                    "Column metadatas are not consistent for physical table: {}, table_id: {}",
                    table_name, table_id
                ),
            },
        )?;

        // TODO(weny): ensure all columns in region metadata can be found in table info.
-
        // Validates the logical tables.
        Self::validate_schema(&ctx.persistent_ctx.logical_tables)?;
        let table_refs = ctx
@@ -119,6 +134,12 @@ impl State for ReconciliationStart {
        .await?;
        Self::validate_logical_table_routes(ctx, &table_ids).await?;

+        let table_name = ctx.table_name();
+        info!(
+            "Starting reconciliation for logical tables: {:?}, physical_table_id: {}, table_name: {}, procedure_id: {}",
+            table_ids, table_id, table_name, procedure_ctx.procedure_id
+        );
+
        ctx.persistent_ctx.physical_table_route = Some(physical_table_route);
        ctx.persistent_ctx.logical_table_ids = table_ids;
        Ok((Box::new(ResolveTableMetadatas), Status::executing(true)))
--- a/src/common/meta/src/reconciliation/reconcile_logical_tables/resolve_table_metadatas.rs
+++ b/src/common/meta/src/reconciliation/reconcile_logical_tables/resolve_table_metadatas.rs
@@ -22,8 +22,11 @@ use snafu::ensure;
 use crate::ddl::utils::region_metadata_lister::RegionMetadataLister;
 use crate::ddl::utils::table_info::get_all_table_info_values_by_table_ids;
 use crate::error::{self, Result};
+use crate::metrics;
 use crate::reconciliation::reconcile_logical_tables::reconcile_regions::ReconcileRegions;
-use crate::reconciliation::reconcile_logical_tables::{ReconcileLogicalTablesContext, State};
+use crate::reconciliation::reconcile_logical_tables::{
+    ReconcileLogicalTablesContext, ReconcileLogicalTablesProcedure, State,
+};
 use crate::reconciliation::utils::{
    check_column_metadatas_consistent, need_update_logical_table_info,
 };
@@ -65,22 +68,38 @@ impl State for ResolveTableMetadatas {
            .unwrap()
            .region_routes;
        let region_metadata_lister = RegionMetadataLister::new(ctx.node_manager.clone());
+        let mut metadata_consistent_count = 0;
+        let mut metadata_inconsistent_count = 0;
+        let mut create_tables_count = 0;
        for (table_id, table_info_value) in table_ids.iter().zip(table_info_values.iter()) {
-            let region_metadatas = region_metadata_lister
-                .list(*table_id, region_routes)
-                .await?;
+            let region_metadatas = {
+                let _timer = metrics::METRIC_META_RECONCILIATION_LIST_REGION_METADATA_DURATION
+                    .with_label_values(&[metrics::TABLE_TYPE_LOGICAL])
+                    .start_timer();
+                region_metadata_lister
+                    .list(*table_id, region_routes)
+                    .await?
+            };
+
+            ensure!(!region_metadatas.is_empty(), {
+                metrics::METRIC_META_RECONCILIATION_STATS
+                    .with_label_values(&[
+                        ReconcileLogicalTablesProcedure::TYPE_NAME,
+                        metrics::TABLE_TYPE_LOGICAL,
+                        metrics::STATS_TYPE_NO_REGION_METADATA,
+                    ])
+                    .inc();

-            ensure!(
-                !region_metadatas.is_empty(),
                error::UnexpectedSnafu {
                    err_msg: format!(
                        "No region metadata found for table: {}, table_id: {}",
                        table_info_value.table_info.name, table_id
                    ),
                }
-            );
+            });

            if region_metadatas.iter().any(|r| r.is_none()) {
+                create_tables_count += 1;
                create_tables.push((*table_id, table_info_value.table_info.clone()));
                continue;
            }
@@ -91,10 +110,12 @@ impl State for ResolveTableMetadatas {
                .map(|r| r.unwrap())
                .collect::<Vec<_>>();
            if let Some(column_metadatas) = check_column_metadatas_consistent(&region_metadatas) {
+                metadata_consistent_count += 1;
                if need_update_logical_table_info(&table_info_value.table_info, &column_metadatas) {
                    update_table_infos.push((*table_id, column_metadatas));
                }
            } else {
+                metadata_inconsistent_count += 1;
                // If the logical regions have inconsistent column metadatas, it won't affect read and write.
                // It's safe to continue if the column metadatas of the logical table are inconsistent.
                warn!(
@@ -121,6 +142,11 @@ impl State for ResolveTableMetadatas {
        );
        ctx.persistent_ctx.update_table_infos = update_table_infos;
        ctx.persistent_ctx.create_tables = create_tables;
+        // Update metrics.
+        let metrics = ctx.mut_metrics();
+        metrics.column_metadata_consistent_count = metadata_consistent_count;
+        metrics.column_metadata_inconsistent_count = metadata_inconsistent_count;
+        metrics.create_tables_count = create_tables_count;
        Ok((Box::new(ReconcileRegions), Status::executing(true)))
    }

--- a/src/common/meta/src/reconciliation/reconcile_logical_tables/update_table_infos.rs
+++ b/src/common/meta/src/reconciliation/reconcile_logical_tables/update_table_infos.rs
@@ -96,6 +96,7 @@ impl State for UpdateTableInfos {
        let table_id = ctx.table_id();
        let table_name = ctx.table_name();

+        let updated_table_info_num = table_info_values_to_update.len();
        batch_update_table_info_values(&ctx.table_metadata_manager, table_info_values_to_update)
            .await?;

@@ -122,6 +123,9 @@ impl State for UpdateTableInfos {
            .await?;

        ctx.persistent_ctx.update_table_infos.clear();
+        // Update metrics.
+        let metrics = ctx.mut_metrics();
+        metrics.update_table_info_count = updated_table_info_num;
        Ok((Box::new(ReconciliationEnd), Status::executing(false)))
    }

--- a/src/common/meta/src/reconciliation/reconcile_table.rs
+++ b/src/common/meta/src/reconciliation/reconcile_table.rs
@@ -40,10 +40,13 @@ use crate::key::table_info::TableInfoValue;
 use crate::key::table_route::PhysicalTableRouteValue;
 use crate::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
 use crate::lock_key::{CatalogLock, SchemaLock, TableNameLock};
+use crate::metrics;
 use crate::node_manager::NodeManagerRef;
 use crate::reconciliation::reconcile_table::reconciliation_start::ReconciliationStart;
 use crate::reconciliation::reconcile_table::resolve_column_metadata::ResolveStrategy;
-use crate::reconciliation::utils::{build_table_meta_from_column_metadatas, Context};
+use crate::reconciliation::utils::{
+    build_table_meta_from_column_metadatas, Context, ReconcileTableMetrics,
+};

 pub struct ReconcileTableContext {
    pub node_manager: NodeManagerRef,
@@ -65,13 +68,46 @@ impl ReconcileTableContext {
        }
    }

+    /// Returns the physical table name.
    pub(crate) fn table_name(&self) -> &TableName {
        &self.persistent_ctx.table_name
    }

+    /// Returns the physical table id.
    pub(crate) fn table_id(&self) -> TableId {
        self.persistent_ctx.table_id
    }
+
+    /// Builds a [`RawTableMeta`] from the provided [`ColumnMetadata`]s.
+    pub(crate) fn build_table_meta(
+        &self,
+        column_metadatas: &[ColumnMetadata],
+    ) -> Result<RawTableMeta> {
+        // Safety: The table info value is set in `ReconciliationStart` state.
+        let table_info_value = self.persistent_ctx.table_info_value.as_ref().unwrap();
+        let table_id = self.table_id();
+        let table_ref = self.table_name().table_ref();
+        let name_to_ids = table_info_value.table_info.name_to_ids();
+        let table_meta = build_table_meta_from_column_metadatas(
+            table_id,
+            table_ref,
+            &table_info_value.table_info.meta,
+            name_to_ids,
+            column_metadatas,
+        )?;
+
+        Ok(table_meta)
+    }
+
+    /// Returns a mutable reference to the metrics.
+    pub(crate) fn mut_metrics(&mut self) -> &mut ReconcileTableMetrics {
+        &mut self.volatile_ctx.metrics
+    }
+
+    /// Returns a reference to the metrics.
+    pub(crate) fn metrics(&self) -> &ReconcileTableMetrics {
+        &self.volatile_ctx.metrics
+    }
 }

 #[derive(Debug, Serialize, Deserialize)]
@@ -110,29 +146,7 @@ impl PersistentContext {
 #[derive(Default)]
 pub(crate) struct VolatileContext {
    pub(crate) table_meta: Option<RawTableMeta>,
-}
-
-impl ReconcileTableContext {
-    /// Builds a [`RawTableMeta`] from the provided [`ColumnMetadata`]s.
-    pub(crate) fn build_table_meta(
-        &self,
-        column_metadatas: &[ColumnMetadata],
-    ) -> Result<RawTableMeta> {
-        // Safety: The table info value is set in `ReconciliationStart` state.
-        let table_info_value = self.persistent_ctx.table_info_value.as_ref().unwrap();
-        let table_id = self.table_id();
-        let table_ref = self.table_name().table_ref();
-        let name_to_ids = table_info_value.table_info.name_to_ids();
-        let table_meta = build_table_meta_from_column_metadatas(
-            table_id,
-            table_ref,
-            &table_info_value.table_info.meta,
-            name_to_ids,
-            column_metadatas,
-        )?;
-
-        Ok(table_meta)
-    }
+    pub(crate) metrics: ReconcileTableMetrics,
 }

 pub struct ReconcileTableProcedure {
@@ -191,6 +205,11 @@ impl Procedure for ReconcileTableProcedure {
    async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
        let state = &mut self.state;

+        let procedure_name = Self::TYPE_NAME;
+        let step = state.name();
+        let _timer = metrics::METRIC_META_RECONCILIATION_PROCEDURE
+            .with_label_values(&[procedure_name, step])
+            .start_timer();
        match state.next(&mut self.context, _ctx).await {
            Ok((next, status)) => {
                *state = next;
@@ -198,8 +217,14 @@ impl Procedure for ReconcileTableProcedure {
            }
            Err(e) => {
                if e.is_retry_later() {
+                    metrics::METRIC_META_RECONCILIATION_PROCEDURE_ERROR
+                        .with_label_values(&[procedure_name, step, metrics::ERROR_TYPE_RETRYABLE])
+                        .inc();
                    Err(ProcedureError::retry_later(e))
                } else {
+                    metrics::METRIC_META_RECONCILIATION_PROCEDURE_ERROR
+                        .with_label_values(&[procedure_name, step, metrics::ERROR_TYPE_EXTERNAL])
+                        .inc();
                    Err(ProcedureError::external(e))
                }
            }
--- a/src/common/meta/src/reconciliation/reconcile_table/reconciliation_end.rs
+++ b/src/common/meta/src/reconciliation/reconcile_table/reconciliation_end.rs
@@ -15,6 +15,7 @@
 use std::any::Any;

 use common_procedure::{Context as ProcedureContext, Status};
+use common_telemetry::info;
 use serde::{Deserialize, Serialize};
 use tonic::async_trait;

@@ -31,9 +32,18 @@ pub struct ReconciliationEnd;
 impl State for ReconciliationEnd {
    async fn next(
        &mut self,
-        _ctx: &mut ReconcileTableContext,
-        _procedure_ctx: &ProcedureContext,
+        ctx: &mut ReconcileTableContext,
+        procedure_ctx: &ProcedureContext,
    ) -> Result<(Box<dyn State>, Status)> {
+        let table_id = ctx.table_id();
+        let table_name = ctx.table_name();
+        let metrics = ctx.metrics();
+
+        info!(
+            "Physical table reconciliation completed. table_name: {}, table_id: {}, procedure_id: {}, metrics: {}",
+            table_name, table_id, procedure_ctx.procedure_id, metrics
+        );
+
        Ok((Box::new(ReconciliationEnd), Status::done()))
    }

--- a/src/common/meta/src/reconciliation/reconcile_table/reconciliation_start.rs
+++ b/src/common/meta/src/reconciliation/reconcile_table/reconciliation_start.rs
@@ -20,9 +20,12 @@ use serde::{Deserialize, Serialize};
 use snafu::ensure;

 use crate::ddl::utils::region_metadata_lister::RegionMetadataLister;
-use crate::error::{self, Result, UnexpectedSnafu};
+use crate::error::{self, Result};
+use crate::metrics::{self};
 use crate::reconciliation::reconcile_table::resolve_column_metadata::ResolveColumnMetadata;
-use crate::reconciliation::reconcile_table::{ReconcileTableContext, State};
+use crate::reconciliation::reconcile_table::{
+    ReconcileTableContext, ReconcileTableProcedure, State,
+};

 /// The start state of the reconciliation procedure.
 ///
@@ -40,7 +43,7 @@ impl State for ReconciliationStart {
    async fn next(
        &mut self,
        ctx: &mut ReconcileTableContext,
-        _procedure_ctx: &ProcedureContext,
+        procedure_ctx: &ProcedureContext,
    ) -> Result<(Box<dyn State>, Status)> {
        let table_id = ctx.table_id();
        let table_name = ctx.table_name();
@@ -60,33 +63,56 @@ impl State for ReconciliationStart {
            }
        );

-        info!("Reconciling table: {}, table_id: {}", table_name, table_id);
+        info!(
+            "Reconciling table: {}, table_id: {}, procedure_id: {}",
+            table_name, table_id, procedure_ctx.procedure_id
+        );
        // TODO(weny): Repairs the table route if needed.
        let region_metadata_lister = RegionMetadataLister::new(ctx.node_manager.clone());
-        // Always list region metadatas for the physical table.
-        let region_metadatas = region_metadata_lister
-            .list(physical_table_id, &physical_table_route.region_routes)
-            .await?;

-        ensure!(
-            !region_metadatas.is_empty(),
+        let region_metadatas = {
+            let _timer = metrics::METRIC_META_RECONCILIATION_LIST_REGION_METADATA_DURATION
+                .with_label_values(&[metrics::TABLE_TYPE_PHYSICAL])
+                .start_timer();
+            // Always list region metadatas for the physical table.
+            region_metadata_lister
+                .list(physical_table_id, &physical_table_route.region_routes)
+                .await?
+        };
+
+        ensure!(!region_metadatas.is_empty(), {
+            metrics::METRIC_META_RECONCILIATION_STATS
+                .with_label_values(&[
+                    ReconcileTableProcedure::TYPE_NAME,
+                    metrics::TABLE_TYPE_PHYSICAL,
+                    metrics::STATS_TYPE_NO_REGION_METADATA,
+                ])
+                .inc();
+
            error::UnexpectedSnafu {
                err_msg: format!(
                    "No region metadata found for table: {}, table_id: {}",
                    table_name, table_id
                ),
            }
-        );
+        });

-        if region_metadatas.iter().any(|r| r.is_none()) {
-            return UnexpectedSnafu {
+        ensure!(region_metadatas.iter().all(|r| r.is_some()), {
+            metrics::METRIC_META_RECONCILIATION_STATS
+                .with_label_values(&[
+                    ReconcileTableProcedure::TYPE_NAME,
+                    metrics::TABLE_TYPE_PHYSICAL,
+                    metrics::STATS_TYPE_REGION_NOT_OPEN,
+                ])
+                .inc();
+
+            error::UnexpectedSnafu {
                err_msg: format!(
                    "Some regions are not opened, table: {}, table_id: {}",
                    table_name, table_id
                ),
            }
-            .fail();
-        }
+        });

        // Persist the physical table route.
        // TODO(weny): refetch the physical table route if repair is needed.
--- a/src/common/meta/src/reconciliation/reconcile_table/resolve_column_metadata.rs
+++ b/src/common/meta/src/reconciliation/reconcile_table/resolve_column_metadata.rs
@@ -20,6 +20,7 @@ use common_telemetry::info;
 use serde::{Deserialize, Serialize};
 use snafu::OptionExt;
 use store_api::metadata::RegionMetadata;
+use strum::AsRefStr;

 use crate::error::{self, MissingColumnIdsSnafu, Result};
 use crate::reconciliation::reconcile_table::reconcile_regions::ReconcileRegions;
@@ -28,21 +29,33 @@ use crate::reconciliation::reconcile_table::{ReconcileTableContext, State};
 use crate::reconciliation::utils::{
    build_column_metadata_from_table_info, check_column_metadatas_consistent,
    resolve_column_metadatas_with_latest, resolve_column_metadatas_with_metasrv,
+    ResolveColumnMetadataResult,
 };

 /// Strategy for resolving column metadata inconsistencies.
-#[derive(Debug, Serialize, Deserialize, Clone, Copy)]
-pub(crate) enum ResolveStrategy {
-    /// Always uses the column metadata from metasrv.
-    UseMetasrv,
-
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, Default, AsRefStr)]
+pub enum ResolveStrategy {
+    #[default]
    /// Trusts the latest column metadata from datanode.
    UseLatest,

+    /// Always uses the column metadata from metasrv.
+    UseMetasrv,
+
    /// Aborts the resolution process if inconsistencies are detected.
    AbortOnConflict,
 }

+impl From<api::v1::meta::ResolveStrategy> for ResolveStrategy {
+    fn from(strategy: api::v1::meta::ResolveStrategy) -> Self {
+        match strategy {
+            api::v1::meta::ResolveStrategy::UseMetasrv => Self::UseMetasrv,
+            api::v1::meta::ResolveStrategy::UseLatest => Self::UseLatest,
+            api::v1::meta::ResolveStrategy::AbortOnConflict => Self::AbortOnConflict,
+        }
+    }
+}
+
 /// State responsible for resolving inconsistencies in column metadata across physical regions.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct ResolveColumnMetadata {
@@ -87,6 +100,10 @@ impl State for ResolveColumnMetadata {
                "Column metadatas are consistent for table: {}, table_id: {}.",
                table_name, table_id
            );
+
+            // Update metrics.
+            ctx.mut_metrics().resolve_column_metadata_result =
+                Some(ResolveColumnMetadataResult::Consistent);
            return Ok((
                Box::new(UpdateTableInfo::new(table_info_value, column_metadatas)),
                Status::executing(false),
@@ -108,6 +125,11 @@ impl State for ResolveColumnMetadata {

                let region_ids =
                    resolve_column_metadatas_with_metasrv(&column_metadata, &self.region_metadata)?;
+
+                // Update metrics.
+                let metrics = ctx.mut_metrics();
+                metrics.resolve_column_metadata_result =
+                    Some(ResolveColumnMetadataResult::Inconsistent(self.strategy));
                Ok((
                    Box::new(ReconcileRegions::new(column_metadata, region_ids)),
                    Status::executing(true),
@@ -116,16 +138,29 @@ impl State for ResolveColumnMetadata {
            ResolveStrategy::UseLatest => {
                let (column_metadatas, region_ids) =
                    resolve_column_metadatas_with_latest(&self.region_metadata)?;
+
+                // Update metrics.
+                let metrics = ctx.mut_metrics();
+                metrics.resolve_column_metadata_result =
+                    Some(ResolveColumnMetadataResult::Inconsistent(self.strategy));
                Ok((
                    Box::new(ReconcileRegions::new(column_metadatas, region_ids)),
                    Status::executing(true),
                ))
            }
-            ResolveStrategy::AbortOnConflict => error::ColumnMetadataConflictsSnafu {
-                table_name: table_name.to_string(),
-                table_id,
+            ResolveStrategy::AbortOnConflict => {
+                let table_name = table_name.to_string();
+
+                // Update metrics.
+                let metrics = ctx.mut_metrics();
+                metrics.resolve_column_metadata_result =
+                    Some(ResolveColumnMetadataResult::Inconsistent(self.strategy));
+                error::ColumnMetadataConflictsSnafu {
+                    table_name,
+                    table_id,
+                }
+                .fail()
            }
-            .fail(),
        }
    }

--- a/src/common/meta/src/reconciliation/reconcile_table/update_table_info.rs
+++ b/src/common/meta/src/reconciliation/reconcile_table/update_table_info.rs
@@ -116,6 +116,9 @@ impl State for UpdateTableInfo {
                ],
            )
            .await?;
+        // Update metrics.
+        let metrics = ctx.mut_metrics();
+        metrics.update_table_info = true;

        Ok((Box::new(ReconciliationEnd), Status::executing(true)))
    }
--- a/src/common/meta/src/reconciliation/utils.rs
+++ b/src/common/meta/src/reconciliation/utils.rs
@@ -13,12 +13,16 @@
 // limitations under the License.

 use std::collections::{HashMap, HashSet};
-use std::fmt;
+use std::fmt::{self, Display};
+use std::ops::AddAssign;
+use std::time::Instant;

 use api::v1::SemanticType;
-use common_telemetry::warn;
+use common_procedure::{watcher, Context as ProcedureContext, ProcedureId};
+use common_telemetry::{error, warn};
 use datatypes::schema::ColumnSchema;
-use snafu::{ensure, OptionExt};
+use futures::future::{join_all, try_join_all};
+use snafu::{ensure, OptionExt, ResultExt};
 use store_api::metadata::{ColumnMetadata, RegionMetadata};
 use store_api::storage::{RegionId, TableId};
 use table::metadata::{RawTableInfo, RawTableMeta};
@@ -27,11 +31,17 @@ use table::table_reference::TableReference;

 use crate::cache_invalidator::CacheInvalidatorRef;
 use crate::error::{
-    self, MismatchColumnIdSnafu, MissingColumnInColumnMetadataSnafu, Result, UnexpectedSnafu,
+    ColumnIdMismatchSnafu, ColumnNotFoundSnafu, MismatchColumnIdSnafu,
+    MissingColumnInColumnMetadataSnafu, ProcedureStateReceiverNotFoundSnafu,
+    ProcedureStateReceiverSnafu, Result, TimestampMismatchSnafu, UnexpectedSnafu,
+    WaitProcedureSnafu,
 };
-use crate::key::table_name::{TableNameKey, TableNameManager};
 use crate::key::TableMetadataManagerRef;
+use crate::metrics;
 use crate::node_manager::NodeManagerRef;
+use crate::reconciliation::reconcile_logical_tables::ReconcileLogicalTablesProcedure;
+use crate::reconciliation::reconcile_table::resolve_column_metadata::ResolveStrategy;
+use crate::reconciliation::reconcile_table::ReconcileTableProcedure;

 #[derive(Debug, PartialEq, Eq)]
 pub(crate) struct PartialRegionMetadata<'a> {
@@ -50,20 +60,6 @@ impl<'a> From<&'a RegionMetadata> for PartialRegionMetadata<'a> {
    }
 }

-/// A display wrapper for [`ColumnMetadata`] that formats the column metadata in a more readable way.
-struct ColumnMetadataDisplay<'a>(pub &'a ColumnMetadata);
-
-impl<'a> fmt::Debug for ColumnMetadataDisplay<'a> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let col = self.0;
-        write!(
-            f,
-            "Column {{ name: {}, id: {}, semantic_type: {:?}, data_type: {:?} }}",
-            col.column_schema.name, col.column_id, col.semantic_type, col.column_schema.data_type,
-        )
-    }
-}
-
 /// Checks if the column metadatas are consistent.
 ///
 /// The column metadatas are consistent if:
@@ -112,21 +108,7 @@ pub(crate) fn resolve_column_metadatas_with_metasrv(
    let mut regions_ids = vec![];
    for region_metadata in region_metadatas {
        if region_metadata.column_metadatas != column_metadatas {
-            let is_invariant_preserved = check_column_metadata_invariants(
-                column_metadatas,
-                &region_metadata.column_metadatas,
-            );
-            ensure!(
-                is_invariant_preserved,
-                UnexpectedSnafu {
-                    err_msg: format!(
-                        "Column metadata invariants violated for region {}. Resolved column metadata: {:?}, region column metadata: {:?}",
-                        region_metadata.region_id,
-                        column_metadatas.iter().map(ColumnMetadataDisplay).collect::<Vec<_>>(),
-                        region_metadata.column_metadatas.iter().map(ColumnMetadataDisplay).collect::<Vec<_>>(),
-                    )
-                }
-            );
+            check_column_metadata_invariants(column_metadatas, &region_metadata.column_metadatas)?;
            regions_ids.push(region_metadata.region_id);
        }
    }
@@ -165,21 +147,10 @@ pub(crate) fn resolve_column_metadatas_with_latest(
    let mut region_ids = vec![];
    for region_metadata in region_metadatas {
        if PartialRegionMetadata::from(region_metadata) != latest_column_metadatas {
-            let is_invariant_preserved = check_column_metadata_invariants(
+            check_column_metadata_invariants(
                &latest_region_metadata.column_metadatas,
                &region_metadata.column_metadatas,
-            );
-            ensure!(
-                is_invariant_preserved,
-                UnexpectedSnafu {
-                    err_msg: format!(
-                        "Column metadata invariants violated for region {}. Resolved column metadata: {:?}, region column metadata: {:?}",
-                        region_metadata.region_id,
-                        latest_column_metadatas.column_metadatas.iter().map(ColumnMetadataDisplay).collect::<Vec<_>>(),
-                        region_metadata.column_metadatas.iter().map(ColumnMetadataDisplay).collect::<Vec<_>>()
-                    )
-                }
-            );
+            )?;
            region_ids.push(region_metadata.region_id);
        }
    }
@@ -241,7 +212,7 @@ pub(crate) fn build_column_metadata_from_table_info(
 pub(crate) fn check_column_metadata_invariants(
    new_column_metadatas: &[ColumnMetadata],
    column_metadatas: &[ColumnMetadata],
-) -> bool {
+) -> Result<()> {
    let new_primary_keys = new_column_metadatas
        .iter()
        .filter(|c| c.semantic_type == SemanticType::Tag)
@@ -254,22 +225,50 @@ pub(crate) fn check_column_metadata_invariants(
        .map(|c| (c.column_schema.name.as_str(), c.column_id));

    for (name, id) in old_primary_keys {
-        if new_primary_keys.get(name) != Some(&id) {
-            return false;
-        }
+        let column_id = new_primary_keys
+            .get(name)
+            .cloned()
+            .context(ColumnNotFoundSnafu {
+                column_name: name,
+                column_id: id,
+            })?;
+
+        ensure!(
+            column_id == id,
+            ColumnIdMismatchSnafu {
+                column_name: name,
+                expected_column_id: id,
+                actual_column_id: column_id,
+            }
+        );
    }

    let new_ts_column = new_column_metadatas
        .iter()
        .find(|c| c.semantic_type == SemanticType::Timestamp)
-        .map(|c| (c.column_schema.name.as_str(), c.column_id));
+        .map(|c| (c.column_schema.name.as_str(), c.column_id))
+        .context(UnexpectedSnafu {
+            err_msg: "Timestamp column not found in new column metadata",
+        })?;

    let old_ts_column = column_metadatas
        .iter()
        .find(|c| c.semantic_type == SemanticType::Timestamp)
-        .map(|c| (c.column_schema.name.as_str(), c.column_id));
+        .map(|c| (c.column_schema.name.as_str(), c.column_id))
+        .context(UnexpectedSnafu {
+            err_msg: "Timestamp column not found in column metadata",
+        })?;
+    ensure!(
+        new_ts_column == old_ts_column,
+        TimestampMismatchSnafu {
+            expected_column_name: old_ts_column.0,
+            expected_column_id: old_ts_column.1,
+            actual_column_name: new_ts_column.0,
+            actual_column_id: new_ts_column.1,
+        }
+    );

-    new_ts_column == old_ts_column
+    Ok(())
 }

 /// Builds a [`RawTableMeta`] from the provided [`ColumnMetadata`]s.
@@ -397,87 +396,6 @@ pub(crate) fn build_table_meta_from_column_metadatas(
    Ok(new_raw_table_meta)
 }

-/// Validates the table id and name consistency.
-///
-/// It will check the table id and table name consistency.
-/// If the table id and table name are not consistent, it will return an error.
-pub(crate) async fn validate_table_id_and_name(
-    table_name_manager: &TableNameManager,
-    table_id: TableId,
-    table_name: &TableName,
-) -> Result<()> {
-    let table_name_key = TableNameKey::new(
-        &table_name.catalog_name,
-        &table_name.schema_name,
-        &table_name.table_name,
-    );
-    let table_name_value = table_name_manager
-        .get(table_name_key)
-        .await?
-        .with_context(|| error::TableNotFoundSnafu {
-            table_name: table_name.to_string(),
-        })?;
-
-    ensure!(
-        table_name_value.table_id() == table_id,
-        error::UnexpectedSnafu {
-            err_msg: format!(
-                "The table id mismatch for table: {}, expected {}, actual {}",
-                table_name,
-                table_id,
-                table_name_value.table_id()
-            ),
-        }
-    );
-
-    Ok(())
-}
-
-/// Checks whether the column metadata invariants hold for the logical table.
-///
-/// Invariants:
-/// - Primary key (Tag) columns must exist in the new metadata.
-/// - Timestamp column must remain exactly the same in name and ID.
-///
-/// TODO(weny): add tests
-pub(crate) fn check_column_metadatas_invariants_for_logical_table(
-    column_metadatas: &[ColumnMetadata],
-    table_info: &RawTableInfo,
-) -> bool {
-    let new_primary_keys = column_metadatas
-        .iter()
-        .filter(|c| c.semantic_type == SemanticType::Tag)
-        .map(|c| c.column_schema.name.as_str())
-        .collect::<HashSet<_>>();
-
-    let old_primary_keys = table_info
-        .meta
-        .primary_key_indices
-        .iter()
-        .map(|i| table_info.meta.schema.column_schemas[*i].name.as_str());
-
-    for name in old_primary_keys {
-        if !new_primary_keys.contains(name) {
-            return false;
-        }
-    }
-
-    let old_timestamp_column_name = table_info
-        .meta
-        .schema
-        .column_schemas
-        .iter()
-        .find(|c| c.is_time_index())
-        .map(|c| c.name.as_str());
-
-    let new_timestamp_column_name = column_metadatas
-        .iter()
-        .find(|c| c.semantic_type == SemanticType::Timestamp)
-        .map(|c| c.column_schema.name.as_str());
-
-    old_timestamp_column_name != new_timestamp_column_name
-}
-
 /// Returns true if the logical table info needs to be updated.
 ///
 /// The logical table only support to add columns, so we can check the length of column metadatas
@@ -489,6 +407,88 @@ pub(crate) fn need_update_logical_table_info(
    table_info.meta.schema.column_schemas.len() != column_metadatas.len()
 }

+/// The outcome of waiting for inflight subprocedures when at least one of them failed.
+pub struct PartialSuccessResult<'a> {
+    pub failed_procedures: Vec<&'a SubprocedureMeta>,
+    pub success_procedures: Vec<&'a SubprocedureMeta>,
+}
+
+/// The result of waiting for inflight subprocedures.
+pub enum WaitForInflightSubproceduresResult<'a> {
+    Success(Vec<&'a SubprocedureMeta>),
+    PartialSuccess(PartialSuccessResult<'a>),
+}
+
+/// Wait for inflight subprocedures.
+///
+/// If `fail_fast` is true, the function returns an error as soon as any subprocedure fails.
+/// Otherwise, it waits for all subprocedures to complete and reports failures as a partial success.
+pub(crate) async fn wait_for_inflight_subprocedures<'a>(
+    procedure_ctx: &ProcedureContext,
+    subprocedures: &'a [SubprocedureMeta],
+    fail_fast: bool,
+) -> Result<WaitForInflightSubproceduresResult<'a>> {
+    let mut receivers = Vec::with_capacity(subprocedures.len());
+    for subprocedure in subprocedures {
+        let procedure_id = subprocedure.procedure_id();
+        let receiver = procedure_ctx
+            .provider
+            .procedure_state_receiver(procedure_id)
+            .await
+            .context(ProcedureStateReceiverSnafu { procedure_id })?
+            .context(ProcedureStateReceiverNotFoundSnafu { procedure_id })?;
+        receivers.push((receiver, subprocedure));
+    }
+
+    let mut tasks = Vec::with_capacity(receivers.len());
+    for (receiver, subprocedure) in receivers.iter_mut() {
+        tasks.push(async move {
+            watcher::wait(receiver).await.inspect_err(|e| {
+                error!(e; "inflight subprocedure failed, parent procedure_id: {}, procedure: {}", procedure_ctx.procedure_id, subprocedure);
+            })
+        });
+    }
+
+    if fail_fast {
+        try_join_all(tasks).await.context(WaitProcedureSnafu)?;
+        return Ok(WaitForInflightSubproceduresResult::Success(
+            subprocedures.iter().collect(),
+        ));
+    }
+
+    // If fail_fast is false, we need to wait for all subprocedures to complete.
+    let results = join_all(tasks).await;
+    let failed_procedures_num = results.iter().filter(|r| r.is_err()).count();
+    if failed_procedures_num == 0 {
+        return Ok(WaitForInflightSubproceduresResult::Success(
+            subprocedures.iter().collect(),
+        ));
+    }
+    warn!(
+        "{} inflight subprocedures failed, total: {}, parent procedure_id: {}",
+        failed_procedures_num,
+        subprocedures.len(),
+        procedure_ctx.procedure_id
+    );
+
+    let mut failed_procedures = Vec::with_capacity(failed_procedures_num);
+    let mut success_procedures = Vec::with_capacity(subprocedures.len() - failed_procedures_num);
+    for (result, subprocedure) in results.into_iter().zip(subprocedures) {
+        if result.is_err() {
+            failed_procedures.push(subprocedure);
+        } else {
+            success_procedures.push(subprocedure);
+        }
+    }
+
+    Ok(WaitForInflightSubproceduresResult::PartialSuccess(
+        PartialSuccessResult {
+            failed_procedures,
+            success_procedures,
+        },
+    ))
+}
+
 #[derive(Clone)]
 pub struct Context {
    pub node_manager: NodeManagerRef,
@@ -496,6 +496,446 @@ pub struct Context {
    pub cache_invalidator: CacheInvalidatorRef,
 }

+/// Metadata for an inflight physical table subprocedure.
+pub struct PhysicalTableMeta {
+    pub procedure_id: ProcedureId,
+    pub table_id: TableId,
+    pub table_name: TableName,
+}
+
+/// Metadata for an inflight logical table subprocedure.
+pub struct LogicalTableMeta {
+    pub procedure_id: ProcedureId,
+    pub physical_table_id: TableId,
+    pub physical_table_name: TableName,
+    pub logical_tables: Vec<(TableId, TableName)>,
+}
+
+/// Metadata for an inflight database subprocedure.
+pub struct ReconcileDatabaseMeta {
+    pub procedure_id: ProcedureId,
+    pub catalog: String,
+    pub schema: String,
+}
+
+/// The inflight subprocedure metadata.
+pub enum SubprocedureMeta {
+    PhysicalTable(PhysicalTableMeta),
+    LogicalTable(LogicalTableMeta),
+    Database(ReconcileDatabaseMeta),
+}
+
+impl Display for SubprocedureMeta {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            SubprocedureMeta::PhysicalTable(meta) => {
+                write!(
+                    f,
+                    "ReconcilePhysicalTable(procedure_id: {}, table_id: {}, table_name: {})",
+                    meta.procedure_id, meta.table_id, meta.table_name
+                )
+            }
+            SubprocedureMeta::LogicalTable(meta) => {
+                write!(
+                    f,
+                    "ReconcileLogicalTable(procedure_id: {}, physical_table_id: {}, physical_table_name: {}, logical_tables: {:?})",
+                    meta.procedure_id, meta.physical_table_id, meta.physical_table_name, meta.logical_tables
+                )
+            }
+            SubprocedureMeta::Database(meta) => {
+                write!(
+                    f,
+                    "ReconcileDatabase(procedure_id: {}, catalog: {}, schema: {})",
+                    meta.procedure_id, meta.catalog, meta.schema
+                )
+            }
+        }
+    }
+}
+
+impl SubprocedureMeta {
+    /// Creates a new logical table subprocedure metadata.
+    pub fn new_logical_table(
+        procedure_id: ProcedureId,
+        physical_table_id: TableId,
+        physical_table_name: TableName,
+        logical_tables: Vec<(TableId, TableName)>,
+    ) -> Self {
+        Self::LogicalTable(LogicalTableMeta {
+            procedure_id,
+            physical_table_id,
+            physical_table_name,
+            logical_tables,
+        })
+    }
+
+    /// Creates a new physical table subprocedure metadata.
+    pub fn new_physical_table(
+        procedure_id: ProcedureId,
+        table_id: TableId,
+        table_name: TableName,
+    ) -> Self {
+        Self::PhysicalTable(PhysicalTableMeta {
+            procedure_id,
+            table_id,
+            table_name,
+        })
+    }
+
+    /// Creates a new reconcile database subprocedure metadata.
+    pub fn new_reconcile_database(
+        procedure_id: ProcedureId,
+        catalog: String,
+        schema: String,
+    ) -> Self {
+        Self::Database(ReconcileDatabaseMeta {
+            procedure_id,
+            catalog,
+            schema,
+        })
+    }
+
+    /// Returns the procedure id of the subprocedure.
+    pub fn procedure_id(&self) -> ProcedureId {
+        match self {
+            SubprocedureMeta::PhysicalTable(meta) => meta.procedure_id,
+            SubprocedureMeta::LogicalTable(meta) => meta.procedure_id,
+            SubprocedureMeta::Database(meta) => meta.procedure_id,
+        }
+    }
+
+    /// Returns the number of tables that will be reconciled.
+    pub fn table_num(&self) -> usize {
+        match self {
+            SubprocedureMeta::PhysicalTable(_) => 1,
+            SubprocedureMeta::LogicalTable(meta) => meta.logical_tables.len(),
+            SubprocedureMeta::Database(_) => 0,
+        }
+    }
+
+    /// Returns the number of databases that will be reconciled.
+    pub fn database_num(&self) -> usize {
+        match self {
+            SubprocedureMeta::Database(_) => 1,
+            _ => 0,
+        }
+    }
+}
+
+/// The metrics of reconciling a catalog.
+#[derive(Clone, Default)]
+pub struct ReconcileCatalogMetrics {
+    pub succeeded_databases: usize,
+    pub failed_databases: usize,
+}
+
+impl AddAssign for ReconcileCatalogMetrics {
+    fn add_assign(&mut self, other: Self) {
+        self.succeeded_databases += other.succeeded_databases;
+        self.failed_databases += other.failed_databases;
+    }
+}
+
+impl Display for ReconcileCatalogMetrics {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "succeeded_databases: {}, failed_databases: {}",
+            self.succeeded_databases, self.failed_databases
+        )
+    }
+}
+
+impl From<WaitForInflightSubproceduresResult<'_>> for ReconcileCatalogMetrics {
+    fn from(result: WaitForInflightSubproceduresResult<'_>) -> Self {
+        match result {
+            WaitForInflightSubproceduresResult::Success(subprocedures) => ReconcileCatalogMetrics {
+                succeeded_databases: subprocedures.len(),
+                failed_databases: 0,
+            },
+            WaitForInflightSubproceduresResult::PartialSuccess(PartialSuccessResult {
+                failed_procedures,
+                success_procedures,
+            }) => {
+                let succeeded_databases = success_procedures
+                    .iter()
+                    .map(|subprocedure| subprocedure.database_num())
+                    .sum();
+                let failed_databases = failed_procedures
+                    .iter()
+                    .map(|subprocedure| subprocedure.database_num())
+                    .sum();
+                ReconcileCatalogMetrics {
+                    succeeded_databases,
+                    failed_databases,
+                }
+            }
+        }
+    }
+}
+
+/// The metrics of reconciling a database.
+#[derive(Clone, Default)]
+pub struct ReconcileDatabaseMetrics {
+    pub succeeded_tables: usize,
+    pub failed_tables: usize,
+    pub succeeded_procedures: usize,
+    pub failed_procedures: usize,
+}
+
+impl Display for ReconcileDatabaseMetrics {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "succeeded_tables: {}, failed_tables: {}, succeeded_procedures: {}, failed_procedures: {}", self.succeeded_tables, self.failed_tables, self.succeeded_procedures, self.failed_procedures)
+    }
+}
+
+impl AddAssign for ReconcileDatabaseMetrics {
+    fn add_assign(&mut self, other: Self) {
+        self.succeeded_tables += other.succeeded_tables;
+        self.failed_tables += other.failed_tables;
+        self.succeeded_procedures += other.succeeded_procedures;
+        self.failed_procedures += other.failed_procedures;
+    }
+}
+
+impl From<WaitForInflightSubproceduresResult<'_>> for ReconcileDatabaseMetrics {
+    fn from(result: WaitForInflightSubproceduresResult<'_>) -> Self {
+        match result {
+            WaitForInflightSubproceduresResult::Success(subprocedures) => {
+                let table_num = subprocedures
+                    .iter()
+                    .map(|subprocedure| subprocedure.table_num())
+                    .sum();
+                ReconcileDatabaseMetrics {
+                    succeeded_procedures: subprocedures.len(),
+                    failed_procedures: 0,
+                    succeeded_tables: table_num,
+                    failed_tables: 0,
+                }
+            }
+            WaitForInflightSubproceduresResult::PartialSuccess(PartialSuccessResult {
+                failed_procedures,
+                success_procedures,
+            }) => {
+                let succeeded_tables = success_procedures
+                    .iter()
+                    .map(|subprocedure| subprocedure.table_num())
+                    .sum();
+                let failed_tables = failed_procedures
+                    .iter()
+                    .map(|subprocedure| subprocedure.table_num())
+                    .sum();
+                ReconcileDatabaseMetrics {
+                    succeeded_procedures: success_procedures.len(),
+                    failed_procedures: failed_procedures.len(),
+                    succeeded_tables,
+                    failed_tables,
+                }
+            }
+        }
+    }
+}
+
+/// The metrics of reconciling logical tables.
+#[derive(Clone)]
+pub struct ReconcileLogicalTableMetrics {
+    pub start_time: Instant,
+    pub update_table_info_count: usize,
+    pub create_tables_count: usize,
+    pub column_metadata_consistent_count: usize,
+    pub column_metadata_inconsistent_count: usize,
+}
+
+impl Default for ReconcileLogicalTableMetrics {
+    fn default() -> Self {
+        Self {
+            start_time: Instant::now(),
+            update_table_info_count: 0,
+            create_tables_count: 0,
+            column_metadata_consistent_count: 0,
+            column_metadata_inconsistent_count: 0,
+        }
+    }
+}
+
+const CREATE_TABLES: &str = "create_tables";
+const UPDATE_TABLE_INFO: &str = "update_table_info";
+const COLUMN_METADATA_CONSISTENT: &str = "column_metadata_consistent";
+const COLUMN_METADATA_INCONSISTENT: &str = "column_metadata_inconsistent";
+
+impl ReconcileLogicalTableMetrics {
+    /// The total number of tables that have been reconciled.
+    pub fn total_table_count(&self) -> usize {
+        self.create_tables_count
+            + self.column_metadata_consistent_count
+            + self.column_metadata_inconsistent_count
+    }
+}
+
+impl Drop for ReconcileLogicalTableMetrics {
+    fn drop(&mut self) {
+        let procedure_name = ReconcileLogicalTablesProcedure::TYPE_NAME;
+        metrics::METRIC_META_RECONCILIATION_STATS
+            .with_label_values(&[procedure_name, metrics::TABLE_TYPE_LOGICAL, CREATE_TABLES])
+            .inc_by(self.create_tables_count as u64);
+        metrics::METRIC_META_RECONCILIATION_STATS
+            .with_label_values(&[
+                procedure_name,
+                metrics::TABLE_TYPE_LOGICAL,
+                UPDATE_TABLE_INFO,
+            ])
+            .inc_by(self.update_table_info_count as u64);
+        metrics::METRIC_META_RECONCILIATION_STATS
+            .with_label_values(&[
+                procedure_name,
+                metrics::TABLE_TYPE_LOGICAL,
+                COLUMN_METADATA_CONSISTENT,
+            ])
+            .inc_by(self.column_metadata_consistent_count as u64);
+        metrics::METRIC_META_RECONCILIATION_STATS
+            .with_label_values(&[
+                procedure_name,
+                metrics::TABLE_TYPE_LOGICAL,
+                COLUMN_METADATA_INCONSISTENT,
+            ])
+            .inc_by(self.column_metadata_inconsistent_count as u64);
+    }
+}
+
+impl Display for ReconcileLogicalTableMetrics {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let elapsed = self.start_time.elapsed();
+        if self.create_tables_count > 0 {
+            write!(f, "create_tables_count: {}, ", self.create_tables_count)?;
+        }
+        if self.update_table_info_count > 0 {
+            write!(
+                f,
+                "update_table_info_count: {}, ",
+                self.update_table_info_count
+            )?;
+        }
+        if self.column_metadata_consistent_count > 0 {
+            write!(
+                f,
+                "column_metadata_consistent_count: {}, ",
+                self.column_metadata_consistent_count
+            )?;
+        }
+        if self.column_metadata_inconsistent_count > 0 {
+            write!(
+                f,
+                "column_metadata_inconsistent_count: {}, ",
+                self.column_metadata_inconsistent_count
+            )?;
+        }
+
+        write!(
+            f,
+            "total_table_count: {}, elapsed: {:?}",
+            self.total_table_count(),
+            elapsed
+        )
+    }
+}
+
+/// The result of resolving column metadata.
+#[derive(Clone, Copy)]
+pub enum ResolveColumnMetadataResult {
+    Consistent,
+    Inconsistent(ResolveStrategy),
+}
+
+impl Display for ResolveColumnMetadataResult {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            ResolveColumnMetadataResult::Consistent => write!(f, "Consistent"),
+            ResolveColumnMetadataResult::Inconsistent(strategy) => {
+                let strategy_str = strategy.as_ref();
+                write!(f, "Inconsistent({})", strategy_str)
+            }
+        }
+    }
+}
+
+/// The metrics of reconciling physical tables.
+#[derive(Clone)]
+pub struct ReconcileTableMetrics {
+    /// The start time of the reconciliation.
+    pub start_time: Instant,
+    /// The result of resolving column metadata.
+    pub resolve_column_metadata_result: Option<ResolveColumnMetadataResult>,
+    /// Whether the table info has been updated.
+    pub update_table_info: bool,
+}
+
+impl Drop for ReconcileTableMetrics {
+    fn drop(&mut self) {
+        if let Some(resolve_column_metadata_result) = self.resolve_column_metadata_result {
+            match resolve_column_metadata_result {
+                ResolveColumnMetadataResult::Consistent => {
+                    metrics::METRIC_META_RECONCILIATION_STATS
+                        .with_label_values(&[
+                            ReconcileTableProcedure::TYPE_NAME,
+                            metrics::TABLE_TYPE_PHYSICAL,
+                            COLUMN_METADATA_CONSISTENT,
+                        ])
+                        .inc();
+                }
+                ResolveColumnMetadataResult::Inconsistent(strategy) => {
+                    metrics::METRIC_META_RECONCILIATION_STATS
+                        .with_label_values(&[
+                            ReconcileTableProcedure::TYPE_NAME,
+                            metrics::TABLE_TYPE_PHYSICAL,
+                            COLUMN_METADATA_INCONSISTENT,
+                        ])
+                        .inc();
+                    metrics::METRIC_META_RECONCILIATION_RESOLVED_COLUMN_METADATA
+                        .with_label_values(&[strategy.as_ref()])
+                        .inc();
+                }
+            }
+        }
+        if self.update_table_info {
+            metrics::METRIC_META_RECONCILIATION_STATS
+                .with_label_values(&[
+                    ReconcileTableProcedure::TYPE_NAME,
+                    metrics::TABLE_TYPE_PHYSICAL,
+                    UPDATE_TABLE_INFO,
+                ])
+                .inc();
+        }
+    }
+}
+
+impl Default for ReconcileTableMetrics {
+    fn default() -> Self {
+        Self {
+            start_time: Instant::now(),
+            resolve_column_metadata_result: None,
+            update_table_info: false,
+        }
+    }
+}
+
+impl Display for ReconcileTableMetrics {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let elapsed = self.start_time.elapsed();
+        if let Some(resolve_column_metadata_result) = self.resolve_column_metadata_result {
+            write!(
+                f,
+                "resolve_column_metadata_result: {}, ",
+                resolve_column_metadata_result
+            )?;
+        }
+        write!(
+            f,
+            "update_table_info: {}, elapsed: {:?}",
+            self.update_table_info, elapsed
+        )
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use std::assert_matches::assert_matches;
@@ -748,10 +1188,7 @@ mod tests {
            semantic_type: SemanticType::Field,
            column_id: 3,
        });
-        assert!(check_column_metadata_invariants(
-            &new_column_metadatas,
-            &column_metadatas
-        ));
+        check_column_metadata_invariants(&new_column_metadatas, &column_metadatas).unwrap();
    }

    #[test]
@@ -759,18 +1196,12 @@ mod tests {
        let column_metadatas = new_test_column_metadatas();
        let mut new_column_metadatas = column_metadatas.clone();
        new_column_metadatas.retain(|c| c.semantic_type != SemanticType::Timestamp);
-        assert!(!check_column_metadata_invariants(
-            &new_column_metadatas,
-            &column_metadatas
-        ));
+        check_column_metadata_invariants(&new_column_metadatas, &column_metadatas).unwrap_err();

        let column_metadatas = new_test_column_metadatas();
        let mut new_column_metadatas = column_metadatas.clone();
        new_column_metadatas.retain(|c| c.semantic_type != SemanticType::Tag);
-        assert!(!check_column_metadata_invariants(
-            &new_column_metadatas,
-            &column_metadatas
-        ));
+        check_column_metadata_invariants(&new_column_metadatas, &column_metadatas).unwrap_err();
    }

    #[test]
@@ -783,10 +1214,7 @@ mod tests {
        {
            col.column_id = 100;
        }
-        assert!(!check_column_metadata_invariants(
-            &new_column_metadatas,
-            &column_metadatas
-        ));
+        check_column_metadata_invariants(&new_column_metadatas, &column_metadatas).unwrap_err();

        let column_metadatas = new_test_column_metadatas();
        let mut new_column_metadatas = column_metadatas.clone();
@@ -796,10 +1224,7 @@ mod tests {
        {
            col.column_id = 100;
        }
-        assert!(!check_column_metadata_invariants(
-            &new_column_metadatas,
-            &column_metadatas
-        ));
+        check_column_metadata_invariants(&new_column_metadatas, &column_metadatas).unwrap_err();
    }

    #[test]
--- a/src/common/meta/src/rpc/ddl.rs
+++ b/src/common/meta/src/rpc/ddl.rs
@@ -18,6 +18,7 @@ pub mod trigger;
 use std::collections::{HashMap, HashSet};
 use std::result;

+use api::helper::{from_pb_time_ranges, to_pb_time_ranges};
 use api::v1::alter_database_expr::Kind as PbAlterDatabaseKind;
 use api::v1::meta::ddl_task_request::Task;
 use api::v1::meta::{
@@ -38,7 +39,8 @@ use api::v1::{
 };
 use base64::engine::general_purpose;
 use base64::Engine as _;
-use common_time::{DatabaseTimeToLive, Timezone};
+use common_error::ext::BoxedError;
+use common_time::{DatabaseTimeToLive, Timestamp, Timezone};
 use prost::Message;
 use serde::{Deserialize, Serialize};
 use serde_with::{serde_as, DefaultOnNull};
@@ -49,8 +51,8 @@ use table::table_name::TableName;
 use table::table_reference::TableReference;

 use crate::error::{
-    self, InvalidSetDatabaseOptionSnafu, InvalidTimeZoneSnafu, InvalidUnsetDatabaseOptionSnafu,
-    Result,
+    self, ConvertTimeRangesSnafu, ExternalSnafu, InvalidSetDatabaseOptionSnafu,
+    InvalidTimeZoneSnafu, InvalidUnsetDatabaseOptionSnafu, Result,
 };
 use crate::key::FlowId;

@@ -179,12 +181,14 @@ impl DdlTask {
        schema: String,
        table: String,
        table_id: TableId,
+        time_ranges: Vec<(Timestamp, Timestamp)>,
    ) -> Self {
        DdlTask::TruncateTable(TruncateTableTask {
            catalog,
            schema,
            table,
            table_id,
+            time_ranges,
        })
    }

@@ -826,6 +830,7 @@ pub struct TruncateTableTask {
    pub schema: String,
    pub table: String,
    pub table_id: TableId,
+    pub time_ranges: Vec<(Timestamp, Timestamp)>,
 }

 impl TruncateTableTask {
@@ -864,6 +869,13 @@ impl TryFrom<PbTruncateTableTask> for TruncateTableTask {
                    err_msg: "expected table_id",
                })?
                .id,
+            time_ranges: truncate_table
+                .time_ranges
+                .map(from_pb_time_ranges)
+                .transpose()
+                .map_err(BoxedError::new)
+                .context(ExternalSnafu)?
+                .unwrap_or_default(),
        })
    }
 }
@@ -878,6 +890,9 @@ impl TryFrom<TruncateTableTask> for PbTruncateTableTask {
                schema_name: task.schema,
                table_name: task.table,
                table_id: Some(api::v1::TableId { id: task.table_id }),
+                time_ranges: Some(
+                    to_pb_time_ranges(&task.time_ranges).context(ConvertTimeRangesSnafu)?,
+                ),
            }),
        })
    }
--- a/src/common/meta/src/sequence.rs
+++ b/src/common/meta/src/sequence.rs
@@ -15,7 +15,7 @@
 use std::ops::Range;
 use std::sync::Arc;

-use common_telemetry::warn;
+use common_telemetry::{debug, warn};
 use snafu::ensure;
 use tokio::sync::Mutex;

@@ -95,13 +95,27 @@ impl Sequence {
        inner.initial..inner.max
    }

-    /// Returns the next value without incrementing the sequence.
-    pub async fn peek(&self) -> u64 {
+    /// Returns `max(initial, value stored in the remote storage)` without incrementing the sequence.
+    ///
+    /// This function always fetches the true current state from the remote storage (KV backend),
+    /// ignoring any local cache to provide the most accurate view of the sequence's remote state.
+    /// It does not consume or advance the sequence value.
+    ///
+    /// Note: Since this always queries the remote storage, it may be slower than `next()` but
+    /// provides the most accurate and up-to-date information about the sequence state.
+    pub async fn peek(&self) -> Result<u64> {
        let inner = self.inner.lock().await;
-        inner.next
+        inner.peek().await
    }

    /// Jumps to the given value.
+    ///
+    /// The next value must be greater than both:
+    /// 1. The current local next value
+    /// 2. The current value stored in the remote storage (KV backend)
+    ///
+    /// This ensures the sequence can only move forward and maintains consistency
+    /// across different instances accessing the same sequence.
    pub async fn jump_to(&self, next: u64) -> Result<()> {
        let mut inner = self.inner.lock().await;
        inner.jump_to(next).await
@@ -136,6 +150,7 @@ impl Inner {
                    if range.contains(&self.next) {
                        let res = Ok(self.next);
                        self.next += 1;
+                        debug!("sequence {} next: {}", self.name, self.next);
                        return res;
                    }
                    self.range = None;
@@ -144,6 +159,10 @@ impl Inner {
                    let range = self.next_range().await?;
                    self.next = range.start;
                    self.range = Some(range);
+                    debug!(
+                        "sequence {} next: {}, range: {:?}",
+                        self.name, self.next, self.range
+                    );
                }
            }
        }
@@ -154,6 +173,26 @@ impl Inner {
        .fail()
    }

+    /// Peeks at the value held in remote storage without advancing the sequence.
+    /// Returns `max(initial, stored value)`, or the initial value if nothing is stored yet.
+    pub async fn peek(&self) -> Result<u64> {
+        let key = self.name.as_bytes();
+        let value = self.generator.get(key).await?.map(|kv| kv.value);
+        let next = if let Some(value) = value {
+            let next = self.initial.max(self.parse_sequence_value(value)?);
+            debug!("The next value of sequence {} is {}", self.name, next);
+            next
+        } else {
+            debug!(
+                "The next value of sequence {} is not set, use initial value {}",
+                self.name, self.initial
+            );
+            self.initial
+        };
+
+        Ok(next)
+    }
+
    pub async fn next_range(&self) -> Result<Range<u64>> {
        let key = self.name.as_bytes();
        let mut start = self.next;
@@ -187,16 +226,7 @@ impl Inner {

            if !res.success {
                if let Some(kv) = res.prev_kv {
-                    let v: [u8; 8] = match kv.value.clone().try_into() {
-                        Ok(a) => a,
-                        Err(v) => {
-                            return error::UnexpectedSequenceValueSnafu {
-                                err_msg: format!("Not a valid u64 for '{}': {v:?}", self.name),
-                            }
-                            .fail()
-                        }
-                    };
-                    let v = u64::from_le_bytes(v);
+                    let v = self.parse_sequence_value(kv.value.clone())?;
                    // If the existed value is smaller than the initial, we should start from the initial.
                    start = v.max(self.initial);
                    expect = kv.value;
@@ -220,25 +250,33 @@ impl Inner {
    }

    /// Jumps to the given value.
-    /// The next value must be greater than the current next value.
+    ///
+    /// The next value must be greater than both:
+    /// 1. The current local next value (self.next)
+    /// 2. The current value stored in the remote storage (KV backend)
+    ///
+    /// This ensures the sequence can only move forward and maintains consistency
+    /// across different instances accessing the same sequence.
    pub async fn jump_to(&mut self, next: u64) -> Result<()> {
+        let key = self.name.as_bytes();
+        let current = self.generator.get(key).await?.map(|kv| kv.value);
+
+        let curr_val = match &current {
+            Some(val) => self.initial.max(self.parse_sequence_value(val.clone())?),
+            None => self.initial,
+        };
+
        ensure!(
-            next > self.next,
+            next > curr_val,
            error::UnexpectedSnafu {
                err_msg: format!(
                    "The next value {} is not greater than the current next value {}",
-                    next, self.next
+                    next, curr_val
                ),
            }
        );

-        let key = self.name.as_bytes();
-        let expect = self
-            .generator
-            .get(key)
-            .await?
-            .map(|kv| kv.value)
-            .unwrap_or_default();
+        let expect = current.unwrap_or_default();

        let req = CompareAndPutRequest {
            key: key.to_vec(),
@@ -260,6 +298,20 @@ impl Inner {

        Ok(())
    }
+
+    /// Decodes a stored sequence value: exactly 8 little-endian bytes -> `u64`; any other length is an error.
+    fn parse_sequence_value(&self, value: Vec<u8>) -> Result<u64> {
+        let v: [u8; 8] = match value.try_into() {
+            Ok(a) => a,
+            Err(v) => {
+                return error::UnexpectedSequenceValueSnafu {
+                    err_msg: format!("Not a valid u64 for '{}': {v:?}", self.name),
+                }
+                .fail()
+            }
+        };
+        Ok(u64::from_le_bytes(v))
+    }
 }

 #[cfg(test)]
@@ -388,7 +440,7 @@ mod tests {
    }

    #[tokio::test]
-    async fn test_sequence_out_of_rage() {
+    async fn test_sequence_out_of_range() {
        let seq = SequenceBuilder::new("test_seq", Arc::new(MemoryKvBackend::default()))
            .initial(u64::MAX - 10)
            .step(10)
@@ -458,4 +510,139 @@ mod tests {
        let next = seq.next().await;
        assert!(next.is_err());
    }
+
+    #[tokio::test]
+    async fn test_sequence_peek() {
+        common_telemetry::init_default_ut_logging();
+        let kv_backend = Arc::new(MemoryKvBackend::default());
+        let seq = SequenceBuilder::new("test_seq", kv_backend.clone())
+            .step(10)
+            .initial(1024)
+            .build();
+        // The sequence value in the kv backend is not set, so the peek value should be the initial value.
+        assert_eq!(seq.peek().await.unwrap(), 1024);
+
+        for i in 0..11 {
+            let v = seq.next().await.unwrap();
+            assert_eq!(v, 1024 + i);
+        }
+        let seq = SequenceBuilder::new("test_seq", kv_backend)
+            .initial(1024)
+            .build();
+        // The sequence is not initialized, it will fetch the value from the kv backend.
+        assert_eq!(seq.peek().await.unwrap(), 1044);
+    }
+
+    #[tokio::test]
+    async fn test_sequence_peek_shared_storage() {
+        let kv_backend = Arc::new(MemoryKvBackend::default());
+        let shared_seq = "shared_seq";
+
+        // Create two sequence instances with the SAME name but DIFFERENT configs
+        let seq1 = SequenceBuilder::new(shared_seq, kv_backend.clone())
+            .initial(100)
+            .step(5)
+            .build();
+        let seq2 = SequenceBuilder::new(shared_seq, kv_backend.clone())
+            .initial(200) // different initial
+            .step(3) // different step
+            .build();
+
+        // Initially both return their own initial values when no remote value exists
+        assert_eq!(seq1.peek().await.unwrap(), 100);
+        assert_eq!(seq2.peek().await.unwrap(), 200);
+
+        // seq1 calls next() to allocate range and update remote storage
+        assert_eq!(seq1.next().await.unwrap(), 100);
+        // After seq1.next(), remote storage has 100 + seq1.step(5) = 105
+
+        // peek() returns max(initial, remote): seq1 sees 105, seq2 still gets its own initial(200)
+        assert_eq!(seq1.peek().await.unwrap(), 105);
+        assert_eq!(seq2.peek().await.unwrap(), 200); // remote is 105, but seq2's initial(200) is larger
+
+        // seq2 calls next(), should start from its initial(200)
+        assert_eq!(seq2.next().await.unwrap(), 200);
+        // After seq2.next(), remote storage updated to 200 + seq2.step(3) = 203
+
+        // Both should see the new remote value (seq2's step was used)
+        assert_eq!(seq1.peek().await.unwrap(), 203);
+        assert_eq!(seq2.peek().await.unwrap(), 203);
+
+        // seq1 first drains its cached range (101..=104), then allocates a new range starting at remote(203)
+        assert_eq!(seq1.next().await.unwrap(), 101);
+        assert_eq!(seq1.next().await.unwrap(), 102);
+        assert_eq!(seq1.next().await.unwrap(), 103);
+        assert_eq!(seq1.next().await.unwrap(), 104);
+        assert_eq!(seq1.next().await.unwrap(), 203);
+        // After seq1.next(), remote storage updated to 203 + seq1.step(5) = 208
+        assert_eq!(seq1.peek().await.unwrap(), 208);
+        assert_eq!(seq2.peek().await.unwrap(), 208);
+    }
+
+    #[tokio::test]
+    async fn test_sequence_peek_initial_max_logic() {
+        let kv_backend = Arc::new(MemoryKvBackend::default());
+
+        // Manually set a small value in storage
+        let key = seq_name("test_max").into_bytes();
+        kv_backend
+            .put(
+                PutRequest::new()
+                    .with_key(key)
+                    .with_value(u64::to_le_bytes(50)),
+            )
+            .await
+            .unwrap();
+
+        // Create sequence with larger initial value
+        let seq = SequenceBuilder::new("test_max", kv_backend)
+            .initial(100) // larger than remote value (50)
+            .build();
+
+        // peek() should return max(initial, remote) = max(100, 50) = 100
+        assert_eq!(seq.peek().await.unwrap(), 100);
+
+        // next() should start from the larger initial value
+        assert_eq!(seq.next().await.unwrap(), 100);
+    }
+
+    #[tokio::test]
+    async fn test_sequence_initial_greater_than_storage() {
+        let kv_backend = Arc::new(MemoryKvBackend::default());
+
+        // Test sequence behavior when initial > storage value
+        // This verifies the max(storage, initial) logic works correctly
+
+        // Step 1: Establish a low value in storage
+        let seq1 = SequenceBuilder::new("max_test", kv_backend.clone())
+            .initial(10)
+            .step(5)
+            .build();
+        assert_eq!(seq1.next().await.unwrap(), 10); // storage: 15
+
+        // Step 2: Create sequence with much larger initial
+        let seq2 = SequenceBuilder::new("max_test", kv_backend.clone())
+            .initial(100) // much larger than storage (15)
+            .step(5)
+            .build();
+
+        // seq2 should start from max(15, 100) = 100 (its initial value)
+        assert_eq!(seq2.next().await.unwrap(), 100); // storage updated to: 105
+        assert_eq!(seq2.peek().await.unwrap(), 105);
+
+        // Step 3: Verify subsequent sequences continue from updated storage
+        let seq3 = SequenceBuilder::new("max_test", kv_backend)
+            .initial(50) // smaller than current storage (105)
+            .step(1)
+            .build();
+
+        // seq3 should use max(105, 50) = 105 (storage value)
+        assert_eq!(seq3.peek().await.unwrap(), 105);
+        assert_eq!(seq3.next().await.unwrap(), 105); // storage: 106
+
+        // This demonstrates the correct max(storage, initial) behavior:
+        // - Sequences never generate values below their initial requirement
+        // - Storage always reflects the highest allocated value
+        // - Value gaps (15-99) are acceptable to maintain minimum constraints
+    }
 }
--- a/src/common/options/src/lib.rs
+++ b/src/common/options/src/lib.rs
@@ -13,3 +13,4 @@
 // limitations under the License.

 pub mod datanode;
+pub mod memory;
--- a/src/common/options/src/memory.rs
+++ b/src/common/options/src/memory.rs
@@ -0,0 +1,33 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use serde::{Deserialize, Serialize};
+
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
+#[serde(default)]
+pub struct MemoryOptions {
+    /// Whether to enable heap profiling activation.
+    /// When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
+    /// is set to "prof:true,prof_active:false". The official image adds this env variable.
+    /// Default is true.
+    pub enable_heap_profiling: bool,
+}
+
+impl Default for MemoryOptions {
+    fn default() -> Self {
+        Self {
+            enable_heap_profiling: true,
+        }
+    }
+}
--- a/src/common/procedure/Cargo.toml
+++ b/src/common/procedure/Cargo.toml
@@ -11,11 +11,13 @@ testing = []
 workspace = true

 [dependencies]
+api.workspace = true
 async-stream.workspace = true
 async-trait.workspace = true
 backon.workspace = true
 common-base.workspace = true
 common-error.workspace = true
+common-event-recorder.workspace = true
 common-macro.workspace = true
 common-runtime.workspace = true
 common-telemetry.workspace = true
--- a/src/common/procedure/src/event.rs
+++ b/src/common/procedure/src/event.rs
@@ -0,0 +1,116 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::any::Any;
+
+use api::v1::value::ValueData;
+use api::v1::{ColumnDataType, ColumnSchema, Row, SemanticType};
+use common_event_recorder::error::Result;
+use common_event_recorder::Event;
+use common_time::timestamp::{TimeUnit, Timestamp};
+
+use crate::{ProcedureId, ProcedureState};
+
+pub const EVENTS_TABLE_PROCEDURE_ID_COLUMN_NAME: &str = "procedure_id";
+pub const EVENTS_TABLE_PROCEDURE_STATE_COLUMN_NAME: &str = "procedure_state";
+pub const EVENTS_TABLE_PROCEDURE_ERROR_COLUMN_NAME: &str = "procedure_error";
+
+/// `ProcedureEvent` represents an event emitted by a procedure during its execution lifecycle.
+#[derive(Debug)]
+pub struct ProcedureEvent {
+    /// Unique identifier associated with the originating procedure instance.
+    pub procedure_id: ProcedureId,
+    /// The timestamp of the event; `ProcedureEvent::new` sets it to the current time in nanoseconds.
+    pub timestamp: Timestamp,
+    /// The state of the procedure; error-carrying states fill the `procedure_error` column.
+    pub state: ProcedureState,
+    /// The event emitted by the procedure. It's generated by [Procedure::event].
+    pub internal_event: Box<dyn Event>,
+}
+
+impl ProcedureEvent {
+    pub fn new(
+        procedure_id: ProcedureId,
+        internal_event: Box<dyn Event>,
+        state: ProcedureState,
+    ) -> Self {
+        Self {
+            procedure_id,
+            internal_event,
+            timestamp: Timestamp::current_time(TimeUnit::Nanosecond),
+            state,
+        }
+    }
+}
+
+impl Event for ProcedureEvent {
+    fn event_type(&self) -> &str {
+        self.internal_event.event_type()
+    }
+
+    fn timestamp(&self) -> Timestamp {
+        self.timestamp
+    }
+
+    fn json_payload(&self) -> Result<String> {
+        self.internal_event.json_payload()
+    }
+
+    fn extra_schema(&self) -> Vec<ColumnSchema> {
+        let mut schema = vec![
+            ColumnSchema {
+                column_name: EVENTS_TABLE_PROCEDURE_ID_COLUMN_NAME.to_string(),
+                datatype: ColumnDataType::String.into(),
+                semantic_type: SemanticType::Field.into(),
+                ..Default::default()
+            },
+            ColumnSchema {
+                column_name: EVENTS_TABLE_PROCEDURE_STATE_COLUMN_NAME.to_string(),
+                datatype: ColumnDataType::String.into(),
+                semantic_type: SemanticType::Field.into(),
+                ..Default::default()
+            },
+            ColumnSchema {
+                column_name: EVENTS_TABLE_PROCEDURE_ERROR_COLUMN_NAME.to_string(),
+                datatype: ColumnDataType::String.into(),
+                semantic_type: SemanticType::Field.into(),
+                ..Default::default()
+            },
+        ];
+        schema.append(&mut self.internal_event.extra_schema());
+        schema
+    }
+
+    fn extra_row(&self) -> Result<Row> {
+        let error_str = match &self.state {
+            ProcedureState::Failed { error } => format!("{:?}", error),
+            ProcedureState::PrepareRollback { error } => format!("{:?}", error),
+            ProcedureState::RollingBack { error } => format!("{:?}", error),
+            ProcedureState::Retrying { error } => format!("{:?}", error),
+            ProcedureState::Poisoned { error, .. } => format!("{:?}", error),
+            _ => "".to_string(),
+        };
+        let mut row = vec![
+            ValueData::StringValue(self.procedure_id.to_string()).into(),
+            ValueData::StringValue(self.state.as_str_name().to_string()).into(),
+            ValueData::StringValue(error_str).into(),
+        ];
+        row.append(&mut self.internal_event.extra_row()?.values);
+        Ok(Row { values: row })
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+}
--- a/src/common/procedure/src/lib.rs
+++ b/src/common/procedure/src/lib.rs
@@ -17,6 +17,7 @@
 #![feature(assert_matches)]

 pub mod error;
+pub mod event;
 pub mod local;
 pub mod options;
 mod procedure;
@@ -28,9 +29,11 @@ pub mod watcher;
 pub mod test_util;

 pub use crate::error::{Error, Result};
+pub use crate::event::ProcedureEvent;
 pub use crate::procedure::{
    BoxedProcedure, BoxedProcedureLoader, Context, ContextProvider, ContextProviderRef, LockKey,
    Output, ParseIdError, PoisonKey, PoisonKeys, Procedure, ProcedureId, ProcedureInfo,
    ProcedureManager, ProcedureManagerRef, ProcedureState, ProcedureWithId, Status, StringKey,
+    UserMetadata,
 };
 pub use crate::watcher::Watcher;
--- a/Show More
+++ b/Show More