wip

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Merge remote-tracking branch 'origin/main' into zhongzc/repartition-procedure-scaffold
2026-01-03 20:02:54 +00:00 · 2025-10-31 05:03:38 +00:00 · 2025-10-26 10:56:09 +00:00 · 2025-10-23 10:19:06 +00:00 · 2025-10-22 08:50:16 +00:00 · 2025-10-13 06:45:56 +00:00
153 changed files with 3541 additions and 4238 deletions
--- a/.github/scripts/deploy-greptimedb.sh
+++ b/.github/scripts/deploy-greptimedb.sh
@@ -7,8 +7,6 @@ KUBERNETES_VERSION="${KUBERNETES_VERSION:-v1.32.0}"
 ENABLE_STANDALONE_MODE="${ENABLE_STANDALONE_MODE:-true}"
 DEFAULT_INSTALL_NAMESPACE=${DEFAULT_INSTALL_NAMESPACE:-default}
 GREPTIMEDB_IMAGE_TAG=${GREPTIMEDB_IMAGE_TAG:-latest}
-GREPTIMEDB_OPERATOR_IMAGE_TAG=${GREPTIMEDB_OPERATOR_IMAGE_TAG:-v0.5.1}
-GREPTIMEDB_INITIALIZER_IMAGE_TAG="${GREPTIMEDB_OPERATOR_IMAGE_TAG}"
 GREPTIME_CHART="https://greptimeteam.github.io/helm-charts/"
 ETCD_CHART="oci://registry-1.docker.io/bitnamicharts/etcd"
 ETCD_CHART_VERSION="${ETCD_CHART_VERSION:-12.0.8}"
@@ -60,7 +58,7 @@ function deploy_greptimedb_operator() {
  # Use the latest chart and image.
  helm upgrade --install greptimedb-operator greptime/greptimedb-operator \
    --create-namespace \
-    --set image.tag="$GREPTIMEDB_OPERATOR_IMAGE_TAG" \
+    --set image.tag=latest \
    -n "$DEFAULT_INSTALL_NAMESPACE"

  # Wait for greptimedb-operator to be ready.
@@ -80,7 +78,6 @@ function deploy_greptimedb_cluster() {
  helm upgrade --install "$cluster_name" greptime/greptimedb-cluster \
    --create-namespace \
    --set image.tag="$GREPTIMEDB_IMAGE_TAG" \
-    --set initializer.tag="$GREPTIMEDB_INITIALIZER_IMAGE_TAG" \
    --set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
    --set meta.backendStorage.etcd.storeKeyPrefix="$cluster_name" \
    -n "$install_namespace"
@@ -118,7 +115,6 @@ function deploy_greptimedb_cluster_with_s3_storage() {
  helm upgrade --install "$cluster_name" greptime/greptimedb-cluster -n "$install_namespace" \
    --create-namespace \
    --set image.tag="$GREPTIMEDB_IMAGE_TAG" \
-    --set initializer.tag="$GREPTIMEDB_INITIALIZER_IMAGE_TAG" \
    --set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
    --set meta.backendStorage.etcd.storeKeyPrefix="$cluster_name" \
    --set objectStorage.s3.bucket="$AWS_CI_TEST_BUCKET" \
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1336,13 +1336,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
 [[package]]
 name = "bytes"
 version = "1.10.1"
-source = "git+https://github.com/discord9/bytes?rev=1572ab22c3cbad0e9b6681d1f68eca4139322a2a#1572ab22c3cbad0e9b6681d1f68eca4139322a2a"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
 dependencies = [
- "backtrace",
- "crossbeam-channel",
- "inferno 0.12.2",
- "papaya",
- "quanta",
 "serde",
 ]

@@ -1900,7 +1896,6 @@ dependencies = [
 "clap 4.5.40",
 "cli",
 "client",
- "colored",
 "common-base",
 "common-catalog",
 "common-config",
@@ -1922,7 +1917,6 @@ dependencies = [
 "common-wal",
 "datanode",
 "datatypes",
- "either",
 "etcd-client",
 "file-engine",
 "flow",
@@ -1938,9 +1932,7 @@ dependencies = [
 "moka",
 "nu-ansi-term",
 "object-store",
- "parquet",
 "plugins",
- "pprof",
 "prometheus",
 "prost 0.13.5",
 "query",
@@ -1983,16 +1975,6 @@ version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"

-[[package]]
-name = "colored"
-version = "2.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c"
-dependencies = [
- "lazy_static",
- "windows-sys 0.59.0",
-]
-
 [[package]]
 name = "comfy-table"
 version = "7.1.2"
@@ -2022,11 +2004,9 @@ dependencies = [
 "common-macro",
 "common-test-util",
 "futures",
- "lazy_static",
 "paste",
 "pin-project",
 "rand 0.9.1",
- "regex",
 "serde",
 "snafu 0.8.6",
 "tokio",
@@ -2037,9 +2017,6 @@ dependencies = [
 [[package]]
 name = "common-catalog"
 version = "0.18.0"
-dependencies = [
- "const_format",
-]

 [[package]]
 name = "common-config"
@@ -2477,7 +2454,6 @@ dependencies = [
 "datafusion-expr",
 "datatypes",
 "futures-util",
- "once_cell",
 "serde",
 "snafu 0.8.6",
 "sqlparser",
@@ -3738,9 +3714,9 @@ dependencies = [

 [[package]]
 name = "datafusion-pg-catalog"
-version = "0.12.1"
+version = "0.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "15824c98ff2009c23b0398d441499b147f7c5ac0e5ee993e7a473d79040e3626"
+checksum = "f258caedd1593e7dca3bf53912249de6685fa224bcce897ede1fbb7b040ac6f6"
 dependencies = [
 "async-trait",
 "datafusion",
@@ -3958,6 +3934,7 @@ dependencies = [
 "mito2",
 "num_cpus",
 "object-store",
+ "partition",
 "prometheus",
 "prost 0.13.5",
 "query",
@@ -5348,7 +5325,6 @@ dependencies = [
 [[package]]
 name = "greptime-proto"
 version = "0.1.0"
-source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=14b9dc40bdc8288742b0cefc7bb024303b7429ef#14b9dc40bdc8288742b0cefc7bb024303b7429ef"
 dependencies = [
 "prost 0.13.5",
 "prost-types 0.13.5",
@@ -6328,6 +6304,17 @@ dependencies = [
 "derive_utils",
 ]

+[[package]]
+name = "io-uring"
+version = "0.7.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
+dependencies = [
+ "bitflags 2.9.1",
+ "cfg-if",
+ "libc",
+]
+
 [[package]]
 name = "ipnet"
 version = "2.11.0"
@@ -6935,7 +6922,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
 dependencies = [
 "cfg-if",
- "windows-targets 0.52.6",
+ "windows-targets 0.48.5",
 ]

 [[package]]
@@ -7441,6 +7428,7 @@ dependencies = [
 "local-ip-address",
 "once_cell",
 "parking_lot 0.12.4",
+ "partition",
 "prometheus",
 "prost 0.13.5",
 "rand 0.9.1",
@@ -7592,7 +7580,6 @@ dependencies = [
 "common-decimal",
 "common-error",
 "common-macro",
- "common-query",
 "common-recordbatch",
 "common-telemetry",
 "common-time",
@@ -8866,16 +8853,6 @@ dependencies = [
 "unicode-width 0.1.14",
 ]

-[[package]]
-name = "papaya"
-version = "0.2.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f92dd0b07c53a0a0c764db2ace8c541dc47320dad97c2200c2a637ab9dd2328f"
-dependencies = [
- "equivalent",
- "seize",
-]
-
 [[package]]
 name = "parking"
 version = "2.2.1"
@@ -9883,7 +9860,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf"
 dependencies = [
 "heck 0.5.0",
- "itertools 0.14.0",
+ "itertools 0.10.5",
 "log",
 "multimap",
 "once_cell",
@@ -9929,7 +9906,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
 dependencies = [
 "anyhow",
- "itertools 0.14.0",
+ "itertools 0.10.5",
 "proc-macro2",
 "quote",
 "syn 2.0.106",
@@ -10109,21 +10086,6 @@ dependencies = [
 "variadics",
 ]

-[[package]]
-name = "quanta"
-version = "0.12.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7"
-dependencies = [
- "crossbeam-utils",
- "libc",
- "once_cell",
- "raw-cpuid",
- "wasi 0.11.1+wasi-snapshot-preview1",
- "web-sys",
- "winapi",
-]
-
 [[package]]
 name = "query"
 version = "0.18.0"
@@ -10423,15 +10385,6 @@ dependencies = [
 "thiserror 1.0.69",
 ]

-[[package]]
-name = "raw-cpuid"
-version = "11.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186"
-dependencies = [
- "bitflags 2.9.1",
-]
-
 [[package]]
 name = "rawpointer"
 version = "0.2.1"
@@ -11372,16 +11325,6 @@ dependencies = [
 "libc",
 ]

-[[package]]
-name = "seize"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b55fb86dfd3a2f5f76ea78310a88f96c4ea21a3031f8d212443d56123fd0521"
-dependencies = [
- "libc",
- "windows-sys 0.52.0",
-]
-
 [[package]]
 name = "semver"
 version = "1.0.26"
@@ -13310,20 +13253,23 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"

 [[package]]
 name = "tokio"
-version = "1.48.0"
+version = "1.47.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408"
+checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038"
 dependencies = [
+ "backtrace",
 "bytes",
+ "io-uring",
 "libc",
 "mio",
 "parking_lot 0.12.4",
 "pin-project-lite",
 "signal-hook-registry",
+ "slab",
 "socket2 0.6.0",
 "tokio-macros",
 "tracing",
- "windows-sys 0.61.2",
+ "windows-sys 0.59.0",
 ]

 [[package]]
@@ -13338,9 +13284,9 @@ dependencies = [

 [[package]]
 name = "tokio-macros"
-version = "2.6.0"
+version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
+checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -14565,7 +14511,7 @@ version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
 dependencies = [
- "windows-sys 0.59.0",
+ "windows-sys 0.48.0",
 ]

 [[package]]
@@ -14762,15 +14708,6 @@ dependencies = [
 "windows-targets 0.52.6",
 ]

-[[package]]
-name = "windows-sys"
-version = "0.61.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
-dependencies = [
- "windows-link 0.2.1",
-]
-
 [[package]]
 name = "windows-targets"
 version = "0.48.5"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -121,7 +121,6 @@ chrono = { version = "0.4", features = ["serde"] }
 chrono-tz = "0.10.1"
 clap = { version = "4.4", features = ["derive"] }
 config = "0.13.0"
-const_format = "0.2"
 crossbeam-utils = "0.8"
 dashmap = "6.1"
 datafusion = "50"
@@ -131,7 +130,7 @@ datafusion-functions = "50"
 datafusion-functions-aggregate-common = "50"
 datafusion-optimizer = "50"
 datafusion-orc = "0.5"
-datafusion-pg-catalog = "0.12.1"
+datafusion-pg-catalog = "0.11"
 datafusion-physical-expr = "50"
 datafusion-physical-plan = "50"
 datafusion-sql = "50"
@@ -148,7 +147,8 @@ etcd-client = { git = "https://github.com/GreptimeTeam/etcd-client", rev = "f62d
 fst = "0.4.7"
 futures = "0.3"
 futures-util = "0.3"
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "14b9dc40bdc8288742b0cefc7bb024303b7429ef" }
+# greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "14b9dc40bdc8288742b0cefc7bb024303b7429ef" }
+greptime-proto = { path = "../greptime-proto" }
 hex = "0.4"
 http = "1"
 humantime = "2.1"
@@ -333,7 +333,6 @@ datafusion-datasource = { git = "https://github.com/GreptimeTeam/datafusion.git"
 datafusion-sql = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
 datafusion-substrait = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
 sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "4b519a5caa95472cc3988f5556813a583dd35af1" }                           # branch = "v0.58.x"
-bytes = { git = "https://github.com/discord9/bytes", rev = "1572ab22c3cbad0e9b6681d1f68eca4139322a2a" }

 [profile.release]
 debug = 1
--- a/config/config.md
+++ b/config/config.md
@@ -13,7 +13,6 @@
 | Key | Type | Default | Descriptions |
 | --- | -----| ------- | ----------- |
 | `default_timezone` | String | Unset | The default timezone of the server. |
-| `default_column_prefix` | String | Unset | The default column prefix for auto-created time index and value columns. |
 | `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
 | `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
 | `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. |
@@ -227,7 +226,6 @@
 | Key | Type | Default | Descriptions |
 | --- | -----| ------- | ----------- |
 | `default_timezone` | String | Unset | The default timezone of the server. |
-| `default_column_prefix` | String | Unset | The default column prefix for auto-created time index and value columns. |
 | `max_in_flight_write_bytes` | String | Unset | The maximum in-flight write bytes. |
 | `runtime` | -- | -- | The runtime options. |
 | `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
@@ -442,7 +440,6 @@
 | Key | Type | Default | Descriptions |
 | --- | -----| ------- | ----------- |
 | `node_id` | Integer | Unset | The datanode identifier and should be unique in the cluster. |
-| `default_column_prefix` | String | Unset | The default column prefix for auto-created time index and value columns. |
 | `require_lease_before_startup` | Bool | `false` | Start services after regions have obtained leases.<br/>It will block the datanode start if it can't receive leases in the heartbeat from metasrv. |
 | `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
 | `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
--- a/config/datanode.example.toml
+++ b/config/datanode.example.toml
@@ -2,10 +2,6 @@
 ## @toml2docs:none-default
 node_id = 42

-## The default column prefix for auto-created time index and value columns.
-## @toml2docs:none-default
-default_column_prefix = "greptime"
-
 ## Start services after regions have obtained leases.
 ## It will block the datanode start if it can't receive leases in the heartbeat from metasrv.
 require_lease_before_startup = false
--- a/config/frontend.example.toml
+++ b/config/frontend.example.toml
@@ -2,10 +2,6 @@
 ## @toml2docs:none-default
 default_timezone = "UTC"

-## The default column prefix for auto-created time index and value columns.
-## @toml2docs:none-default
-default_column_prefix = "greptime"
-
 ## The maximum in-flight write bytes.
 ## @toml2docs:none-default
 #+ max_in_flight_write_bytes = "500MB"
--- a/config/standalone.example.toml
+++ b/config/standalone.example.toml
@@ -2,10 +2,6 @@
 ## @toml2docs:none-default
 default_timezone = "UTC"

-## The default column prefix for auto-created time index and value columns.
-## @toml2docs:none-default
-default_column_prefix = "greptime"
-
 ## Initialize all regions in the background during the startup.
 ## By default, it provides services after all regions have been initialized.
 init_regions_in_background = false
--- a/docs/how-to/how-to-profile-memory.md
+++ b/docs/how-to/how-to-profile-memory.md
@@ -71,15 +71,6 @@ curl -X POST localhost:4000/debug/prof/mem/activate

 # Deactivate heap profiling
 curl -X POST localhost:4000/debug/prof/mem/deactivate
-
-# Activate gdump feature that dumps memory profiling data every time virtual memory usage exceeds previous maximum value.
-curl -X POST localhost:4000/debug/prof/mem/gdump -d 'activate=true'
-
-# Deactivate gdump.
-curl -X POST localhost:4000/debug/prof/mem/gdump -d 'activate=false'
-
-# Retrieve current gdump status.
-curl -X GET localhost:4000/debug/prof/mem/gdump
 ```

 ### Dump memory profiling data
@@ -92,9 +83,6 @@ curl -X POST localhost:4000/debug/prof/mem > greptime.hprof
 curl -X POST "localhost:4000/debug/prof/mem?output=flamegraph" > greptime.svg
 # or output pprof format
 curl -X POST "localhost:4000/debug/prof/mem?output=proto" > greptime.pprof
-
-
-curl -X POST "localhost:4000/debug/prof/bytes" > greptime.svg
 ```

 You can periodically dump profiling data and compare them to find the delta memory usage.
--- a/src/catalog/src/kvbackend/builder.rs
+++ b/src/catalog/src/kvbackend/builder.rs
@@ -29,7 +29,6 @@ use crate::information_schema::{InformationExtensionRef, InformationSchemaProvid
 use crate::kvbackend::KvBackendCatalogManager;
 use crate::kvbackend::manager::{CATALOG_CACHE_MAX_CAPACITY, SystemCatalog};
 use crate::process_manager::ProcessManagerRef;
-use crate::system_schema::numbers_table_provider::NumbersTableProvider;
 use crate::system_schema::pg_catalog::PGCatalogProvider;

 pub struct KvBackendCatalogManagerBuilder {
@@ -120,7 +119,6 @@ impl KvBackendCatalogManagerBuilder {
                    DEFAULT_CATALOG_NAME.to_string(),
                    me.clone(),
                )),
-                numbers_table_provider: NumbersTableProvider,
                backend,
                process_manager,
                #[cfg(feature = "enterprise")]
--- a/src/catalog/src/kvbackend/manager.rs
+++ b/src/catalog/src/kvbackend/manager.rs
@@ -18,7 +18,8 @@ use std::sync::{Arc, Weak};

 use async_stream::try_stream;
 use common_catalog::consts::{
-    DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, PG_CATALOG_NAME,
+    DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, NUMBERS_TABLE_ID,
+    PG_CATALOG_NAME,
 };
 use common_error::ext::BoxedError;
 use common_meta::cache::{
@@ -44,6 +45,7 @@ use table::TableRef;
 use table::dist_table::DistTable;
 use table::metadata::{TableId, TableInfoRef};
 use table::table::PartitionRules;
+use table::table::numbers::{NUMBERS_TABLE_NAME, NumbersTable};
 use table::table_name::TableName;
 use tokio::sync::Semaphore;
 use tokio_stream::wrappers::ReceiverStream;
@@ -59,7 +61,6 @@ use crate::information_schema::{InformationExtensionRef, InformationSchemaProvid
 use crate::kvbackend::TableCacheRef;
 use crate::process_manager::ProcessManagerRef;
 use crate::system_schema::SystemSchemaProvider;
-use crate::system_schema::numbers_table_provider::NumbersTableProvider;
 use crate::system_schema::pg_catalog::PGCatalogProvider;

 /// Access all existing catalog, schema and tables.
@@ -554,7 +555,6 @@ pub(super) struct SystemCatalog {
    // system_schema_provider for default catalog
    pub(super) information_schema_provider: Arc<InformationSchemaProvider>,
    pub(super) pg_catalog_provider: Arc<PGCatalogProvider>,
-    pub(super) numbers_table_provider: NumbersTableProvider,
    pub(super) backend: KvBackendRef,
    pub(super) process_manager: Option<ProcessManagerRef>,
    #[cfg(feature = "enterprise")]
@@ -584,7 +584,9 @@ impl SystemCatalog {
            PG_CATALOG_NAME if channel == Channel::Postgres => {
                self.pg_catalog_provider.table_names()
            }
-            DEFAULT_SCHEMA_NAME => self.numbers_table_provider.table_names(),
+            DEFAULT_SCHEMA_NAME => {
+                vec![NUMBERS_TABLE_NAME.to_string()]
+            }
            _ => vec![],
        }
    }
@@ -602,7 +604,7 @@ impl SystemCatalog {
        if schema == INFORMATION_SCHEMA_NAME {
            self.information_schema_provider.table(table).is_some()
        } else if schema == DEFAULT_SCHEMA_NAME {
-            self.numbers_table_provider.table_exists(table)
+            table == NUMBERS_TABLE_NAME
        } else if schema == PG_CATALOG_NAME && channel == Channel::Postgres {
            self.pg_catalog_provider.table(table).is_some()
        } else {
@@ -647,8 +649,8 @@ impl SystemCatalog {
                    });
                pg_catalog_provider.table(table_name)
            }
-        } else if schema == DEFAULT_SCHEMA_NAME {
-            self.numbers_table_provider.table(table_name)
+        } else if schema == DEFAULT_SCHEMA_NAME && table_name == NUMBERS_TABLE_NAME {
+            Some(NumbersTable::table(NUMBERS_TABLE_ID))
        } else {
            None
        }
--- a/src/catalog/src/system_schema.rs
+++ b/src/catalog/src/system_schema.rs
@@ -14,7 +14,6 @@

 pub mod information_schema;
 mod memory_table;
-pub mod numbers_table_provider;
 pub mod pg_catalog;
 pub mod predicate;
 mod utils;
--- a/src/catalog/src/system_schema/numbers_table_provider.rs
+++ b/src/catalog/src/system_schema/numbers_table_provider.rs
@@ -1,59 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#[cfg(any(test, feature = "testing", debug_assertions))]
-use common_catalog::consts::NUMBERS_TABLE_ID;
-use table::TableRef;
-#[cfg(any(test, feature = "testing", debug_assertions))]
-use table::table::numbers::NUMBERS_TABLE_NAME;
-#[cfg(any(test, feature = "testing", debug_assertions))]
-use table::table::numbers::NumbersTable;
-
-// NumbersTableProvider is a dedicated provider for feature-gating the numbers table.
-#[derive(Clone)]
-pub struct NumbersTableProvider;
-
-#[cfg(any(test, feature = "testing", debug_assertions))]
-impl NumbersTableProvider {
-    pub(crate) fn table_exists(&self, name: &str) -> bool {
-        name == NUMBERS_TABLE_NAME
-    }
-
-    pub(crate) fn table_names(&self) -> Vec<String> {
-        vec![NUMBERS_TABLE_NAME.to_string()]
-    }
-
-    pub(crate) fn table(&self, name: &str) -> Option<TableRef> {
-        if name == NUMBERS_TABLE_NAME {
-            Some(NumbersTable::table(NUMBERS_TABLE_ID))
-        } else {
-            None
-        }
-    }
-}
-
-#[cfg(not(any(test, feature = "testing", debug_assertions)))]
-impl NumbersTableProvider {
-    pub(crate) fn table_exists(&self, _name: &str) -> bool {
-        false
-    }
-
-    pub(crate) fn table_names(&self) -> Vec<String> {
-        vec![]
-    }
-
-    pub(crate) fn table(&self, _name: &str) -> Option<TableRef> {
-        None
-    }
-}
--- a/src/cli/src/data.rs
+++ b/src/cli/src/data.rs
@@ -16,7 +16,6 @@ mod export;
 mod import;

 use clap::Subcommand;
-use client::DEFAULT_CATALOG_NAME;
 use common_error::ext::BoxedError;

 use crate::Tool;
@@ -38,7 +37,3 @@ impl DataCommand {
        }
    }
 }
-
-pub(crate) fn default_database() -> String {
-    format!("{DEFAULT_CATALOG_NAME}-*")
-}
--- a/src/cli/src/data/export.rs
+++ b/src/cli/src/data/export.rs
@@ -30,7 +30,6 @@ use snafu::{OptionExt, ResultExt};
 use tokio::sync::Semaphore;
 use tokio::time::Instant;

-use crate::data::default_database;
 use crate::database::{DatabaseClient, parse_proxy_opts};
 use crate::error::{
    EmptyResultSnafu, Error, OpenDalSnafu, OutputDirNotSetSnafu, Result, S3ConfigNotSetSnafu,
@@ -64,7 +63,7 @@ pub struct ExportCommand {
    output_dir: Option<String>,

    /// The name of the catalog to export.
-    #[clap(long, default_value_t = default_database())]
+    #[clap(long, default_value = "greptime-*")]
    database: String,

    /// Parallelism of the export.
--- a/src/cli/src/data/import.rs
+++ b/src/cli/src/data/import.rs
@@ -25,7 +25,6 @@ use snafu::{OptionExt, ResultExt};
 use tokio::sync::Semaphore;
 use tokio::time::Instant;

-use crate::data::default_database;
 use crate::database::{DatabaseClient, parse_proxy_opts};
 use crate::error::{Error, FileIoSnafu, Result, SchemaNotFoundSnafu};
 use crate::{Tool, database};
@@ -53,7 +52,7 @@ pub struct ImportCommand {
    input_dir: String,

    /// The name of the catalog to import.
-    #[clap(long, default_value_t = default_database())]
+    #[clap(long, default_value = "greptime-*")]
    database: String,

    /// Parallelism of the import.
--- a/src/cmd/Cargo.toml
+++ b/src/cmd/Cargo.toml
@@ -29,11 +29,9 @@ base64.workspace = true
 cache.workspace = true
 catalog.workspace = true
 chrono.workspace = true
-either = "1.15"
 clap.workspace = true
 cli.workspace = true
 client.workspace = true
-colored = "2.1.0"
 common-base.workspace = true
 common-catalog.workspace = true
 common-config.workspace = true
@@ -65,11 +63,9 @@ lazy_static.workspace = true
 meta-client.workspace = true
 meta-srv.workspace = true
 metric-engine.workspace = true
-mito2.workspace = true
 moka.workspace = true
 nu-ansi-term = "0.46"
 object-store.workspace = true
-parquet = { workspace = true, features = ["object_store"] }
 plugins.workspace = true
 prometheus.workspace = true
 prost.workspace = true
@@ -92,11 +88,6 @@ toml.workspace = true
 tonic.workspace = true
 tracing-appender.workspace = true

-[target.'cfg(unix)'.dependencies]
-pprof = { version = "0.14", features = [
-    "flamegraph",
-] }
-
 [target.'cfg(not(windows))'.dependencies]
 tikv-jemallocator = "0.6"

--- a/src/cmd/src/bin/greptime.rs
+++ b/src/cmd/src/bin/greptime.rs
@@ -103,15 +103,12 @@ async fn main_body() -> Result<()> {

 async fn start(cli: Command) -> Result<()> {
    match cli.subcmd {
-        SubCommand::Datanode(cmd) => match cmd.subcmd {
-            datanode::SubCommand::Start(ref start) => {
-                let opts = start.load_options(&cli.global_options)?;
-                let plugins = Plugins::new();
-                let builder = InstanceBuilder::try_new_with_init(opts, plugins).await?;
-                cmd.build_with(builder).await?.run().await
-            }
-            datanode::SubCommand::Objbench(ref bench) => bench.run().await,
-        },
+        SubCommand::Datanode(cmd) => {
+            let opts = cmd.load_options(&cli.global_options)?;
+            let plugins = Plugins::new();
+            let builder = InstanceBuilder::try_new_with_init(opts, plugins).await?;
+            cmd.build_with(builder).await?.run().await
+        }
        SubCommand::Flownode(cmd) => {
            cmd.build(cmd.load_options(&cli.global_options)?)
                .await?
--- a/src/cmd/src/datanode.rs
+++ b/src/cmd/src/datanode.rs
@@ -13,8 +13,6 @@
 // limitations under the License.

 pub mod builder;
-#[allow(clippy::print_stdout)]
-mod objbench;

 use std::path::Path;
 use std::time::Duration;
@@ -25,16 +23,13 @@ use common_config::Configurable;
 use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions};
 use common_telemetry::{info, warn};
 use common_wal::config::DatanodeWalConfig;
-use datanode::config::RegionEngineConfig;
 use datanode::datanode::Datanode;
 use meta_client::MetaClientOptions;
-use serde::{Deserialize, Serialize};
 use snafu::{ResultExt, ensure};
 use tracing_appender::non_blocking::WorkerGuard;

 use crate::App;
 use crate::datanode::builder::InstanceBuilder;
-use crate::datanode::objbench::ObjbenchCommand;
 use crate::error::{
    LoadLayeredConfigSnafu, MissingConfigSnafu, Result, ShutdownDatanodeSnafu, StartDatanodeSnafu,
 };
@@ -94,7 +89,7 @@ impl App for Instance {
 #[derive(Parser)]
 pub struct Command {
    #[clap(subcommand)]
-    pub subcmd: SubCommand,
+    subcmd: SubCommand,
 }

 impl Command {
@@ -105,26 +100,13 @@ impl Command {
    pub fn load_options(&self, global_options: &GlobalOptions) -> Result<DatanodeOptions> {
        match &self.subcmd {
            SubCommand::Start(cmd) => cmd.load_options(global_options),
-            SubCommand::Objbench(_) => {
-                // For objbench command, we don't need to load DatanodeOptions
-                // It's a standalone utility command
-                let mut opts = datanode::config::DatanodeOptions::default();
-                opts.sanitize();
-                Ok(DatanodeOptions {
-                    runtime: Default::default(),
-                    plugins: Default::default(),
-                    component: opts,
-                })
-            }
        }
    }
 }

 #[derive(Parser)]
-pub enum SubCommand {
+enum SubCommand {
    Start(StartCommand),
-    /// Object storage benchmark tool
-    Objbench(ObjbenchCommand),
 }

 impl SubCommand {
@@ -134,33 +116,12 @@ impl SubCommand {
                info!("Building datanode with {:#?}", cmd);
                builder.build().await
            }
-            SubCommand::Objbench(cmd) => {
-                cmd.run().await?;
-                std::process::exit(0);
-            }
        }
    }
 }

-/// Storage engine config
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
-#[serde(default)]
-pub struct StorageConfig {
-    /// The working directory of database
-    pub data_home: String,
-    #[serde(flatten)]
-    pub store: object_store::config::ObjectStoreConfig,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
-#[serde(default)]
-struct StorageConfigWrapper {
-    storage: StorageConfig,
-    region_engine: Vec<RegionEngineConfig>,
-}
-
 #[derive(Debug, Parser, Default)]
-pub struct StartCommand {
+struct StartCommand {
    #[clap(long)]
    node_id: Option<u64>,
    /// The address to bind the gRPC server.
@@ -188,7 +149,7 @@ pub struct StartCommand {
 }

 impl StartCommand {
-    pub fn load_options(&self, global_options: &GlobalOptions) -> Result<DatanodeOptions> {
+    fn load_options(&self, global_options: &GlobalOptions) -> Result<DatanodeOptions> {
        let mut opts = DatanodeOptions::load_layered_options(
            self.config_file.as_deref(),
            self.env_prefix.as_ref(),
--- a/src/cmd/src/datanode/objbench.rs
+++ b/src/cmd/src/datanode/objbench.rs
@@ -1,676 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::path::PathBuf;
-use std::sync::Arc;
-use std::time::Instant;
-
-use clap::Parser;
-use colored::Colorize;
-use datanode::config::RegionEngineConfig;
-use datanode::store;
-use either::Either;
-use mito2::access_layer::{
-    AccessLayer, AccessLayerRef, Metrics, OperationType, SstWriteRequest, WriteType,
-};
-use mito2::cache::{CacheManager, CacheManagerRef};
-use mito2::config::{FulltextIndexConfig, MitoConfig, Mode};
-use mito2::read::Source;
-use mito2::sst::file::{FileHandle, FileMeta};
-use mito2::sst::file_purger::{FilePurger, FilePurgerRef};
-use mito2::sst::index::intermediate::IntermediateManager;
-use mito2::sst::index::puffin_manager::PuffinManagerFactory;
-use mito2::sst::parquet::reader::ParquetReaderBuilder;
-use mito2::sst::parquet::{PARQUET_METADATA_KEY, WriteOptions};
-use mito2::worker::write_cache_from_config;
-use object_store::ObjectStore;
-use regex::Regex;
-use snafu::OptionExt;
-use store_api::metadata::{RegionMetadata, RegionMetadataRef};
-use store_api::path_utils::region_name;
-use store_api::region_request::PathType;
-use store_api::storage::FileId;
-
-use crate::datanode::{StorageConfig, StorageConfigWrapper};
-use crate::error;
-
-/// Object storage benchmark command
-#[derive(Debug, Parser)]
-pub struct ObjbenchCommand {
-    /// Path to the object-store config file (TOML). Must deserialize into object_store::config::ObjectStoreConfig.
-    #[clap(long, value_name = "FILE")]
-    pub config: PathBuf,
-
-    /// Source SST file path in object-store (e.g. "region_dir/<uuid>.parquet").
-    #[clap(long, value_name = "PATH")]
-    pub source: String,
-
-    /// Verbose output
-    #[clap(short, long, default_value_t = false)]
-    pub verbose: bool,
-
-    /// Output file path for pprof flamegraph (enables profiling)
-    #[clap(long, value_name = "FILE")]
-    pub pprof_file: Option<PathBuf>,
-}
-
-fn parse_config(config_path: &PathBuf) -> error::Result<(StorageConfig, MitoConfig)> {
-    let cfg_str = std::fs::read_to_string(config_path).map_err(|e| {
-        error::IllegalConfigSnafu {
-            msg: format!("failed to read config {}: {e}", config_path.display()),
-        }
-        .build()
-    })?;
-
-    let store_cfg: StorageConfigWrapper = toml::from_str(&cfg_str).map_err(|e| {
-        error::IllegalConfigSnafu {
-            msg: format!("failed to parse config {}: {e}", config_path.display()),
-        }
-        .build()
-    })?;
-
-    let storage_config = store_cfg.storage;
-    let mito_engine_config = store_cfg
-        .region_engine
-        .into_iter()
-        .filter_map(|c| {
-            if let RegionEngineConfig::Mito(mito) = c {
-                Some(mito)
-            } else {
-                None
-            }
-        })
-        .next()
-        .with_context(|| error::IllegalConfigSnafu {
-            msg: format!("Engine config not found in {:?}", config_path),
-        })?;
-    Ok((storage_config, mito_engine_config))
-}
-
-impl ObjbenchCommand {
-    pub async fn run(&self) -> error::Result<()> {
-        if self.verbose {
-            common_telemetry::init_default_ut_logging();
-        }
-
-        println!("{}", "Starting objbench with config:".cyan().bold());
-
-        // Build object store from config
-        let (store_cfg, mut mito_engine_config) = parse_config(&self.config)?;
-
-        let object_store = build_object_store(&store_cfg).await?;
-        println!("{} Object store initialized", "✓".green());
-
-        // Prepare source identifiers
-        let components = parse_file_dir_components(&self.source)?;
-        println!(
-            "{} Source path parsed: {}, components: {:?}",
-            "✓".green(),
-            self.source,
-            components
-        );
-
-        // Load parquet metadata to extract RegionMetadata and file stats
-        println!("{}", "Loading parquet metadata...".yellow());
-        let file_size = object_store
-            .stat(&self.source)
-            .await
-            .map_err(|e| {
-                error::IllegalConfigSnafu {
-                    msg: format!("stat failed: {e}"),
-                }
-                .build()
-            })?
-            .content_length();
-        let parquet_meta = load_parquet_metadata(object_store.clone(), &self.source, file_size)
-            .await
-            .map_err(|e| {
-                error::IllegalConfigSnafu {
-                    msg: format!("read parquet metadata failed: {e}"),
-                }
-                .build()
-            })?;
-
-        let region_meta = extract_region_metadata(&self.source, &parquet_meta)?;
-        let num_rows = parquet_meta.file_metadata().num_rows() as u64;
-        let num_row_groups = parquet_meta.num_row_groups() as u64;
-
-        println!(
-            "{} Metadata loaded - rows: {}, size: {} bytes",
-            "✓".green(),
-            num_rows,
-            file_size
-        );
-
-        // Build a FileHandle for the source file
-        let file_meta = FileMeta {
-            region_id: region_meta.region_id,
-            file_id: components.file_id,
-            time_range: Default::default(),
-            level: 0,
-            file_size,
-            available_indexes: Default::default(),
-            index_file_size: 0,
-            num_rows,
-            num_row_groups,
-            sequence: None,
-            partition_expr: None,
-            num_series: 0,
-        };
-        let src_handle = FileHandle::new(file_meta, new_noop_file_purger());
-
-        // Build the reader for a single file via ParquetReaderBuilder
-        let table_dir = components.table_dir();
-        let (src_access_layer, cache_manager) = build_access_layer_simple(
-            &components,
-            object_store.clone(),
-            &mut mito_engine_config,
-            &store_cfg.data_home,
-        )
-        .await?;
-        let reader_build_start = Instant::now();
-
-        let reader = ParquetReaderBuilder::new(
-            table_dir,
-            components.path_type,
-            src_handle.clone(),
-            object_store.clone(),
-        )
-        .expected_metadata(Some(region_meta.clone()))
-        .build()
-        .await
-        .map_err(|e| {
-            error::IllegalConfigSnafu {
-                msg: format!("build reader failed: {e:?}"),
-            }
-            .build()
-        })?;
-
-        let reader_build_elapsed = reader_build_start.elapsed();
-        let total_rows = reader.parquet_metadata().file_metadata().num_rows();
-        println!("{} Reader built in {:?}", "✓".green(), reader_build_elapsed);
-
-        // Build write request
-        let fulltext_index_config = FulltextIndexConfig {
-            create_on_compaction: Mode::Disable,
-            ..Default::default()
-        };
-
-        let write_req = SstWriteRequest {
-            op_type: OperationType::Flush,
-            metadata: region_meta,
-            source: Either::Left(Source::Reader(Box::new(reader))),
-            cache_manager,
-            storage: None,
-            max_sequence: None,
-            index_options: Default::default(),
-            index_config: mito_engine_config.index.clone(),
-            inverted_index_config: MitoConfig::default().inverted_index,
-            fulltext_index_config,
-            bloom_filter_index_config: MitoConfig::default().bloom_filter_index,
-        };
-
-        // Write SST
-        println!("{}", "Writing SST...".yellow());
-
-        // Start profiling if pprof_file is specified
-        #[cfg(unix)]
-        let profiler_guard = if self.pprof_file.is_some() {
-            println!("{} Starting profiling...", "⚡".yellow());
-            Some(
-                pprof::ProfilerGuardBuilder::default()
-                    .frequency(99)
-                    .blocklist(&["libc", "libgcc", "pthread", "vdso"])
-                    .build()
-                    .map_err(|e| {
-                        error::IllegalConfigSnafu {
-                            msg: format!("Failed to start profiler: {e}"),
-                        }
-                        .build()
-                    })?,
-            )
-        } else {
-            None
-        };
-
-        #[cfg(not(unix))]
-        if self.pprof_file.is_some() {
-            eprintln!(
-                "{}: Profiling is not supported on this platform",
-                "Warning".yellow()
-            );
-        }
-
-        let write_start = Instant::now();
-        let mut metrics = Metrics::new(WriteType::Flush);
-        let infos = src_access_layer
-            .write_sst(write_req, &WriteOptions::default(), &mut metrics)
-            .await
-            .map_err(|e| {
-                error::IllegalConfigSnafu {
-                    msg: format!("write_sst failed: {e:?}"),
-                }
-                .build()
-            })?;
-
-        let write_elapsed = write_start.elapsed();
-
-        // Stop profiling and generate flamegraph if enabled
-        #[cfg(unix)]
-        if let (Some(guard), Some(pprof_file)) = (profiler_guard, &self.pprof_file) {
-            println!("{} Generating flamegraph...", "🔥".yellow());
-            match guard.report().build() {
-                Ok(report) => {
-                    let mut flamegraph_data = Vec::new();
-                    if let Err(e) = report.flamegraph(&mut flamegraph_data) {
-                        println!("{}: Failed to generate flamegraph: {}", "Error".red(), e);
-                    } else if let Err(e) = std::fs::write(pprof_file, flamegraph_data) {
-                        println!(
-                            "{}: Failed to write flamegraph to {}: {}",
-                            "Error".red(),
-                            pprof_file.display(),
-                            e
-                        );
-                    } else {
-                        println!(
-                            "{} Flamegraph saved to {}",
-                            "✓".green(),
-                            pprof_file.display().to_string().cyan()
-                        );
-                    }
-                }
-                Err(e) => {
-                    println!("{}: Failed to generate pprof report: {}", "Error".red(), e);
-                }
-            }
-        }
-        assert_eq!(infos.len(), 1);
-        let dst_file_id = infos[0].file_id;
-        let dst_file_path = format!("{}/{}.parquet", components.region_dir(), dst_file_id);
-        let mut dst_index_path = None;
-        if infos[0].index_metadata.file_size > 0 {
-            dst_index_path = Some(format!(
-                "{}/index/{}.puffin",
-                components.region_dir(),
-                dst_file_id
-            ));
-        }
-
-        // Report results with ANSI colors
-        println!("\n{} {}", "Write complete!".green().bold(), "✓".green());
-        println!("  {}: {}", "Destination file".bold(), dst_file_path.cyan());
-        println!("  {}: {}", "Rows".bold(), total_rows.to_string().cyan());
-        println!(
-            "  {}: {}",
-            "File size".bold(),
-            format!("{} bytes", file_size).cyan()
-        );
-        println!(
-            "  {}: {:?}",
-            "Reader build time".bold(),
-            reader_build_elapsed
-        );
-        println!("  {}: {:?}", "Total time".bold(), write_elapsed);
-
-        // Print metrics in a formatted way
-        println!("  {}: {:?}", "Metrics".bold(), metrics,);
-
-        // Print infos
-        println!("  {}: {:?}", "Index".bold(), infos[0].index_metadata);
-
-        // Cleanup
-        println!("\n{}", "Cleaning up...".yellow());
-        object_store.delete(&dst_file_path).await.map_err(|e| {
-            error::IllegalConfigSnafu {
-                msg: format!("Failed to delete dest file {}: {}", dst_file_path, e),
-            }
-            .build()
-        })?;
-        println!("{} Temporary file {} deleted", "✓".green(), dst_file_path);
-
-        if let Some(index_path) = dst_index_path {
-            object_store.delete(&index_path).await.map_err(|e| {
-                error::IllegalConfigSnafu {
-                    msg: format!("Failed to delete dest index file {}: {}", index_path, e),
-                }
-                .build()
-            })?;
-            println!(
-                "{} Temporary index file {} deleted",
-                "✓".green(),
-                index_path
-            );
-        }
-
-        println!("\n{}", "Benchmark completed successfully!".green().bold());
-        Ok(())
-    }
-}
-
-#[derive(Debug)]
-struct FileDirComponents {
-    catalog: String,
-    schema: String,
-    table_id: u32,
-    region_sequence: u32,
-    path_type: PathType,
-    file_id: FileId,
-}
-
-impl FileDirComponents {
-    fn table_dir(&self) -> String {
-        format!("data/{}/{}/{}", self.catalog, self.schema, self.table_id)
-    }
-
-    fn region_dir(&self) -> String {
-        let region_name = region_name(self.table_id, self.region_sequence);
-        match self.path_type {
-            PathType::Bare => {
-                format!(
-                    "data/{}/{}/{}/{}",
-                    self.catalog, self.schema, self.table_id, region_name
-                )
-            }
-            PathType::Data => {
-                format!(
-                    "data/{}/{}/{}/{}/data",
-                    self.catalog, self.schema, self.table_id, region_name
-                )
-            }
-            PathType::Metadata => {
-                format!(
-                    "data/{}/{}/{}/{}/metadata",
-                    self.catalog, self.schema, self.table_id, region_name
-                )
-            }
-        }
-    }
-}
-
-fn parse_file_dir_components(path: &str) -> error::Result<FileDirComponents> {
-    // Define the regex pattern to match all three path styles
-    let pattern =
-        r"^data/([^/]+)/([^/]+)/([^/]+)/([^/]+)_([^/]+)(?:/data|/metadata)?/(.+).parquet$";
-
-    // Compile the regex
-    let re = Regex::new(pattern).expect("Invalid regex pattern");
-
-    // Determine the path type
-    let path_type = if path.contains("/data/") {
-        PathType::Data
-    } else if path.contains("/metadata/") {
-        PathType::Metadata
-    } else {
-        PathType::Bare
-    };
-
-    // Try to match the path
-    let components = (|| {
-        let captures = re.captures(path)?;
-        if captures.len() != 7 {
-            return None;
-        }
-        let mut components = FileDirComponents {
-            catalog: "".to_string(),
-            schema: "".to_string(),
-            table_id: 0,
-            region_sequence: 0,
-            path_type,
-            file_id: FileId::default(),
-        };
-        // Extract the components
-        components.catalog = captures.get(1)?.as_str().to_string();
-        components.schema = captures.get(2)?.as_str().to_string();
-        components.table_id = captures[3].parse().ok()?;
-        components.region_sequence = captures[5].parse().ok()?;
-        let file_id_str = &captures[6];
-        components.file_id = FileId::parse_str(file_id_str).ok()?;
-        Some(components)
-    })();
-    components.context(error::IllegalConfigSnafu {
-        msg: format!("Expect valid source file path, got: {}", path),
-    })
-}
-
-fn extract_region_metadata(
-    file_path: &str,
-    meta: &parquet::file::metadata::ParquetMetaData,
-) -> error::Result<RegionMetadataRef> {
-    use parquet::format::KeyValue;
-    let kvs: Option<&Vec<KeyValue>> = meta.file_metadata().key_value_metadata();
-    let Some(kvs) = kvs else {
-        return Err(error::IllegalConfigSnafu {
-            msg: format!("{file_path}: missing parquet key_value metadata"),
-        }
-        .build());
-    };
-    let json = kvs
-        .iter()
-        .find(|kv| kv.key == PARQUET_METADATA_KEY)
-        .and_then(|kv| kv.value.as_ref())
-        .ok_or_else(|| {
-            error::IllegalConfigSnafu {
-                msg: format!("{file_path}: key {PARQUET_METADATA_KEY} not found or empty"),
-            }
-            .build()
-        })?;
-    let region: RegionMetadata = RegionMetadata::from_json(json).map_err(|e| {
-        error::IllegalConfigSnafu {
-            msg: format!("invalid region metadata json: {e}"),
-        }
-        .build()
-    })?;
-    Ok(Arc::new(region))
-}
-
-async fn build_object_store(sc: &StorageConfig) -> error::Result<ObjectStore> {
-    store::new_object_store(sc.store.clone(), &sc.data_home)
-        .await
-        .map_err(|e| {
-            error::IllegalConfigSnafu {
-                msg: format!("Failed to build object store: {e:?}"),
-            }
-            .build()
-        })
-}
-
-async fn build_access_layer_simple(
-    components: &FileDirComponents,
-    object_store: ObjectStore,
-    config: &mut MitoConfig,
-    data_home: &str,
-) -> error::Result<(AccessLayerRef, CacheManagerRef)> {
-    let _ = config.index.sanitize(data_home, &config.inverted_index);
-    let puffin_manager = PuffinManagerFactory::new(
-        &config.index.aux_path,
-        config.index.staging_size.as_bytes(),
-        Some(config.index.write_buffer_size.as_bytes() as _),
-        config.index.staging_ttl,
-    )
-    .await
-    .map_err(|e| {
-        error::IllegalConfigSnafu {
-            msg: format!("Failed to build access layer: {e:?}"),
-        }
-        .build()
-    })?;
-
-    let intermediate_manager = IntermediateManager::init_fs(&config.index.aux_path)
-        .await
-        .map_err(|e| {
-            error::IllegalConfigSnafu {
-                msg: format!("Failed to build IntermediateManager: {e:?}"),
-            }
-            .build()
-        })?
-        .with_buffer_size(Some(config.index.write_buffer_size.as_bytes() as _));
-
-    let cache_manager =
-        build_cache_manager(config, puffin_manager.clone(), intermediate_manager.clone()).await?;
-    let layer = AccessLayer::new(
-        components.table_dir(),
-        components.path_type,
-        object_store,
-        puffin_manager,
-        intermediate_manager,
-    );
-    Ok((Arc::new(layer), cache_manager))
-}
-
-async fn build_cache_manager(
-    config: &MitoConfig,
-    puffin_manager: PuffinManagerFactory,
-    intermediate_manager: IntermediateManager,
-) -> error::Result<CacheManagerRef> {
-    let write_cache = write_cache_from_config(config, puffin_manager, intermediate_manager)
-        .await
-        .map_err(|e| {
-            error::IllegalConfigSnafu {
-                msg: format!("Failed to build write cache: {e:?}"),
-            }
-            .build()
-        })?;
-    let cache_manager = Arc::new(
-        CacheManager::builder()
-            .sst_meta_cache_size(config.sst_meta_cache_size.as_bytes())
-            .vector_cache_size(config.vector_cache_size.as_bytes())
-            .page_cache_size(config.page_cache_size.as_bytes())
-            .selector_result_cache_size(config.selector_result_cache_size.as_bytes())
-            .index_metadata_size(config.index.metadata_cache_size.as_bytes())
-            .index_content_size(config.index.content_cache_size.as_bytes())
-            .index_content_page_size(config.index.content_cache_page_size.as_bytes())
-            .index_result_cache_size(config.index.result_cache_size.as_bytes())
-            .puffin_metadata_size(config.index.metadata_cache_size.as_bytes())
-            .write_cache(write_cache)
-            .build(),
-    );
-    Ok(cache_manager)
-}
-
-fn new_noop_file_purger() -> FilePurgerRef {
-    #[derive(Debug)]
-    struct Noop;
-    impl FilePurger for Noop {
-        fn remove_file(&self, _file_meta: FileMeta, _is_delete: bool) {}
-    }
-    Arc::new(Noop)
-}
-
-async fn load_parquet_metadata(
-    object_store: ObjectStore,
-    path: &str,
-    file_size: u64,
-) -> Result<parquet::file::metadata::ParquetMetaData, Box<dyn std::error::Error + Send + Sync>> {
-    use parquet::file::FOOTER_SIZE;
-    use parquet::file::metadata::ParquetMetaDataReader;
-    let actual_size = if file_size == 0 {
-        object_store.stat(path).await?.content_length()
-    } else {
-        file_size
-    };
-    if actual_size < FOOTER_SIZE as u64 {
-        return Err("file too small".into());
-    }
-    let prefetch: u64 = 64 * 1024;
-    let start = actual_size.saturating_sub(prefetch);
-    let buffer = object_store
-        .read_with(path)
-        .range(start..actual_size)
-        .await?
-        .to_vec();
-    let buffer_len = buffer.len();
-    let mut footer = [0; 8];
-    footer.copy_from_slice(&buffer[buffer_len - FOOTER_SIZE..]);
-    let footer = ParquetMetaDataReader::decode_footer_tail(&footer)?;
-    let metadata_len = footer.metadata_length() as u64;
-    if actual_size - (FOOTER_SIZE as u64) < metadata_len {
-        return Err("invalid footer/metadata length".into());
-    }
-    if (metadata_len as usize) <= buffer_len - FOOTER_SIZE {
-        let metadata_start = buffer_len - metadata_len as usize - FOOTER_SIZE;
-        let meta = ParquetMetaDataReader::decode_metadata(
-            &buffer[metadata_start..buffer_len - FOOTER_SIZE],
-        )?;
-        Ok(meta)
-    } else {
-        let metadata_start = actual_size - metadata_len - FOOTER_SIZE as u64;
-        let data = object_store
-            .read_with(path)
-            .range(metadata_start..(actual_size - FOOTER_SIZE as u64))
-            .await?
-            .to_vec();
-        let meta = ParquetMetaDataReader::decode_metadata(&data)?;
-        Ok(meta)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::path::PathBuf;
-    use std::str::FromStr;
-
-    use common_base::readable_size::ReadableSize;
-    use store_api::region_request::PathType;
-
-    use crate::datanode::objbench::{parse_config, parse_file_dir_components};
-
-    #[test]
-    fn test_parse_dir() {
-        let meta_path = "data/greptime/public/1024/1024_0000000000/metadata/00020380-009c-426d-953e-b4e34c15af34.parquet";
-        let c = parse_file_dir_components(meta_path).unwrap();
-        assert_eq!(
-            c.file_id.to_string(),
-            "00020380-009c-426d-953e-b4e34c15af34"
-        );
-        assert_eq!(c.catalog, "greptime");
-        assert_eq!(c.schema, "public");
-        assert_eq!(c.table_id, 1024);
-        assert_eq!(c.region_sequence, 0);
-        assert_eq!(c.path_type, PathType::Metadata);
-
-        let c = parse_file_dir_components(
-            "data/greptime/public/1024/1024_0000000000/data/00020380-009c-426d-953e-b4e34c15af34.parquet",
-        ).unwrap();
-        assert_eq!(
-            c.file_id.to_string(),
-            "00020380-009c-426d-953e-b4e34c15af34"
-        );
-        assert_eq!(c.catalog, "greptime");
-        assert_eq!(c.schema, "public");
-        assert_eq!(c.table_id, 1024);
-        assert_eq!(c.region_sequence, 0);
-        assert_eq!(c.path_type, PathType::Data);
-
-        let c = parse_file_dir_components(
-            "data/greptime/public/1024/1024_0000000000/00020380-009c-426d-953e-b4e34c15af34.parquet",
-        ).unwrap();
-        assert_eq!(
-            c.file_id.to_string(),
-            "00020380-009c-426d-953e-b4e34c15af34"
-        );
-        assert_eq!(c.catalog, "greptime");
-        assert_eq!(c.schema, "public");
-        assert_eq!(c.table_id, 1024);
-        assert_eq!(c.region_sequence, 0);
-        assert_eq!(c.path_type, PathType::Bare);
-    }
-
-    #[test]
-    fn test_parse_config() {
-        let path = "../../config/datanode.example.toml";
-        let (storage, engine) = parse_config(&PathBuf::from_str(path).unwrap()).unwrap();
-        assert_eq!(storage.data_home, "./greptimedb_data");
-        assert_eq!(engine.index.staging_size, ReadableSize::gb(2));
-    }
-}
--- a/src/cmd/src/frontend.rs
+++ b/src/cmd/src/frontend.rs
@@ -25,13 +25,11 @@ use clap::Parser;
 use client::client_manager::NodeClients;
 use common_base::Plugins;
 use common_config::{Configurable, DEFAULT_DATA_HOME};
-use common_error::ext::BoxedError;
 use common_grpc::channel_manager::ChannelConfig;
 use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
 use common_meta::heartbeat::handler::HandlerGroupExecutor;
 use common_meta::heartbeat::handler::invalidate_table_cache::InvalidateCacheHandler;
 use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
-use common_query::prelude::set_default_prefix;
 use common_stat::ResourceStatImpl;
 use common_telemetry::info;
 use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions};
@@ -335,9 +333,6 @@ impl StartCommand {
            .context(error::StartFrontendSnafu)?;

        set_default_timezone(opts.default_timezone.as_deref()).context(error::InitTimezoneSnafu)?;
-        set_default_prefix(opts.default_column_prefix.as_deref())
-            .map_err(BoxedError::new)
-            .context(error::BuildCliSnafu)?;

        let meta_client_options = opts
            .meta_client
--- a/src/cmd/src/standalone.rs
+++ b/src/cmd/src/standalone.rs
@@ -41,7 +41,6 @@ use common_meta::region_registry::LeaderRegionRegistry;
 use common_meta::sequence::SequenceBuilder;
 use common_meta::wal_options_allocator::{WalOptionsAllocatorRef, build_wal_options_allocator};
 use common_procedure::ProcedureManagerRef;
-use common_query::prelude::set_default_prefix;
 use common_telemetry::info;
 use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions};
 use common_time::timezone::set_default_timezone;
@@ -356,10 +355,6 @@ impl StartCommand {
        let mut plugins = Plugins::new();
        let plugin_opts = opts.plugins;
        let mut opts = opts.component;
-        set_default_prefix(opts.default_column_prefix.as_deref())
-            .map_err(BoxedError::new)
-            .context(error::BuildCliSnafu)?;
-
        opts.grpc.detect_server_addr();
        let fe_opts = opts.frontend_options();
        let dn_opts = opts.datanode_options();
--- a/src/cmd/tests/load_config_test.rs
+++ b/src/cmd/tests/load_config_test.rs
@@ -48,7 +48,6 @@ fn test_load_datanode_example_config() {
    let expected = GreptimeOptions::<DatanodeOptions> {
        component: DatanodeOptions {
            node_id: Some(42),
-            default_column_prefix: Some("greptime".to_string()),
            meta_client: Some(MetaClientOptions {
                metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
                timeout: Duration::from_secs(3),
@@ -114,7 +113,6 @@ fn test_load_frontend_example_config() {
    let expected = GreptimeOptions::<FrontendOptions> {
        component: FrontendOptions {
            default_timezone: Some("UTC".to_string()),
-            default_column_prefix: Some("greptime".to_string()),
            meta_client: Some(MetaClientOptions {
                metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
                timeout: Duration::from_secs(3),
@@ -275,7 +273,6 @@ fn test_load_standalone_example_config() {
    let expected = GreptimeOptions::<StandaloneOptions> {
        component: StandaloneOptions {
            default_timezone: Some("UTC".to_string()),
-            default_column_prefix: Some("greptime".to_string()),
            wal: DatanodeWalConfig::RaftEngine(RaftEngineConfig {
                dir: Some(format!("{}/{}", DEFAULT_DATA_HOME, WAL_DIR)),
                sync_period: Some(Duration::from_secs(10)),
--- a/src/common/base/Cargo.toml
+++ b/src/common/base/Cargo.toml
@@ -18,11 +18,9 @@ bytes.workspace = true
 common-error.workspace = true
 common-macro.workspace = true
 futures.workspace = true
-lazy_static.workspace = true
 paste.workspace = true
 pin-project.workspace = true
 rand.workspace = true
-regex.workspace = true
 serde = { version = "1.0", features = ["derive"] }
 snafu.workspace = true
 tokio.workspace = true
--- a/src/common/base/src/lib.rs
+++ b/src/common/base/src/lib.rs
@@ -19,7 +19,6 @@ pub mod plugins;
 pub mod range_read;
 #[allow(clippy::all)]
 pub mod readable_size;
-pub mod regex_pattern;
 pub mod secrets;
 pub mod serde;

--- a/src/common/base/src/regex_pattern.rs
+++ b/src/common/base/src/regex_pattern.rs
@@ -1,22 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use lazy_static::lazy_static;
-use regex::Regex;
-
-pub const NAME_PATTERN: &str = r"[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*";
-
-lazy_static! {
-    pub static ref NAME_PATTERN_REG: Regex = Regex::new(&format!("^{NAME_PATTERN}$")).unwrap();
-}
--- a/src/common/catalog/Cargo.toml
+++ b/src/common/catalog/Cargo.toml
@@ -8,6 +8,5 @@ license.workspace = true
 workspace = true

 [dependencies]
-const_format.workspace = true

 [dev-dependencies]
--- a/src/common/catalog/build.rs
+++ b/src/common/catalog/build.rs
@@ -1,27 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-fn main() {
-    // Set DEFAULT_CATALOG_NAME from environment variable or use default value
-    let default_catalog_name =
-        std::env::var("DEFAULT_CATALOG_NAME").unwrap_or_else(|_| "greptime".to_string());
-
-    println!(
-        "cargo:rustc-env=DEFAULT_CATALOG_NAME={}",
-        default_catalog_name
-    );
-
-    // Rerun build script if the environment variable changes
-    println!("cargo:rerun-if-env-changed=DEFAULT_CATALOG_NAME");
-}
--- a/src/common/catalog/src/consts.rs
+++ b/src/common/catalog/src/consts.rs
@@ -12,15 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use const_format::concatcp;
-
 pub const SYSTEM_CATALOG_NAME: &str = "system";
 pub const INFORMATION_SCHEMA_NAME: &str = "information_schema";
 pub const PG_CATALOG_NAME: &str = "pg_catalog";
 pub const SYSTEM_CATALOG_TABLE_NAME: &str = "system_catalog";
-pub const DEFAULT_CATALOG_NAME: &str = env!("DEFAULT_CATALOG_NAME");
+pub const DEFAULT_CATALOG_NAME: &str = "greptime";
 pub const DEFAULT_SCHEMA_NAME: &str = "public";
-pub const DEFAULT_PRIVATE_SCHEMA_NAME: &str = concatcp!(DEFAULT_CATALOG_NAME, "_private");
+pub const DEFAULT_PRIVATE_SCHEMA_NAME: &str = "greptime_private";

 /// Reserves [0,MIN_USER_FLOW_ID) for internal usage.
 /// User defined table id starts from this value.
@@ -152,9 +150,4 @@ pub const TRACE_TABLE_NAME_SESSION_KEY: &str = "trace_table_name";
 pub fn trace_services_table_name(trace_table_name: &str) -> String {
    format!("{}_services", trace_table_name)
 }
-
-/// Generate the trace operations table name from the trace table name by adding `_operations` suffix.
-pub fn trace_operations_table_name(trace_table_name: &str) -> String {
-    format!("{}_operations", trace_table_name)
-}
 // ---- End of special table and fields ----
--- a/src/common/function/src/aggrs/aggr_wrapper.rs
+++ b/src/common/function/src/aggrs/aggr_wrapper.rs
@@ -29,8 +29,6 @@ use arrow::array::StructArray;
 use arrow_schema::{FieldRef, Fields};
 use common_telemetry::debug;
 use datafusion::functions_aggregate::all_default_aggregate_functions;
-use datafusion::functions_aggregate::count::Count;
-use datafusion::functions_aggregate::min_max::{Max, Min};
 use datafusion::optimizer::AnalyzerRule;
 use datafusion::optimizer::analyzer::type_coercion::TypeCoercion;
 use datafusion::physical_planner::create_aggregate_expr_and_maybe_filter;
@@ -415,51 +413,6 @@ impl AggregateUDFImpl for StateWrapper {
    fn coerce_types(&self, arg_types: &[DataType]) -> datafusion_common::Result<Vec<DataType>> {
        self.inner.coerce_types(arg_types)
    }
-
-    fn value_from_stats(
-        &self,
-        statistics_args: &datafusion_expr::StatisticsArgs,
-    ) -> Option<ScalarValue> {
-        let inner = self.inner().inner().as_any();
-        // only count/min/max need special handling here, for getting result from statistics
-        // the result of count/min/max is also the result of count_state so can return directly
-        let can_use_stat = inner.is::<Count>() || inner.is::<Max>() || inner.is::<Min>();
-        if !can_use_stat {
-            return None;
-        }
-
-        // fix return type by extract the first field's data type from the struct type
-        let state_type = if let DataType::Struct(fields) = &statistics_args.return_type {
-            if fields.is_empty() {
-                return None;
-            }
-            fields[0].data_type().clone()
-        } else {
-            return None;
-        };
-
-        let fixed_args = datafusion_expr::StatisticsArgs {
-            statistics: statistics_args.statistics,
-            return_type: &state_type,
-            is_distinct: statistics_args.is_distinct,
-            exprs: statistics_args.exprs,
-        };
-
-        let ret = self.inner().value_from_stats(&fixed_args)?;
-
-        // wrap the result into struct scalar value
-        let fields = if let DataType::Struct(fields) = &statistics_args.return_type {
-            fields
-        } else {
-            return None;
-        };
-
-        let array = ret.to_array().ok()?;
-
-        let struct_array = StructArray::new(fields.clone(), vec![array], None);
-        let ret = ScalarValue::Struct(Arc::new(struct_array));
-        Some(ret)
-    }
 }

 /// The wrapper's input is the same as the original aggregate function's input,
--- a/src/common/function/src/system/pg_catalog.rs
+++ b/src/common/function/src/system/pg_catalog.rs
@@ -16,9 +16,6 @@ mod version;

 use std::sync::Arc;

-use common_catalog::consts::{
-    DEFAULT_PRIVATE_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, PG_CATALOG_NAME,
-};
 use datafusion::arrow::array::{ArrayRef, StringArray, as_boolean_array};
 use datafusion::catalog::TableFunction;
 use datafusion::common::ScalarValue;
@@ -146,9 +143,9 @@ impl Function for CurrentSchemasFunction {
        let mut values = vec!["public"];
        // include implicit schemas
        if input.value(0) {
-            values.push(INFORMATION_SCHEMA_NAME);
-            values.push(PG_CATALOG_NAME);
-            values.push(DEFAULT_PRIVATE_SCHEMA_NAME);
+            values.push("information_schema");
+            values.push("pg_catalog");
+            values.push("greptime_private");
        }

        let list_array = SingleRowListArrayBuilder::new(Arc::new(StringArray::from(values)));
@@ -194,10 +191,7 @@ impl PGCatalogFunction {
        registry.register(pg_catalog::create_pg_get_userbyid_udf());
        registry.register(pg_catalog::create_pg_table_is_visible());
        registry.register(pg_catalog::pg_get_expr_udf::create_pg_get_expr_udf());
-        registry.register(pg_catalog::create_pg_encoding_to_char_udf());
-        registry.register(pg_catalog::create_pg_relation_size_udf());
-        registry.register(pg_catalog::create_pg_total_relation_size_udf());
-        registry.register(pg_catalog::create_pg_stat_get_numscans());
-        registry.register(pg_catalog::create_pg_get_constraintdef());
+        // TODO(sunng87): enable the registration below after upgrading datafusion
+        //registry.register(pg_catalog::create_pg_encoding_to_char_udf());
    }
 }
--- a/src/common/mem-prof/src/jemalloc.rs
+++ b/src/common/mem-prof/src/jemalloc.rs
@@ -32,7 +32,6 @@ use crate::error::{FlamegraphSnafu, ParseJeHeapSnafu, Result};
 const PROF_DUMP: &[u8] = b"prof.dump\0";
 const OPT_PROF: &[u8] = b"opt.prof\0";
 const PROF_ACTIVE: &[u8] = b"prof.active\0";
-const PROF_GDUMP: &[u8] = b"prof.gdump\0";

 pub async fn dump_profile() -> Result<Vec<u8>> {
    ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu);
@@ -120,16 +119,3 @@ fn is_prof_enabled() -> Result<bool> {
    // safety: OPT_PROF variable, if present, is always a boolean value.
    Ok(unsafe { tikv_jemalloc_ctl::raw::read::<bool>(OPT_PROF).context(ReadOptProfSnafu)? })
 }
-
-pub fn set_gdump_active(active: bool) -> Result<()> {
-    ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu);
-    unsafe {
-        tikv_jemalloc_ctl::raw::update(PROF_GDUMP, active).context(error::UpdateGdumpSnafu)?;
-    }
-    Ok(())
-}
-
-pub fn is_gdump_active() -> Result<bool> {
-    // safety: PROF_GDUMP, if present, is a boolean value.
-    unsafe { Ok(tikv_jemalloc_ctl::raw::read::<bool>(PROF_GDUMP).context(error::ReadGdumpSnafu)?) }
-}
--- a/src/common/mem-prof/src/jemalloc/error.rs
+++ b/src/common/mem-prof/src/jemalloc/error.rs
@@ -71,18 +71,6 @@ pub enum Error {
        #[snafu(source)]
        error: tikv_jemalloc_ctl::Error,
    },
-
-    #[snafu(display("Failed to read jemalloc gdump flag"))]
-    ReadGdump {
-        #[snafu(source)]
-        error: tikv_jemalloc_ctl::Error,
-    },
-
-    #[snafu(display("Failed to update jemalloc gdump flag"))]
-    UpdateGdump {
-        #[snafu(source)]
-        error: tikv_jemalloc_ctl::Error,
-    },
 }

 impl ErrorExt for Error {
@@ -96,8 +84,6 @@ impl ErrorExt for Error {
            Error::ActivateProf { .. } => StatusCode::Internal,
            Error::DeactivateProf { .. } => StatusCode::Internal,
            Error::ReadProfActive { .. } => StatusCode::Internal,
-            Error::ReadGdump { .. } => StatusCode::Internal,
-            Error::UpdateGdump { .. } => StatusCode::Internal,
        }
    }

--- a/src/common/mem-prof/src/lib.rs
+++ b/src/common/mem-prof/src/lib.rs
@@ -19,7 +19,7 @@ mod jemalloc;
 #[cfg(not(windows))]
 pub use jemalloc::{
    activate_heap_profile, deactivate_heap_profile, dump_flamegraph, dump_pprof, dump_profile,
-    is_gdump_active, is_heap_profile_active, set_gdump_active,
+    is_heap_profile_active,
 };

 #[cfg(windows)]
@@ -51,13 +51,3 @@ pub fn deactivate_heap_profile() -> error::Result<()> {
 pub fn is_heap_profile_active() -> error::Result<bool> {
    error::ProfilingNotSupportedSnafu.fail()
 }
-
-#[cfg(windows)]
-pub fn is_gdump_active() -> error::Result<bool> {
-    error::ProfilingNotSupportedSnafu.fail()
-}
-
-#[cfg(windows)]
-pub fn set_gdump_active(_: bool) -> error::Result<()> {
-    error::ProfilingNotSupportedSnafu.fail()
-}
--- a/src/common/meta/Cargo.toml
+++ b/src/common/meta/Cargo.toml
@@ -87,6 +87,7 @@ tokio-postgres-rustls = { version = "0.12", optional = true }
 tonic.workspace = true
 tracing.workspace = true
 typetag.workspace = true
+uuid.workspace = true

 [dev-dependencies]
 chrono.workspace = true
--- a/src/common/meta/src/datanode.rs
+++ b/src/common/meta/src/datanode.rs
@@ -25,7 +25,8 @@ use store_api::region_engine::{RegionRole, RegionStatistic};
 use store_api::storage::RegionId;
 use table::metadata::TableId;

-use crate::error::{self, DeserializeFromJsonSnafu, Result};
+use crate::error;
+use crate::error::Result;
 use crate::heartbeat::utils::get_datanode_workloads;

 const DATANODE_STAT_PREFIX: &str = "__meta_datanode_stat";
@@ -65,12 +66,10 @@ pub struct Stat {
    pub node_epoch: u64,
    /// The datanode workloads.
    pub datanode_workloads: DatanodeWorkloads,
-    /// The GC statistics of the datanode.
-    pub gc_stat: Option<GcStat>,
 }

 /// The statistics of a region.
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct RegionStat {
    /// The region_id.
    pub id: RegionId,
@@ -127,7 +126,7 @@ pub trait TopicStatsReporter: Send + Sync {
    fn reportable_topics(&mut self) -> Vec<TopicStat>;
 }

-#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
 pub enum RegionManifestInfo {
    Mito {
        manifest_version: u64,
@@ -223,12 +222,11 @@ impl TryFrom<&HeartbeatRequest> for Stat {
            node_epoch,
            node_workloads,
            topic_stats,
-            extensions,
            ..
        } = value;

        match (header, peer) {
-            (Some(header), Some(peer)) => {
+            (Some(_header), Some(peer)) => {
                let region_stats = region_stats
                    .iter()
                    .map(RegionStat::from)
@@ -236,14 +234,6 @@ impl TryFrom<&HeartbeatRequest> for Stat {
                let topic_stats = topic_stats.iter().map(TopicStat::from).collect::<Vec<_>>();

                let datanode_workloads = get_datanode_workloads(node_workloads.as_ref());
-
-                let gc_stat = GcStat::from_extensions(extensions).map_err(|err| {
-                    common_telemetry::error!(
-                        "Failed to deserialize GcStat from extensions: {}",
-                        err
-                    );
-                    header.clone()
-                })?;
                Ok(Self {
                    timestamp_millis: time_util::current_time_millis(),
                    // datanode id
@@ -257,7 +247,6 @@ impl TryFrom<&HeartbeatRequest> for Stat {
                    topic_stats,
                    node_epoch: *node_epoch,
                    datanode_workloads,
-                    gc_stat,
                })
            }
            (header, _) => Err(header.clone()),
@@ -330,43 +319,6 @@ impl From<&api::v1::meta::TopicStat> for TopicStat {
    }
 }

-#[derive(Debug, Clone, Serialize, Deserialize, Default)]
-pub struct GcStat {
-    /// Number of GC tasks currently running on the datanode.
-    pub running_gc_tasks: u32,
-    /// The maximum number of concurrent GC tasks the datanode can handle.
-    pub gc_concurrency: u32,
-}
-
-impl GcStat {
-    pub const GC_STAT_KEY: &str = "__gc_stat";
-
-    pub fn new(running_gc_tasks: u32, gc_concurrency: u32) -> Self {
-        Self {
-            running_gc_tasks,
-            gc_concurrency,
-        }
-    }
-
-    pub fn into_extensions(&self, extensions: &mut std::collections::HashMap<String, Vec<u8>>) {
-        let bytes = serde_json::to_vec(self).unwrap_or_default();
-        extensions.insert(Self::GC_STAT_KEY.to_string(), bytes);
-    }
-
-    pub fn from_extensions(
-        extensions: &std::collections::HashMap<String, Vec<u8>>,
-    ) -> Result<Option<Self>> {
-        extensions
-            .get(Self::GC_STAT_KEY)
-            .map(|bytes| {
-                serde_json::from_slice(bytes).with_context(|_| DeserializeFromJsonSnafu {
-                    input: String::from_utf8_lossy(bytes).to_string(),
-                })
-            })
-            .transpose()
-    }
-}
-
 /// The key of the datanode stat in the memory store.
 ///
 /// The format is `__meta_datanode_stat-0-{node_id}`.
--- a/src/common/meta/src/instruction.rs
+++ b/src/common/meta/src/instruction.rs
@@ -17,7 +17,7 @@ use std::fmt::{Display, Formatter};
 use std::time::Duration;

 use serde::{Deserialize, Deserializer, Serialize};
-use store_api::storage::{FileRefsManifest, GcReport, RegionId, RegionNumber};
+use store_api::storage::{RegionId, RegionNumber};
 use strum::Display;
 use table::metadata::TableId;
 use table::table_name::TableName;
@@ -417,88 +417,6 @@ where
    })
 }

-/// Instruction to get file references for specified regions.
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-pub struct GetFileRefs {
-    /// List of region IDs to get file references for.
-    pub region_ids: Vec<RegionId>,
-}
-
-impl Display for GetFileRefs {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(f, "GetFileRefs(region_ids={:?})", self.region_ids)
-    }
-}
-
-/// Instruction to trigger garbage collection for a region.
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-pub struct GcRegions {
-    /// The region ID to perform GC on.
-    pub regions: Vec<RegionId>,
-    /// The file references manifest containing temporary file references.
-    pub file_refs_manifest: FileRefsManifest,
-    /// Whether to perform a full file listing to find orphan files.
-    pub full_file_listing: bool,
-}
-
-impl Display for GcRegions {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "GcRegion(regions={:?}, file_refs_count={}, full_file_listing={})",
-            self.regions,
-            self.file_refs_manifest.file_refs.len(),
-            self.full_file_listing
-        )
-    }
-}
-
-/// Reply for GetFileRefs instruction.
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-pub struct GetFileRefsReply {
-    /// The file references manifest.
-    pub file_refs_manifest: FileRefsManifest,
-    /// Whether the operation was successful.
-    pub success: bool,
-    /// Error message if any.
-    pub error: Option<String>,
-}
-
-impl Display for GetFileRefsReply {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "GetFileRefsReply(success={}, file_refs_count={}, error={:?})",
-            self.success,
-            self.file_refs_manifest.file_refs.len(),
-            self.error
-        )
-    }
-}
-
-/// Reply for GC instruction.
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-pub struct GcRegionsReply {
-    pub result: Result<GcReport, String>,
-}
-
-impl Display for GcRegionsReply {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "GcReply(result={})",
-            match &self.result {
-                Ok(report) => format!(
-                    "GcReport(deleted_files_count={}, need_retry_regions_count={})",
-                    report.deleted_files.len(),
-                    report.need_retry_regions.len()
-                ),
-                Err(err) => format!("Err({})", err),
-            }
-        )
-    }
-}
-
 #[derive(Debug, Clone, Serialize, Deserialize, Display, PartialEq)]
 pub enum Instruction {
    /// Opens regions.
@@ -519,10 +437,6 @@ pub enum Instruction {
    InvalidateCaches(Vec<CacheIdent>),
    /// Flushes regions.
    FlushRegions(FlushRegions),
-    /// Gets file references for regions.
-    GetFileRefs(GetFileRefs),
-    /// Triggers garbage collection for a region.
-    GcRegions(GcRegions),
 }

 impl Instruction {
@@ -565,20 +479,6 @@ impl Instruction {
            _ => None,
        }
    }
-
-    pub fn into_get_file_refs(self) -> Option<GetFileRefs> {
-        match self {
-            Self::GetFileRefs(get_file_refs) => Some(get_file_refs),
-            _ => None,
-        }
-    }
-
-    pub fn into_gc_regions(self) -> Option<GcRegions> {
-        match self {
-            Self::GcRegions(gc_regions) => Some(gc_regions),
-            _ => None,
-        }
-    }
 }

 /// The reply of [UpgradeRegion].
@@ -649,8 +549,6 @@ pub enum InstructionReply {
    )]
    DowngradeRegions(DowngradeRegionsReply),
    FlushRegions(FlushRegionReply),
-    GetFileRefs(GetFileRefsReply),
-    GcRegions(GcRegionsReply),
 }

 impl Display for InstructionReply {
@@ -663,8 +561,6 @@ impl Display for InstructionReply {
                write!(f, "InstructionReply::DowngradeRegions({:?})", reply)
            }
            Self::FlushRegions(reply) => write!(f, "InstructionReply::FlushRegions({})", reply),
-            Self::GetFileRefs(reply) => write!(f, "InstructionReply::GetFileRefs({})", reply),
-            Self::GcRegions(reply) => write!(f, "InstructionReply::GcRegion({})", reply),
        }
    }
 }
@@ -709,10 +605,6 @@ impl InstructionReply {

 #[cfg(test)]
 mod tests {
-    use std::collections::HashSet;
-
-    use store_api::storage::FileId;
-
    use super::*;

    #[test]
@@ -1011,30 +903,4 @@ mod tests {
            _ => panic!("Expected FlushRegions instruction"),
        }
    }
-
-    #[test]
-    fn test_serialize_get_file_refs_instruction_reply() {
-        let mut manifest = FileRefsManifest::default();
-        let r0 = RegionId::new(1024, 1);
-        let r1 = RegionId::new(1024, 2);
-        manifest
-            .file_refs
-            .insert(r0, HashSet::from([FileId::random()]));
-        manifest
-            .file_refs
-            .insert(r1, HashSet::from([FileId::random()]));
-        manifest.manifest_version.insert(r0, 10);
-        manifest.manifest_version.insert(r1, 20);
-
-        let instruction_reply = InstructionReply::GetFileRefs(GetFileRefsReply {
-            file_refs_manifest: manifest,
-            success: true,
-            error: None,
-        });
-
-        let serialized = serde_json::to_string(&instruction_reply).unwrap();
-        let deserialized = serde_json::from_str(&serialized).unwrap();
-
-        assert_eq!(instruction_reply, deserialized);
-    }
 }
--- a/src/common/meta/src/key.rs
+++ b/src/common/meta/src/key.rs
@@ -121,7 +121,6 @@ use std::ops::{Deref, DerefMut};
 use std::sync::Arc;

 use bytes::Bytes;
-use common_base::regex_pattern::NAME_PATTERN;
 use common_catalog::consts::{
    DEFAULT_CATALOG_NAME, DEFAULT_PRIVATE_SCHEMA_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME,
 };
@@ -165,6 +164,7 @@ use crate::rpc::router::{LeaderState, RegionRoute, region_distribution};
 use crate::rpc::store::BatchDeleteRequest;
 use crate::state_store::PoisonValue;

+pub const NAME_PATTERN: &str = r"[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*";
 pub const TOPIC_NAME_PATTERN: &str = r"[a-zA-Z0-9_:-][a-zA-Z0-9_:\-\.@#]*";
 pub const LEGACY_MAINTENANCE_KEY: &str = "__maintenance";
 pub const MAINTENANCE_KEY: &str = "__switches/maintenance";
@@ -269,6 +269,10 @@ pub type FlowId = u32;
 /// The partition of flow.
 pub type FlowPartitionId = u32;

+lazy_static! {
+    pub static ref NAME_PATTERN_REGEX: Regex = Regex::new(NAME_PATTERN).unwrap();
+}
+
 lazy_static! {
    pub static ref TOPIC_NAME_PATTERN_REGEX: Regex = Regex::new(TOPIC_NAME_PATTERN).unwrap();
 }
--- a/src/common/query/Cargo.toml
+++ b/src/common/query/Cargo.toml
@@ -14,7 +14,6 @@ workspace = true
 api.workspace = true
 async-trait.workspace = true
 bytes.workspace = true
-common-base.workspace = true
 common-error.workspace = true
 common-macro.workspace = true
 common-recordbatch.workspace = true
@@ -23,7 +22,6 @@ datafusion.workspace = true
 datafusion-common.workspace = true
 datafusion-expr.workspace = true
 datatypes.workspace = true
-once_cell.workspace = true
 serde.workspace = true
 snafu.workspace = true
 sqlparser.workspace = true
--- a/src/common/query/src/error.rs
+++ b/src/common/query/src/error.rs
@@ -199,9 +199,6 @@ pub enum Error {
        #[snafu(implicit)]
        location: Location,
    },
-
-    #[snafu(display("Invalid character in prefix config: {}", prefix))]
-    InvalidColumnPrefix { prefix: String },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -230,8 +227,7 @@ impl ErrorExt for Error {

            Error::UnsupportedInputDataType { .. }
            | Error::TypeCast { .. }
-            | Error::InvalidFuncArgs { .. }
-            | Error::InvalidColumnPrefix { .. } => StatusCode::InvalidArguments,
+            | Error::InvalidFuncArgs { .. } => StatusCode::InvalidArguments,

            Error::ConvertDfRecordBatchStream { source, .. } => source.status_code(),

--- a/src/common/query/src/prelude.rs
+++ b/src/common/query/src/prelude.rs
@@ -12,61 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use common_base::regex_pattern::NAME_PATTERN_REG;
 pub use datafusion_common::ScalarValue;
-use once_cell::sync::OnceCell;
-use snafu::ensure;

 pub use crate::columnar_value::ColumnarValue;
-use crate::error::{InvalidColumnPrefixSnafu, Result};

-/// Default time index column name.
-static GREPTIME_TIMESTAMP_CELL: OnceCell<String> = OnceCell::new();
-
-/// Default value column name.
-static GREPTIME_VALUE_CELL: OnceCell<String> = OnceCell::new();
-
-pub fn set_default_prefix(prefix: Option<&str>) -> Result<()> {
-    match prefix {
-        None => {
-            // use default greptime prefix
-            GREPTIME_TIMESTAMP_CELL.get_or_init(|| GREPTIME_TIMESTAMP.to_string());
-            GREPTIME_VALUE_CELL.get_or_init(|| GREPTIME_VALUE.to_string());
-        }
-        Some(s) if s.trim().is_empty() => {
-            // use "" to disable prefix
-            GREPTIME_TIMESTAMP_CELL.get_or_init(|| "timestamp".to_string());
-            GREPTIME_VALUE_CELL.get_or_init(|| "value".to_string());
-        }
-        Some(x) => {
-            ensure!(
-                NAME_PATTERN_REG.is_match(x),
-                InvalidColumnPrefixSnafu { prefix: x }
-            );
-            GREPTIME_TIMESTAMP_CELL.get_or_init(|| format!("{}_timestamp", x));
-            GREPTIME_VALUE_CELL.get_or_init(|| format!("{}_value", x));
-        }
-    }
-    Ok(())
-}
-
-/// Get the default timestamp column name.
-/// Returns the configured value, or `greptime_timestamp` if not set.
-pub fn greptime_timestamp() -> &'static str {
-    GREPTIME_TIMESTAMP_CELL.get_or_init(|| GREPTIME_TIMESTAMP.to_string())
-}
-
-/// Get the default value column name.
-/// Returns the configured value, or `greptime_value` if not set.
-pub fn greptime_value() -> &'static str {
-    GREPTIME_VALUE_CELL.get_or_init(|| GREPTIME_VALUE.to_string())
-}
-
-/// Default timestamp column name constant for backward compatibility.
-const GREPTIME_TIMESTAMP: &str = "greptime_timestamp";
-/// Default value column name constant for backward compatibility.
-const GREPTIME_VALUE: &str = "greptime_value";
-/// Default counter column name for OTLP metrics (legacy mode).
+/// Default timestamp column name for Prometheus metrics.
+pub const GREPTIME_TIMESTAMP: &str = "greptime_timestamp";
+/// Default value column name for Prometheus metrics.
+pub const GREPTIME_VALUE: &str = "greptime_value";
+/// Default counter column name for OTLP metrics.
 pub const GREPTIME_COUNT: &str = "greptime_count";
 /// Default physical table name
 pub const GREPTIME_PHYSICAL_TABLE: &str = "greptime_physical_table";
--- a/src/common/version/Cargo.toml
+++ b/src/common/version/Cargo.toml
@@ -11,7 +11,7 @@ workspace = true
 codec = ["dep:serde"]

 [dependencies]
-const_format.workspace = true
+const_format = "0.2"
 serde = { workspace = true, optional = true }
 shadow-rs = { version = "1.2.1", default-features = false }

--- a/src/datanode/Cargo.toml
+++ b/src/datanode/Cargo.toml
@@ -53,6 +53,7 @@ metric-engine.workspace = true
 mito2.workspace = true
 num_cpus.workspace = true
 object-store.workspace = true
+partition.workspace = true
 prometheus.workspace = true
 prost.workspace = true
 query.workspace = true
--- a/src/datanode/src/config.rs
+++ b/src/datanode/src/config.rs
@@ -66,7 +66,6 @@ impl Default for StorageConfig {
 #[serde(default)]
 pub struct DatanodeOptions {
    pub node_id: Option<u64>,
-    pub default_column_prefix: Option<String>,
    pub workload_types: Vec<DatanodeWorkloadType>,
    pub require_lease_before_startup: bool,
    pub init_regions_in_background: bool,
@@ -120,7 +119,6 @@ impl Default for DatanodeOptions {
    fn default() -> Self {
        Self {
            node_id: None,
-            default_column_prefix: None,
            workload_types: vec![DatanodeWorkloadType::Hybrid],
            require_lease_before_startup: false,
            init_regions_in_background: false,
--- a/src/datanode/src/datanode.rs
+++ b/src/datanode/src/datanode.rs
@@ -27,7 +27,6 @@ use common_meta::key::runtime_switch::RuntimeSwitchManager;
 use common_meta::key::{SchemaMetadataManager, SchemaMetadataManagerRef};
 use common_meta::kv_backend::KvBackendRef;
 pub use common_procedure::options::ProcedureConfig;
-use common_query::prelude::set_default_prefix;
 use common_stat::ResourceStatImpl;
 use common_telemetry::{error, info, warn};
 use common_wal::config::DatanodeWalConfig;
@@ -60,9 +59,9 @@ use tokio::sync::Notify;

 use crate::config::{DatanodeOptions, RegionEngineConfig, StorageConfig};
 use crate::error::{
-    self, BuildDatanodeSnafu, BuildMetricEngineSnafu, BuildMitoEngineSnafu, CreateDirSnafu,
-    GetMetadataSnafu, MissingCacheSnafu, MissingNodeIdSnafu, OpenLogStoreSnafu, Result,
-    ShutdownInstanceSnafu, ShutdownServerSnafu, StartServerSnafu,
+    self, BuildMetricEngineSnafu, BuildMitoEngineSnafu, CreateDirSnafu, GetMetadataSnafu,
+    MissingCacheSnafu, MissingNodeIdSnafu, OpenLogStoreSnafu, Result, ShutdownInstanceSnafu,
+    ShutdownServerSnafu, StartServerSnafu,
 };
 use crate::event_listener::{
    NoopRegionServerEventListener, RegionServerEventListenerRef, RegionServerEventReceiver,
@@ -221,9 +220,6 @@ impl DatanodeBuilder {

    pub async fn build(mut self) -> Result<Datanode> {
        let node_id = self.opts.node_id.context(MissingNodeIdSnafu)?;
-        set_default_prefix(self.opts.default_column_prefix.as_deref())
-            .map_err(BoxedError::new)
-            .context(BuildDatanodeSnafu)?;

        let meta_client = self.meta_client.take();

--- a/src/datanode/src/error.rs
+++ b/src/datanode/src/error.rs
@@ -19,6 +19,7 @@ use common_error::define_into_tonic_status;
 use common_error::ext::{BoxedError, ErrorExt};
 use common_error::status_code::StatusCode;
 use common_macro::stack_trace_debug;
+use mito2::remap_manifest::Error as RemapManifestError;
 use snafu::{Location, Snafu};
 use store_api::storage::RegionId;
 use table::error::Error as TableError;
@@ -165,13 +166,6 @@ pub enum Error {
        location: Location,
    },

-    #[snafu(display("Failed to build datanode"))]
-    BuildDatanode {
-        #[snafu(implicit)]
-        location: Location,
-        source: BoxedError,
-    },
-
    #[snafu(display("Failed to build http client"))]
    BuildHttpClient {
        #[snafu(implicit)]
@@ -322,21 +316,6 @@ pub enum Error {
        location: Location,
    },

-    #[snafu(display("Failed to run gc for region {}", region_id))]
-    GcMitoEngine {
-        region_id: RegionId,
-        source: mito2::error::Error,
-        #[snafu(implicit)]
-        location: Location,
-    },
-
-    #[snafu(display("Invalid arguments for GC: {}", msg))]
-    InvalidGcArgs {
-        msg: String,
-        #[snafu(implicit)]
-        location: Location,
-    },
-
    #[snafu(display("Failed to list SST entries from storage"))]
    ListStorageSsts {
        #[snafu(implicit)]
@@ -418,6 +397,14 @@ pub enum Error {
        location: Location,
    },

+    #[snafu(display("Failed to remap manifests"))]
+    RemapManifest {
+        #[snafu(implicit)]
+        location: Location,
+        #[snafu(source)]
+        source: RemapManifestError,
+    },
+
    #[snafu(display("Not yet implemented: {what}"))]
    NotYetImplemented { what: String },
 }
@@ -451,8 +438,7 @@ impl ErrorExt for Error {
            | MissingRequiredField { .. }
            | RegionEngineNotFound { .. }
            | ParseAddr { .. }
-            | TomlFormat { .. }
-            | BuildDatanode { .. } => StatusCode::InvalidArguments,
+            | TomlFormat { .. } => StatusCode::InvalidArguments,

            PayloadNotExist { .. }
            | Unexpected { .. }
@@ -461,11 +447,9 @@ impl ErrorExt for Error {

            AsyncTaskExecute { source, .. } => source.status_code(),

-            CreateDir { .. }
-            | RemoveDir { .. }
-            | ShutdownInstance { .. }
-            | DataFusion { .. }
-            | InvalidGcArgs { .. } => StatusCode::Internal,
+            CreateDir { .. } | RemoveDir { .. } | ShutdownInstance { .. } | DataFusion { .. } => {
+                StatusCode::Internal
+            }

            RegionNotFound { .. } => StatusCode::RegionNotFound,
            RegionNotReady { .. } => StatusCode::RegionNotReady,
@@ -483,7 +467,7 @@ impl ErrorExt for Error {
            StopRegionEngine { source, .. } => source.status_code(),

            FindLogicalRegions { source, .. } => source.status_code(),
-            BuildMitoEngine { source, .. } | GcMitoEngine { source, .. } => source.status_code(),
+            BuildMitoEngine { source, .. } => source.status_code(),
            BuildMetricEngine { source, .. } => source.status_code(),
            ListStorageSsts { source, .. } => source.status_code(),
            ConcurrentQueryLimiterClosed { .. } | ConcurrentQueryLimiterTimeout { .. } => {
@@ -494,6 +478,7 @@ impl ErrorExt for Error {

            ObjectStore { source, .. } => source.status_code(),
            BuildCacheStore { .. } => StatusCode::StorageUnavailable,
+            RemapManifest { .. } => StatusCode::Unexpected,
        }
    }

--- a/src/datanode/src/heartbeat.rs
+++ b/src/datanode/src/heartbeat.rs
@@ -36,14 +36,14 @@ use common_workload::DatanodeWorkloadType;
 use meta_client::MetaClientRef;
 use meta_client::client::{HeartbeatSender, MetaClient};
 use servers::addrs;
-use snafu::{OptionExt as _, ResultExt};
+use snafu::ResultExt;
 use tokio::sync::{Notify, mpsc};
 use tokio::time::Instant;

 use self::handler::RegionHeartbeatResponseHandler;
 use crate::alive_keeper::{CountdownTaskHandlerExtRef, RegionAliveKeeper};
 use crate::config::DatanodeOptions;
-use crate::error::{self, MetaClientInitSnafu, RegionEngineNotFoundSnafu, Result};
+use crate::error::{self, MetaClientInitSnafu, Result};
 use crate::event_listener::RegionServerEventReceiver;
 use crate::metrics::{self, HEARTBEAT_RECV_COUNT, HEARTBEAT_SENT_COUNT};
 use crate::region_server::RegionServer;
@@ -242,18 +242,12 @@ impl HeartbeatTask {
        let total_cpu_millicores = self.resource_stat.get_total_cpu_millicores();
        let total_memory_bytes = self.resource_stat.get_total_memory_bytes();
        let resource_stat = self.resource_stat.clone();
-        let gc_limiter = self
-            .region_server
-            .mito_engine()
-            .context(RegionEngineNotFoundSnafu { name: "mito" })?
-            .gc_limiter();

        common_runtime::spawn_hb(async move {
            let sleep = tokio::time::sleep(Duration::from_millis(0));
            tokio::pin!(sleep);

            let build_info = common_version::build_info();
-
            let heartbeat_request = HeartbeatRequest {
                peer: self_peer,
                node_epoch,
@@ -289,13 +283,8 @@ impl HeartbeatTask {
                        if let Some(message) = message {
                            match outgoing_message_to_mailbox_message(message) {
                                Ok(message) => {
-                                    let mut extensions = heartbeat_request.extensions.clone();
-                                    let gc_stat = gc_limiter.gc_stat();
-                                    gc_stat.into_extensions(&mut extensions);
-
                                    let req = HeartbeatRequest {
                                        mailbox_message: Some(message),
-                                        extensions,
                                        ..heartbeat_request.clone()
                                    };
                                    HEARTBEAT_RECV_COUNT.with_label_values(&["success"]).inc();
@@ -316,16 +305,10 @@ impl HeartbeatTask {
                        let topic_stats = region_server_clone.topic_stats();
                        let now = Instant::now();
                        let duration_since_epoch = (now - epoch).as_millis() as u64;
-
-                        let mut extensions = heartbeat_request.extensions.clone();
-                        let gc_stat = gc_limiter.gc_stat();
-                        gc_stat.into_extensions(&mut extensions);
-
                        let mut req = HeartbeatRequest {
                            region_stats,
                            topic_stats,
                            duration_since_epoch,
-                            extensions,
                            ..heartbeat_request.clone()
                        };

--- a/src/datanode/src/heartbeat/handler.rs
+++ b/src/datanode/src/heartbeat/handler.rs
@@ -20,21 +20,16 @@ use common_meta::heartbeat::handler::{
 use common_meta::instruction::{Instruction, InstructionReply};
 use common_telemetry::error;
 use snafu::OptionExt;
-use store_api::storage::GcReport;

 mod close_region;
 mod downgrade_region;
-mod file_ref;
 mod flush_region;
-mod gc_worker;
 mod open_region;
 mod upgrade_region;

 use crate::heartbeat::handler::close_region::CloseRegionsHandler;
 use crate::heartbeat::handler::downgrade_region::DowngradeRegionsHandler;
-use crate::heartbeat::handler::file_ref::GetFileRefsHandler;
 use crate::heartbeat::handler::flush_region::FlushRegionsHandler;
-use crate::heartbeat::handler::gc_worker::GcRegionsHandler;
 use crate::heartbeat::handler::open_region::OpenRegionsHandler;
 use crate::heartbeat::handler::upgrade_region::UpgradeRegionsHandler;
 use crate::heartbeat::task_tracker::TaskTracker;
@@ -48,16 +43,14 @@ pub struct RegionHeartbeatResponseHandler {
    downgrade_tasks: TaskTracker<()>,
    flush_tasks: TaskTracker<()>,
    open_region_parallelism: usize,
-    gc_tasks: TaskTracker<GcReport>,
 }

 #[async_trait::async_trait]
 pub trait InstructionHandler: Send + Sync {
-    type Instruction;
    async fn handle(
        &self,
        ctx: &HandlerContext,
-        instruction: Self::Instruction,
+        instruction: Instruction,
    ) -> Option<InstructionReply>;
 }

@@ -67,7 +60,6 @@ pub struct HandlerContext {
    catchup_tasks: TaskTracker<()>,
    downgrade_tasks: TaskTracker<()>,
    flush_tasks: TaskTracker<()>,
-    gc_tasks: TaskTracker<GcReport>,
 }

 impl HandlerContext {
@@ -78,7 +70,6 @@ impl HandlerContext {
            catchup_tasks: TaskTracker::new(),
            downgrade_tasks: TaskTracker::new(),
            flush_tasks: TaskTracker::new(),
-            gc_tasks: TaskTracker::new(),
        }
    }
 }
@@ -93,7 +84,6 @@ impl RegionHeartbeatResponseHandler {
            flush_tasks: TaskTracker::new(),
            // Default to half of the number of CPUs.
            open_region_parallelism: (num_cpus::get() / 2).max(1),
-            gc_tasks: TaskTracker::new(),
        }
    }

@@ -103,109 +93,39 @@ impl RegionHeartbeatResponseHandler {
        self
    }

-    fn build_handler(&self, instruction: &Instruction) -> MetaResult<Box<InstructionHandlers>> {
+    fn build_handler(&self, instruction: &Instruction) -> MetaResult<Box<dyn InstructionHandler>> {
        match instruction {
-            Instruction::CloseRegions(_) => Ok(Box::new(CloseRegionsHandler.into())),
-            Instruction::OpenRegions(_) => Ok(Box::new(
-                OpenRegionsHandler {
-                    open_region_parallelism: self.open_region_parallelism,
-                }
-                .into(),
-            )),
-            Instruction::FlushRegions(_) => Ok(Box::new(FlushRegionsHandler.into())),
-            Instruction::DowngradeRegions(_) => Ok(Box::new(DowngradeRegionsHandler.into())),
-            Instruction::UpgradeRegion(_) => Ok(Box::new(UpgradeRegionsHandler.into())),
-            Instruction::GetFileRefs(_) => Ok(Box::new(GetFileRefsHandler.into())),
-            Instruction::GcRegions(_) => Ok(Box::new(GcRegionsHandler.into())),
+            Instruction::CloseRegions(_) => Ok(Box::new(CloseRegionsHandler)),
+            Instruction::OpenRegions(_) => Ok(Box::new(OpenRegionsHandler {
+                open_region_parallelism: self.open_region_parallelism,
+            })),
+            Instruction::FlushRegions(_) => Ok(Box::new(FlushRegionsHandler)),
+            Instruction::DowngradeRegions(_) => Ok(Box::new(DowngradeRegionsHandler)),
+            Instruction::UpgradeRegion(_) => Ok(Box::new(UpgradeRegionsHandler)),
            Instruction::InvalidateCaches(_) => InvalidHeartbeatResponseSnafu.fail(),
        }
    }
 }

-#[allow(clippy::enum_variant_names)]
-pub enum InstructionHandlers {
-    CloseRegions(CloseRegionsHandler),
-    OpenRegions(OpenRegionsHandler),
-    FlushRegions(FlushRegionsHandler),
-    DowngradeRegions(DowngradeRegionsHandler),
-    UpgradeRegions(UpgradeRegionsHandler),
-    GetFileRefs(GetFileRefsHandler),
-    GcRegions(GcRegionsHandler),
-}
-
-macro_rules! impl_from_handler {
-    ($($handler:ident => $variant:ident),*) => {
-        $(
-            impl From<$handler> for InstructionHandlers {
-                fn from(handler: $handler) -> Self {
-                    InstructionHandlers::$variant(handler)
-                }
-            }
-        )*
-    };
-}
-
-impl_from_handler!(
-    CloseRegionsHandler => CloseRegions,
-    OpenRegionsHandler => OpenRegions,
-    FlushRegionsHandler => FlushRegions,
-    DowngradeRegionsHandler => DowngradeRegions,
-    UpgradeRegionsHandler => UpgradeRegions,
-    GetFileRefsHandler => GetFileRefs,
-    GcRegionsHandler => GcRegions
-);
-
-macro_rules! dispatch_instr {
-    (
-        $( $instr_variant:ident => $handler_variant:ident ),* $(,)?
-    ) => {
-        impl InstructionHandlers {
-            pub async fn handle(
-                &self,
-                ctx: &HandlerContext,
-                instruction: Instruction,
-            ) -> Option<InstructionReply> {
-                match (self, instruction) {
-                    $(
-                        (
-                            InstructionHandlers::$handler_variant(handler),
-                            Instruction::$instr_variant(instr),
-                        ) => handler.handle(ctx, instr).await,
-                    )*
-                    // Safety: must be used in pairs with `build_handler`.
-                    _ => unreachable!(),
-                }
-            }
-            /// Check whether this instruction is acceptable by any handler.
-            pub fn is_acceptable(instruction: &Instruction) -> bool {
-                matches!(
-                    instruction,
-                    $(
-                        Instruction::$instr_variant { .. }
-                    )|*
-                )
-            }
-        }
-    };
-}
-
-dispatch_instr!(
-    CloseRegions => CloseRegions,
-    OpenRegions => OpenRegions,
-    FlushRegions => FlushRegions,
-    DowngradeRegions => DowngradeRegions,
-    UpgradeRegion => UpgradeRegions,
-    GetFileRefs => GetFileRefs,
-    GcRegions => GcRegions,
-);
-
 #[async_trait]
 impl HeartbeatResponseHandler for RegionHeartbeatResponseHandler {
    fn is_acceptable(&self, ctx: &HeartbeatResponseHandlerContext) -> bool {
-        if let Some((_, instruction)) = ctx.incoming_message.as_ref() {
-            return InstructionHandlers::is_acceptable(instruction);
-        }
-        false
+        matches!(ctx.incoming_message.as_ref(), |Some((
+            _,
+            Instruction::DowngradeRegions { .. },
+        ))| Some((
+            _,
+            Instruction::UpgradeRegion { .. }
+        )) | Some((
+            _,
+            Instruction::FlushRegions { .. }
+        )) | Some((
+            _,
+            Instruction::OpenRegions { .. }
+        )) | Some((
+            _,
+            Instruction::CloseRegions { .. }
+        )))
    }

    async fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> MetaResult<HandleControl> {
@@ -219,7 +139,6 @@ impl HeartbeatResponseHandler for RegionHeartbeatResponseHandler {
        let catchup_tasks = self.catchup_tasks.clone();
        let downgrade_tasks = self.downgrade_tasks.clone();
        let flush_tasks = self.flush_tasks.clone();
-        let gc_tasks = self.gc_tasks.clone();
        let handler = self.build_handler(&instruction)?;
        let _handle = common_runtime::spawn_global(async move {
            let reply = handler
@@ -229,7 +148,6 @@ impl HeartbeatResponseHandler for RegionHeartbeatResponseHandler {
                        catchup_tasks,
                        downgrade_tasks,
                        flush_tasks,
-                        gc_tasks,
                    },
                    instruction,
                )
--- a/src/datanode/src/heartbeat/handler/close_region.rs
+++ b/src/datanode/src/heartbeat/handler/close_region.rs
@@ -12,8 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use common_meta::RegionIdent;
-use common_meta::instruction::{InstructionReply, SimpleReply};
+use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
 use common_telemetry::warn;
 use futures::future::join_all;
 use store_api::region_request::{RegionCloseRequest, RegionRequest};
@@ -27,13 +26,13 @@ pub struct CloseRegionsHandler;

 #[async_trait::async_trait]
 impl InstructionHandler for CloseRegionsHandler {
-    type Instruction = Vec<RegionIdent>;
-
    async fn handle(
        &self,
        ctx: &HandlerContext,
-        region_idents: Self::Instruction,
+        instruction: Instruction,
    ) -> Option<InstructionReply> {
+        // Safety: must be `Instruction::CloseRegions` instruction.
+        let region_idents = instruction.into_close_regions().unwrap();
        let region_ids = region_idents
            .into_iter()
            .map(|region_ident| RegionId::new(region_ident.table_id, region_ident.region_number))
--- a/src/datanode/src/heartbeat/handler/downgrade_region.rs
+++ b/src/datanode/src/heartbeat/handler/downgrade_region.rs
@@ -13,7 +13,7 @@
 // limitations under the License.

 use common_meta::instruction::{
-    DowngradeRegion, DowngradeRegionReply, DowngradeRegionsReply, InstructionReply,
+    DowngradeRegion, DowngradeRegionReply, DowngradeRegionsReply, Instruction, InstructionReply,
 };
 use common_telemetry::tracing::info;
 use common_telemetry::{error, warn};
@@ -156,13 +156,13 @@ impl DowngradeRegionsHandler {

 #[async_trait::async_trait]
 impl InstructionHandler for DowngradeRegionsHandler {
-    type Instruction = Vec<DowngradeRegion>;
-
    async fn handle(
        &self,
        ctx: &HandlerContext,
-        downgrade_regions: Self::Instruction,
+        instruction: Instruction,
    ) -> Option<InstructionReply> {
+        // Safety: must be `Instruction::DowngradeRegions` instruction.
+        let downgrade_regions = instruction.into_downgrade_regions().unwrap();
        let futures = downgrade_regions
            .into_iter()
            .map(|downgrade_region| Self::handle_downgrade_region(ctx, downgrade_region));
@@ -263,10 +263,10 @@ mod tests {
            let reply = DowngradeRegionsHandler
                .handle(
                    &handler_context,
-                    vec![DowngradeRegion {
+                    Instruction::DowngradeRegions(vec![DowngradeRegion {
                        region_id,
                        flush_timeout,
-                    }],
+                    }]),
                )
                .await;

@@ -306,10 +306,10 @@ mod tests {
            let reply = DowngradeRegionsHandler
                .handle(
                    &handler_context,
-                    vec![DowngradeRegion {
+                    Instruction::DowngradeRegions(vec![DowngradeRegion {
                        region_id,
                        flush_timeout,
-                    }],
+                    }]),
                )
                .await;

@@ -341,10 +341,10 @@ mod tests {
        let reply = DowngradeRegionsHandler
            .handle(
                &handler_context,
-                vec![DowngradeRegion {
+                Instruction::DowngradeRegions(vec![DowngradeRegion {
                    region_id,
                    flush_timeout: Some(flush_timeout),
-                }],
+                }]),
            )
            .await;

@@ -380,10 +380,10 @@ mod tests {
            let reply = DowngradeRegionsHandler
                .handle(
                    &handler_context,
-                    vec![DowngradeRegion {
+                    Instruction::DowngradeRegions(vec![DowngradeRegion {
                        region_id,
                        flush_timeout,
-                    }],
+                    }]),
                )
                .await;

@@ -396,10 +396,10 @@ mod tests {
        let reply = DowngradeRegionsHandler
            .handle(
                &handler_context,
-                vec![DowngradeRegion {
+                Instruction::DowngradeRegions(vec![DowngradeRegion {
                    region_id,
                    flush_timeout: Some(Duration::from_millis(500)),
-                }],
+                }]),
            )
            .await;
        // Must less than 300 ms.
@@ -443,10 +443,10 @@ mod tests {
            let reply = DowngradeRegionsHandler
                .handle(
                    &handler_context,
-                    vec![DowngradeRegion {
+                    Instruction::DowngradeRegions(vec![DowngradeRegion {
                        region_id,
                        flush_timeout,
-                    }],
+                    }]),
                )
                .await;
            let reply = &reply.unwrap().expect_downgrade_regions_reply()[0];
@@ -458,10 +458,10 @@ mod tests {
        let reply = DowngradeRegionsHandler
            .handle(
                &handler_context,
-                vec![DowngradeRegion {
+                Instruction::DowngradeRegions(vec![DowngradeRegion {
                    region_id,
                    flush_timeout: Some(Duration::from_millis(500)),
-                }],
+                }]),
            )
            .await;
        // Must less than 300 ms.
@@ -487,10 +487,10 @@ mod tests {
        let reply = DowngradeRegionsHandler
            .handle(
                &handler_context,
-                vec![DowngradeRegion {
+                Instruction::DowngradeRegions(vec![DowngradeRegion {
                    region_id,
                    flush_timeout: None,
-                }],
+                }]),
            )
            .await;
        let reply = &reply.unwrap().expect_downgrade_regions_reply()[0];
@@ -518,10 +518,10 @@ mod tests {
        let reply = DowngradeRegionsHandler
            .handle(
                &handler_context,
-                vec![DowngradeRegion {
+                Instruction::DowngradeRegions(vec![DowngradeRegion {
                    region_id,
                    flush_timeout: None,
-                }],
+                }]),
            )
            .await;
        let reply = &reply.unwrap().expect_downgrade_regions_reply()[0];
--- a/src/datanode/src/heartbeat/handler/file_ref.rs
+++ b/src/datanode/src/heartbeat/handler/file_ref.rs
@@ -1,62 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use common_error::ext::ErrorExt;
-use common_meta::instruction::{GetFileRefs, GetFileRefsReply, InstructionReply};
-use store_api::storage::FileRefsManifest;
-
-use crate::heartbeat::handler::{HandlerContext, InstructionHandler};
-
-pub struct GetFileRefsHandler;
-
-#[async_trait::async_trait]
-impl InstructionHandler for GetFileRefsHandler {
-    type Instruction = GetFileRefs;
-
-    async fn handle(
-        &self,
-        ctx: &HandlerContext,
-        get_file_refs: Self::Instruction,
-    ) -> Option<InstructionReply> {
-        let region_server = &ctx.region_server;
-
-        // Get the MitoEngine
-        let Some(mito_engine) = region_server.mito_engine() else {
-            return Some(InstructionReply::GetFileRefs(GetFileRefsReply {
-                file_refs_manifest: FileRefsManifest::default(),
-                success: false,
-                error: Some("MitoEngine not found".to_string()),
-            }));
-        };
-
-        match mito_engine
-            .get_snapshot_of_unmanifested_refs(get_file_refs.region_ids)
-            .await
-        {
-            Ok(all_file_refs) => {
-                // Return the file references
-                Some(InstructionReply::GetFileRefs(GetFileRefsReply {
-                    file_refs_manifest: all_file_refs,
-                    success: true,
-                    error: None,
-                }))
-            }
-            Err(e) => Some(InstructionReply::GetFileRefs(GetFileRefsReply {
-                file_refs_manifest: FileRefsManifest::default(),
-                success: false,
-                error: Some(format!("Failed to get file refs: {}", e.output_msg())),
-            })),
-        }
-    }
-}
--- a/src/datanode/src/heartbeat/handler/flush_region.rs
+++ b/src/datanode/src/heartbeat/handler/flush_region.rs
@@ -15,7 +15,7 @@
 use std::time::Instant;

 use common_meta::instruction::{
-    FlushErrorStrategy, FlushRegionReply, FlushRegions, FlushStrategy, InstructionReply,
+    FlushErrorStrategy, FlushRegionReply, FlushStrategy, Instruction, InstructionReply,
 };
 use common_telemetry::{debug, warn};
 use store_api::region_request::{RegionFlushRequest, RegionRequest};
@@ -28,14 +28,13 @@ pub struct FlushRegionsHandler;

 #[async_trait::async_trait]
 impl InstructionHandler for FlushRegionsHandler {
-    type Instruction = FlushRegions;
-
    async fn handle(
        &self,
        ctx: &HandlerContext,
-        flush_regions: FlushRegions,
+        instruction: Instruction,
    ) -> Option<InstructionReply> {
        let start_time = Instant::now();
+        let flush_regions = instruction.into_flush_regions().unwrap();
        let strategy = flush_regions.strategy;
        let region_ids = flush_regions.region_ids;
        let error_strategy = flush_regions.error_strategy;
@@ -206,7 +205,10 @@ mod tests {
        // Async hint mode
        let flush_instruction = FlushRegions::async_batch(region_ids.clone());
        let reply = FlushRegionsHandler
-            .handle(&handler_context, flush_instruction)
+            .handle(
+                &handler_context,
+                Instruction::FlushRegions(flush_instruction),
+            )
            .await;
        assert!(reply.is_none()); // Hint mode returns no reply
        assert_eq!(*flushed_region_ids.read().unwrap(), region_ids);
@@ -216,7 +218,10 @@ mod tests {
        let not_found_region_ids = (0..2).map(|i| RegionId::new(2048, i)).collect::<Vec<_>>();
        let flush_instruction = FlushRegions::async_batch(not_found_region_ids);
        let reply = FlushRegionsHandler
-            .handle(&handler_context, flush_instruction)
+            .handle(
+                &handler_context,
+                Instruction::FlushRegions(flush_instruction),
+            )
            .await;
        assert!(reply.is_none());
        assert!(flushed_region_ids.read().unwrap().is_empty());
@@ -242,7 +247,10 @@ mod tests {

        let flush_instruction = FlushRegions::sync_single(region_id);
        let reply = FlushRegionsHandler
-            .handle(&handler_context, flush_instruction)
+            .handle(
+                &handler_context,
+                Instruction::FlushRegions(flush_instruction),
+            )
            .await;
        let flush_reply = reply.unwrap().expect_flush_regions_reply();
        assert!(flush_reply.overall_success);
@@ -279,7 +287,10 @@ mod tests {
        let flush_instruction =
            FlushRegions::sync_batch(region_ids.clone(), FlushErrorStrategy::FailFast);
        let reply = FlushRegionsHandler
-            .handle(&handler_context, flush_instruction)
+            .handle(
+                &handler_context,
+                Instruction::FlushRegions(flush_instruction),
+            )
            .await;
        let flush_reply = reply.unwrap().expect_flush_regions_reply();
        assert!(!flush_reply.overall_success); // Should fail due to non-existent regions
@@ -310,7 +321,10 @@ mod tests {
        let flush_instruction =
            FlushRegions::sync_batch(region_ids.clone(), FlushErrorStrategy::TryAll);
        let reply = FlushRegionsHandler
-            .handle(&handler_context, flush_instruction)
+            .handle(
+                &handler_context,
+                Instruction::FlushRegions(flush_instruction),
+            )
            .await;
        let flush_reply = reply.unwrap().expect_flush_regions_reply();
        assert!(!flush_reply.overall_success); // Should fail due to one non-existent region
--- a/src/datanode/src/heartbeat/handler/gc_worker.rs
+++ b/src/datanode/src/heartbeat/handler/gc_worker.rs
@@ -1,156 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use common_meta::instruction::{GcRegions, GcRegionsReply, InstructionReply};
-use common_telemetry::{debug, warn};
-use mito2::gc::LocalGcWorker;
-use snafu::{OptionExt, ResultExt};
-use store_api::storage::{FileRefsManifest, RegionId};
-
-use crate::error::{GcMitoEngineSnafu, InvalidGcArgsSnafu, Result, UnexpectedSnafu};
-use crate::heartbeat::handler::{HandlerContext, InstructionHandler};
-
-pub struct GcRegionsHandler;
-
-#[async_trait::async_trait]
-impl InstructionHandler for GcRegionsHandler {
-    type Instruction = GcRegions;
-
-    async fn handle(
-        &self,
-        ctx: &HandlerContext,
-        gc_regions: Self::Instruction,
-    ) -> Option<InstructionReply> {
-        let region_ids = gc_regions.regions.clone();
-        debug!("Received gc regions instruction: {:?}", region_ids);
-
-        let is_same_table = region_ids.windows(2).all(|w| {
-            let t1 = w[0].table_id();
-            let t2 = w[1].table_id();
-            t1 == t2
-        });
-        if !is_same_table {
-            return Some(InstructionReply::GcRegions(GcRegionsReply {
-                result: Err(format!(
-                    "Regions to GC should belong to the same table, found: {:?}",
-                    region_ids
-                )),
-            }));
-        }
-
-        let (region_id, gc_worker) = match self
-            .create_gc_worker(
-                ctx,
-                region_ids,
-                &gc_regions.file_refs_manifest,
-                gc_regions.full_file_listing,
-            )
-            .await
-        {
-            Ok(worker) => worker,
-            Err(e) => {
-                return Some(InstructionReply::GcRegions(GcRegionsReply {
-                    result: Err(format!("Failed to create GC worker: {}", e)),
-                }));
-            }
-        };
-
-        let register_result = ctx
-            .gc_tasks
-            .try_register(
-                region_id,
-                Box::pin(async move {
-                    debug!("Starting gc worker for region {}", region_id);
-                    let report = gc_worker
-                        .run()
-                        .await
-                        .context(GcMitoEngineSnafu { region_id })?;
-                    debug!("Gc worker for region {} finished", region_id);
-                    Ok(report)
-                }),
-            )
-            .await;
-        if register_result.is_busy() {
-            warn!("Another gc task is running for the region: {region_id}");
-            return Some(InstructionReply::GcRegions(GcRegionsReply {
-                result: Err(format!(
-                    "Another gc task is running for the region: {region_id}"
-                )),
-            }));
-        }
-        let mut watcher = register_result.into_watcher();
-        let result = ctx.gc_tasks.wait_until_finish(&mut watcher).await;
-        match result {
-            Ok(report) => Some(InstructionReply::GcRegions(GcRegionsReply {
-                result: Ok(report),
-            })),
-            Err(err) => Some(InstructionReply::GcRegions(GcRegionsReply {
-                result: Err(format!("{err:?}")),
-            })),
-        }
-    }
-}
-
-impl GcRegionsHandler {
-    async fn create_gc_worker(
-        &self,
-        ctx: &HandlerContext,
-        mut region_ids: Vec<RegionId>,
-        file_ref_manifest: &FileRefsManifest,
-        full_file_listing: bool,
-    ) -> Result<(RegionId, LocalGcWorker)> {
-        // always use the smallest region id on datanode as the target region id
-        region_ids.sort_by_key(|r| r.region_number());
-        let mito_engine = ctx
-            .region_server
-            .mito_engine()
-            .with_context(|| UnexpectedSnafu {
-                violated: "MitoEngine not found".to_string(),
-            })?;
-        let region_id = *region_ids.first().with_context(|| UnexpectedSnafu {
-            violated: "No region ids provided".to_string(),
-        })?;
-
-        let mito_config = mito_engine.mito_config();
-
-        // Find the access layer from one of the regions that exists on this datanode
-        let access_layer = region_ids
-            .iter()
-            .find_map(|rid| mito_engine.find_region(*rid))
-            .with_context(|| InvalidGcArgsSnafu {
-                msg: format!(
-                    "None of the regions is on current datanode:{:?}",
-                    region_ids
-                ),
-            })?
-            .access_layer();
-
-        let cache_manager = mito_engine.cache_manager();
-
-        let gc_worker = LocalGcWorker::try_new(
-            access_layer.clone(),
-            Some(cache_manager),
-            region_ids.into_iter().collect(),
-            Default::default(),
-            mito_config.clone().into(),
-            file_ref_manifest.clone(),
-            &mito_engine.gc_limiter(),
-            full_file_listing,
-        )
-        .await
-        .context(GcMitoEngineSnafu { region_id })?;
-
-        Ok((region_id, gc_worker))
-    }
-}
--- a/src/datanode/src/heartbeat/handler/open_region.rs
+++ b/src/datanode/src/heartbeat/handler/open_region.rs
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use common_meta::instruction::{InstructionReply, OpenRegion, SimpleReply};
+use common_meta::instruction::{Instruction, InstructionReply, OpenRegion, SimpleReply};
 use common_meta::wal_options_allocator::prepare_wal_options;
 use store_api::path_utils::table_dir;
 use store_api::region_request::{PathType, RegionOpenRequest};
@@ -26,12 +26,13 @@ pub struct OpenRegionsHandler {

 #[async_trait::async_trait]
 impl InstructionHandler for OpenRegionsHandler {
-    type Instruction = Vec<OpenRegion>;
    async fn handle(
        &self,
        ctx: &HandlerContext,
-        open_regions: Self::Instruction,
+        instruction: Instruction,
    ) -> Option<InstructionReply> {
+        let open_regions = instruction.into_open_regions().unwrap();
+
        let requests = open_regions
            .into_iter()
            .map(|open_region| {
--- a/src/datanode/src/heartbeat/handler/upgrade_region.rs
+++ b/src/datanode/src/heartbeat/handler/upgrade_region.rs
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use common_meta::instruction::{InstructionReply, UpgradeRegion, UpgradeRegionReply};
+use common_meta::instruction::{Instruction, InstructionReply, UpgradeRegion, UpgradeRegionReply};
 use common_telemetry::{info, warn};
 use store_api::region_request::{RegionCatchupRequest, RegionRequest, ReplayCheckpoint};

@@ -24,12 +24,12 @@ pub struct UpgradeRegionsHandler;

 #[async_trait::async_trait]
 impl InstructionHandler for UpgradeRegionsHandler {
-    type Instruction = UpgradeRegion;
-
    async fn handle(
        &self,
        ctx: &HandlerContext,
-        UpgradeRegion {
+        instruction: Instruction,
+    ) -> Option<InstructionReply> {
+        let UpgradeRegion {
            region_id,
            last_entry_id,
            metadata_last_entry_id,
@@ -37,8 +37,8 @@ impl InstructionHandler for UpgradeRegionsHandler {
            location_id,
            replay_entry_id,
            metadata_replay_entry_id,
-        }: UpgradeRegion,
-    ) -> Option<InstructionReply> {
+        } = instruction.into_upgrade_regions().unwrap();
+
        let Some(writable) = ctx.region_server.is_region_leader(region_id) else {
            return Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
                ready: false,
@@ -138,7 +138,7 @@ impl InstructionHandler for UpgradeRegionsHandler {
 mod tests {
    use std::time::Duration;

-    use common_meta::instruction::UpgradeRegion;
+    use common_meta::instruction::{Instruction, UpgradeRegion};
    use mito2::engine::MITO_ENGINE_NAME;
    use store_api::region_engine::RegionRole;
    use store_api::storage::RegionId;
@@ -164,11 +164,11 @@ mod tests {
            let reply = UpgradeRegionsHandler
                .handle(
                    &handler_context,
-                    UpgradeRegion {
+                    Instruction::UpgradeRegion(UpgradeRegion {
                        region_id,
                        replay_timeout,
                        ..Default::default()
-                    },
+                    }),
                )
                .await;

@@ -201,11 +201,11 @@ mod tests {
            let reply = UpgradeRegionsHandler
                .handle(
                    &handler_context,
-                    UpgradeRegion {
+                    Instruction::UpgradeRegion(UpgradeRegion {
                        region_id,
                        replay_timeout,
                        ..Default::default()
-                    },
+                    }),
                )
                .await;

@@ -239,11 +239,11 @@ mod tests {
            let reply = UpgradeRegionsHandler
                .handle(
                    &handler_context,
-                    UpgradeRegion {
+                    Instruction::UpgradeRegion(UpgradeRegion {
                        region_id,
                        replay_timeout,
                        ..Default::default()
-                    },
+                    }),
                )
                .await;

@@ -280,11 +280,11 @@ mod tests {
            let reply = UpgradeRegionsHandler
                .handle(
                    &handler_context,
-                    UpgradeRegion {
+                    Instruction::UpgradeRegion(UpgradeRegion {
                        region_id,
                        replay_timeout,
                        ..Default::default()
-                    },
+                    }),
                )
                .await;

@@ -298,11 +298,11 @@ mod tests {
        let reply = UpgradeRegionsHandler
            .handle(
                &handler_context,
-                UpgradeRegion {
+                Instruction::UpgradeRegion(UpgradeRegion {
                    region_id,
                    replay_timeout: Some(Duration::from_millis(500)),
                    ..Default::default()
-                },
+                }),
            )
            .await;
        // Must less than 300 ms.
@@ -339,10 +339,10 @@ mod tests {
        let reply = UpgradeRegionsHandler
            .handle(
                &handler_context,
-                UpgradeRegion {
+                Instruction::UpgradeRegion(UpgradeRegion {
                    region_id,
                    ..Default::default()
-                },
+                }),
            )
            .await;

@@ -355,11 +355,11 @@ mod tests {
        let reply = UpgradeRegionsHandler
            .handle(
                &handler_context,
-                UpgradeRegion {
+                Instruction::UpgradeRegion(UpgradeRegion {
                    region_id,
                    replay_timeout: Some(Duration::from_millis(200)),
                    ..Default::default()
-                },
+                }),
            )
            .await;

--- a/src/datanode/src/region_server.rs
+++ b/src/datanode/src/region_server.rs
@@ -24,7 +24,9 @@ use api::region::RegionResponse;
 use api::v1::meta::TopicStat;
 use api::v1::region::sync_request::ManifestInfo;
 use api::v1::region::{
-    ListMetadataRequest, RegionResponse as RegionResponseV1, SyncRequest, region_request,
+    ApplyStagedManifestRequest, ListMetadataRequest, PauseRequest, PublishRegionRuleRequest,
+    RegionResponse as RegionResponseV1, RemapManifestRequest, ResumeRequest,
+    StageRegionRuleRequest, SyncRequest, region_request,
 };
 use api::v1::{ResponseHeader, Status};
 use arrow_flight::{FlightData, Ticket};
@@ -84,6 +86,8 @@ use crate::error::{
 use crate::event_listener::RegionServerEventListenerRef;
 use crate::region_server::catalog::{NameAwareCatalogList, NameAwareDataSourceInjectorBuilder};

+const REMAP_STATS_EXTENSION_KEY: &str = "repartition.manifest.stats";
+
 #[derive(Clone)]
 pub struct RegionServer {
    inner: Arc<RegionServerInner>,
@@ -158,27 +162,6 @@ impl RegionServer {
        }
    }

-    /// Gets the MitoEngine if it's registered.
-    pub fn mito_engine(&self) -> Option<MitoEngine> {
-        if let Some(mito) = self.inner.mito_engine.read().unwrap().clone() {
-            Some(mito)
-        } else {
-            self.inner
-                .engines
-                .read()
-                .unwrap()
-                .get(MITO_ENGINE_NAME)
-                .cloned()
-                .and_then(|e| {
-                    let mito = e.as_any().downcast_ref::<MitoEngine>().cloned();
-                    if mito.is_none() {
-                        warn!("Mito engine not found in region server engines");
-                    }
-                    mito
-                })
-        }
-    }
-
    #[tracing::instrument(skip_all)]
    pub async fn handle_batch_open_requests(
        &self,
@@ -391,6 +374,24 @@ impl RegionServer {
        }
    }

+    /// Temporarily pauses compaction- and snapshot-related activities for the region.
+    ///
+    /// Currently a stub: it only logs the call and always returns `Ok(())`. The real
+    /// implementation is expected to coordinate with the region worker.
+    pub async fn pause_compaction_and_snapshot(&self, region_id: RegionId) -> Result<()> {
+        info!("pause_compaction_and_snapshot stub invoked for region {region_id}");
+        Ok(())
+    }
+
+    /// Resumes compaction- and snapshot-related activities for the region.
+    ///
+    /// Currently a stub: it only logs the call and always returns `Ok(())`. The real
+    /// implementation is expected to coordinate with the region worker.
+    pub async fn resume_compaction_and_snapshot(&self, region_id: RegionId) -> Result<()> {
+        info!("resume_compaction_and_snapshot stub invoked for region {region_id}");
+        Ok(())
+    }
+
    /// Stop the region server.
    pub async fn stop(&self) -> Result<()> {
        self.inner.stop().await
@@ -559,6 +560,124 @@ impl RegionServer {
        Ok(response)
    }

+    /// Handles a `PauseRequest` by delegating to [`Self::pause_compaction_and_snapshot`].
+    async fn handle_pause_region_request(&self, request: &PauseRequest) -> Result<RegionResponse> {
+        let region_id = RegionId::from_u64(request.region_id);
+        let span = TracingContext::from_current_span().attach(info_span!(
+            "RegionServer::handle_pause_region_request",
+            region_id = region_id.to_string()
+        ));
+
+        self.pause_compaction_and_snapshot(region_id)
+            .trace(span)
+            .await
+            .map(|_| RegionResponse::new(AffectedRows::default()))
+    }
+
+    /// Handles a `ResumeRequest` by delegating to [`Self::resume_compaction_and_snapshot`].
+    async fn handle_resume_region_request(
+        &self,
+        request: &ResumeRequest,
+    ) -> Result<RegionResponse> {
+        let region_id = RegionId::from_u64(request.region_id);
+        let span = TracingContext::from_current_span().attach(info_span!(
+            "RegionServer::handle_resume_region_request",
+            region_id = region_id.to_string()
+        ));
+
+        self.resume_compaction_and_snapshot(region_id)
+            .trace(span)
+            .await
+            .map(|_| RegionResponse::new(AffectedRows::default()))
+    }
+
+    /// Sets the region's role state to `StagingLeader`; a `NotFound` reply counts as success.
+    async fn handle_stage_region_rule_request(
+        &self,
+        request: &StageRegionRuleRequest,
+    ) -> Result<RegionResponse> {
+        let region_id = RegionId::from_u64(request.region_id);
+        info!(
+            "Stage region rule for region {region_id} with version {}",
+            request.rule_version
+        );
+        let outcome = self
+            .set_region_role_state_gracefully(region_id, SettableRegionRoleState::StagingLeader)
+            .await?;
+        match outcome {
+            SetRegionRoleStateResponse::Success(_) | SetRegionRoleStateResponse::NotFound => {}
+            SetRegionRoleStateResponse::InvalidTransition(err) => {
+                return Err(err).with_context(|_| HandleRegionRequestSnafu { region_id });
+            }
+        }
+        Ok(RegionResponse::new(AffectedRows::default()))
+    }
+
+    /// Sets the region's role state to `Leader`; a `NotFound` reply counts as success.
+    async fn handle_publish_region_rule_request(
+        &self,
+        request: &PublishRegionRuleRequest,
+    ) -> Result<RegionResponse> {
+        let region_id = RegionId::from_u64(request.region_id);
+        info!(
+            "Publish region rule for region {region_id} with version {}",
+            request.rule_version
+        );
+        let outcome = self
+            .set_region_role_state_gracefully(region_id, SettableRegionRoleState::Leader)
+            .await?;
+        match outcome {
+            SetRegionRoleStateResponse::Success(_) | SetRegionRoleStateResponse::NotFound => {}
+            SetRegionRoleStateResponse::InvalidTransition(err) => {
+                return Err(err).with_context(|_| HandleRegionRequestSnafu { region_id });
+            }
+        }
+        Ok(RegionResponse::new(AffectedRows::default()))
+    }
+
+    /// Answers a remap-manifest request with placeholder statistics.
+    ///
+    /// No manifest is touched here: the reply carries empty stats tagged with the group id.
+    async fn handle_remap_manifest_request(
+        &self,
+        request: &RemapManifestRequest,
+    ) -> Result<RegionResponse> {
+        info!(
+            "received remap manifest request for table {} group {}",
+            request.table_id, request.group_id
+        );
+
+        let empty_stats = serde_json::json!({
+            "files_per_region": HashMap::<u64, usize>::new(),
+            "total_file_refs": 0u64,
+            "empty_regions": Vec::<u64>::new(),
+            "group_id": &request.group_id,
+        });
+        let stats_json = serde_json::to_vec(&empty_stats).context(SerializeJsonSnafu)?;
+
+        Ok(RegionResponse {
+            affected_rows: 0,
+            extensions: HashMap::from([(REMAP_STATS_EXTENSION_KEY.to_string(), stats_json)]),
+            metadata: Vec::new(),
+        })
+    }
+
+    /// Stub: logs the request and replies with an empty `RegionResponse`; nothing is applied.
+    async fn handle_apply_staged_manifest_request(
+        &self,
+        request: &ApplyStagedManifestRequest,
+    ) -> Result<RegionResponse> {
+        info!(
+            "received manifest apply request for table {} group {} publish={} regions {:?}",
+            request.table_id, request.group_id, request.publish, request.region_ids
+        );
+        Ok(RegionResponse {
+            metadata: Vec::new(),
+            extensions: HashMap::new(),
+            affected_rows: 0,
+        })
+    }
+
    /// Sync region manifest and registers new opened logical regions.
    pub async fn sync_region(
        &self,
@@ -590,6 +709,26 @@ impl RegionServerHandler for RegionServer {
            region_request::Body::Sync(sync_request) => {
                self.handle_sync_region_request(sync_request).await
            }
+            region_request::Body::Pause(pause_request) => {
+                self.handle_pause_region_request(pause_request).await
+            }
+            region_request::Body::Resume(resume_request) => {
+                self.handle_resume_region_request(resume_request).await
+            }
+            region_request::Body::StageRegionRule(stage_request) => {
+                self.handle_stage_region_rule_request(stage_request).await
+            }
+            region_request::Body::PublishRegionRule(publish_request) => {
+                self.handle_publish_region_rule_request(publish_request)
+                    .await
+            }
+            region_request::Body::RemapManifest(remap_request) => {
+                self.handle_remap_manifest_request(remap_request).await
+            }
+            region_request::Body::ApplyStagedManifest(apply_request) => {
+                self.handle_apply_staged_manifest_request(apply_request)
+                    .await
+            }
            region_request::Body::ListMetadata(list_metadata_request) => {
                self.handle_list_metadata_request(list_metadata_request)
                    .await
@@ -697,14 +836,14 @@ struct RegionServerInner {
    runtime: Runtime,
    event_listener: RegionServerEventListenerRef,
    table_provider_factory: TableProviderFactoryRef,
-    /// The number of queries allowed to be executed at the same time.
-    /// Act as last line of defense on datanode to prevent query overloading.
+    // The number of queries allowed to be executed at the same time.
+    // Act as last line of defense on datanode to prevent query overloading.
    parallelism: Option<RegionServerParallelism>,
-    /// The topic stats reporter.
+    // The topic stats reporter.
    topic_stats_reporter: RwLock<Option<Box<dyn TopicStatsReporter>>>,
-    /// HACK(zhongzc): Direct MitoEngine handle for diagnostics. This couples the
-    /// server with a concrete engine; acceptable for now to fetch Mito-specific
-    /// info (e.g., list SSTs). Consider a diagnostics trait later.
+    // HACK(zhongzc): Direct MitoEngine handle for diagnostics. This couples the
+    // server with a concrete engine; acceptable for now to fetch Mito-specific
+    // info (e.g., list SSTs). Consider a diagnostics trait later.
    mito_engine: RwLock<Option<MitoEngine>>,
 }

--- a/src/datanode/src/store.rs
+++ b/src/datanode/src/store.rs
@@ -47,7 +47,10 @@ pub(crate) async fn new_object_store_without_cache(
    Ok(object_store)
 }

-pub async fn new_object_store(store: ObjectStoreConfig, data_home: &str) -> Result<ObjectStore> {
+pub(crate) async fn new_object_store(
+    store: ObjectStoreConfig,
+    data_home: &str,
+) -> Result<ObjectStore> {
    let object_store = new_raw_object_store(&store, data_home)
        .await
        .context(error::ObjectStoreSnafu)?;
@@ -56,7 +59,7 @@ pub async fn new_object_store(store: ObjectStoreConfig, data_home: &str) -> Resu
        let object_store = {
            // It's safe to unwrap here because we already checked above.
            let cache_config = store.cache_config().unwrap();
-            if let Some(cache_layer) = build_cache_layer(cache_config, data_home).await? {
+            if let Some(cache_layer) = build_cache_layer(cache_config).await? {
                // Adds cache layer
                object_store.layer(cache_layer)
            } else {
@@ -76,22 +79,17 @@ pub async fn new_object_store(store: ObjectStoreConfig, data_home: &str) -> Resu

 async fn build_cache_layer(
    cache_config: &ObjectStorageCacheConfig,
-    data_home: &str,
 ) -> Result<Option<LruCacheLayer<impl Access>>> {
    // No need to build cache layer if read cache is disabled.
    if !cache_config.enable_read_cache {
        return Ok(None);
    }
-    let cache_base_dir = if cache_config.cache_path.is_empty() {
-        data_home
-    } else {
-        &cache_config.cache_path
-    };
-    let atomic_temp_dir = join_dir(cache_base_dir, ATOMIC_WRITE_DIR);
+
+    let atomic_temp_dir = join_dir(&cache_config.cache_path, ATOMIC_WRITE_DIR);
    clean_temp_dir(&atomic_temp_dir).context(error::ObjectStoreSnafu)?;

    let cache_store = Fs::default()
-        .root(cache_base_dir)
+        .root(&cache_config.cache_path)
        .atomic_write_dir(&atomic_temp_dir)
        .build()
        .context(error::BuildCacheStoreSnafu)?;
--- a/src/datatypes/src/data_type.rs
+++ b/src/datatypes/src/data_type.rs
@@ -348,9 +348,9 @@ impl ConcreteDataType {
        }
    }

-    pub fn as_json(&self) -> Option<&JsonType> {
+    pub fn as_json(&self) -> Option<JsonType> {
        match self {
-            ConcreteDataType::Json(j) => Some(j),
+            ConcreteDataType::Json(j) => Some(j.clone()),
            _ => None,
        }
    }
--- a/src/datatypes/src/error.rs
+++ b/src/datatypes/src/error.rs
@@ -259,13 +259,6 @@ pub enum Error {
        #[snafu(implicit)]
        location: Location,
    },
-
-    #[snafu(display("Failed to merge JSON datatype: {reason}"))]
-    MergeJsonDatatype {
-        reason: String,
-        #[snafu(implicit)]
-        location: Location,
-    },
 }

 impl ErrorExt for Error {
@@ -288,8 +281,7 @@ impl ErrorExt for Error {
            | InvalidJsonb { .. }
            | InvalidVector { .. }
            | InvalidFulltextOption { .. }
-            | InvalidSkippingIndexOption { .. }
-            | MergeJsonDatatype { .. } => StatusCode::InvalidArguments,
+            | InvalidSkippingIndexOption { .. } => StatusCode::InvalidArguments,

            ValueExceedsPrecision { .. }
            | CastType { .. }
--- a/src/datatypes/src/json.rs
+++ b/src/datatypes/src/json.rs
@@ -30,7 +30,7 @@ use snafu::{ResultExt, ensure};

 use crate::data_type::{ConcreteDataType, DataType};
 use crate::error::{self, Error};
-use crate::types::{ListType, StructField, StructType};
+use crate::types::{StructField, StructType};
 use crate::value::{ListValue, StructValue, Value};

 /// The configuration of JSON encoding
@@ -375,8 +375,8 @@ fn encode_json_value_with_context<'a>(
        }
        Json::Array(arr) => {
            let list_value = encode_json_array_with_context(arr, expected_type, context)?;
-            let datatype = ConcreteDataType::List(ListType::new(list_value.datatype()));
-            Ok((Value::List(list_value), datatype))
+            let data_type = list_value.datatype().clone();
+            Ok((Value::List(list_value), (*data_type).clone()))
        }
        Json::Object(obj) => {
            let struct_value = encode_json_object_with_context(obj, None, context)?;
--- a/src/datatypes/src/types/json_type.rs
+++ b/src/datatypes/src/types/json_type.rs
@@ -12,9 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use std::collections::BTreeMap;
 use std::str::FromStr;
-use std::sync::Arc;

 use arrow::datatypes::DataType as ArrowDataType;
 use arrow_schema::Fields;
@@ -23,13 +21,10 @@ use serde::{Deserialize, Serialize};
 use snafu::ResultExt;

 use crate::data_type::DataType;
-use crate::error::{
-    DeserializeSnafu, InvalidJsonSnafu, InvalidJsonbSnafu, MergeJsonDatatypeSnafu, Result,
-};
+use crate::error::{DeserializeSnafu, InvalidJsonSnafu, InvalidJsonbSnafu, Result};
 use crate::prelude::ConcreteDataType;
 use crate::scalars::ScalarVectorBuilder;
 use crate::type_id::LogicalTypeId;
-use crate::types::{ListType, StructField, StructType};
 use crate::value::Value;
 use crate::vectors::{BinaryVectorBuilder, MutableVector};

@@ -53,101 +48,11 @@ impl JsonType {
    pub fn new(format: JsonFormat) -> Self {
        Self { format }
    }
-
-    // TODO(LFC): remove "allow unused"
-    #[allow(unused)]
-    /// Make json type a struct type, by:
-    /// - if the json is an object, its entries are mapped to struct fields, obviously;
-    /// - if not, the json is one of bool, number, string or array, make it a special field called
-    ///   "__plain" in a struct with only that field.
-    pub(crate) fn as_struct_type(&self) -> StructType {
-        match &self.format {
-            JsonFormat::Jsonb => StructType::default(),
-            JsonFormat::Native(inner) => match inner.as_ref() {
-                ConcreteDataType::Struct(t) => t.clone(),
-                x => StructType::new(Arc::new(vec![StructField::new(
-                    "__plain".to_string(),
-                    x.clone(),
-                    true,
-                )])),
-            },
-        }
-    }
-
-    // TODO(LFC): remove "allow unused"
-    #[allow(unused)]
-    /// Try to merge this json type with others, error on datatype conflict.
-    pub(crate) fn merge(&mut self, other: &JsonType) -> Result<()> {
-        match (&self.format, &other.format) {
-            (JsonFormat::Jsonb, JsonFormat::Jsonb) => Ok(()),
-            (JsonFormat::Native(this), JsonFormat::Native(that)) => {
-                let merged = merge(this.as_ref(), that.as_ref())?;
-                self.format = JsonFormat::Native(Box::new(merged));
-                Ok(())
-            }
-            _ => MergeJsonDatatypeSnafu {
-                reason: "json format not match",
-            }
-            .fail(),
-        }
-    }
-}
-
-fn merge(this: &ConcreteDataType, that: &ConcreteDataType) -> Result<ConcreteDataType> {
-    match (this, that) {
-        (this, that) if this == that => Ok(this.clone()),
-        (ConcreteDataType::List(this), ConcreteDataType::List(that)) => {
-            merge_list(this, that).map(ConcreteDataType::List)
-        }
-        (ConcreteDataType::Struct(this), ConcreteDataType::Struct(that)) => {
-            merge_struct(this, that).map(ConcreteDataType::Struct)
-        }
-        (ConcreteDataType::Null(_), x) | (x, ConcreteDataType::Null(_)) => Ok(x.clone()),
-        _ => MergeJsonDatatypeSnafu {
-            reason: format!("datatypes have conflict, this: {this}, that: {that}"),
-        }
-        .fail(),
-    }
-}
-
-fn merge_list(this: &ListType, that: &ListType) -> Result<ListType> {
-    let merged = merge(this.item_type(), that.item_type())?;
-    Ok(ListType::new(Arc::new(merged)))
-}
-
-fn merge_struct(this: &StructType, that: &StructType) -> Result<StructType> {
-    let this = Arc::unwrap_or_clone(this.fields());
-    let that = Arc::unwrap_or_clone(that.fields());
-
-    let mut this: BTreeMap<String, StructField> = this
-        .into_iter()
-        .map(|x| (x.name().to_string(), x))
-        .collect();
-    // merge "that" into "this" directly:
-    for that_field in that {
-        let field_name = that_field.name().to_string();
-        if let Some(this_field) = this.get(&field_name) {
-            let merged_field = StructField::new(
-                field_name.clone(),
-                merge(this_field.data_type(), that_field.data_type())?,
-                true, // the value in json object must be always nullable
-            );
-            this.insert(field_name, merged_field);
-        } else {
-            this.insert(field_name, that_field);
-        }
-    }
-
-    let fields = this.into_values().collect::<Vec<_>>();
-    Ok(StructType::new(Arc::new(fields)))
 }

 impl DataType for JsonType {
    fn name(&self) -> String {
-        match &self.format {
-            JsonFormat::Jsonb => JSON_TYPE_NAME.to_string(),
-            JsonFormat::Native(x) => format!("Json<{x}>"),
-        }
+        JSON_TYPE_NAME.to_string()
    }

    fn logical_type_id(&self) -> LogicalTypeId {
@@ -201,95 +106,3 @@ pub fn parse_string_to_jsonb(s: &str) -> Result<Vec<u8>> {
        .map_err(|_| InvalidJsonSnafu { value: s }.build())
        .map(|json| json.to_vec())
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::json::JsonStructureSettings;
-
-    #[test]
-    fn test_merge_json_type() -> Result<()> {
-        fn test(
-            json: &str,
-            json_type: &mut JsonType,
-            expected: std::result::Result<&str, &str>,
-        ) -> Result<()> {
-            let json: serde_json::Value = serde_json::from_str(json).unwrap();
-
-            let settings = JsonStructureSettings::Structured(None);
-            let value = settings.encode(json)?;
-            let value_type = value.data_type();
-            let Some(other) = value_type.as_json() else {
-                unreachable!()
-            };
-
-            let result = json_type.merge(other);
-            match (result, expected) {
-                (Ok(()), Ok(expected)) => {
-                    assert_eq!(json_type.name(), expected)
-                }
-                (Err(err), Err(expected)) => {
-                    assert_eq!(err.to_string(), expected)
-                }
-                _ => unreachable!(),
-            }
-            Ok(())
-        }
-
-        let json_type = &mut JsonType::new(JsonFormat::Native(Box::new(
-            ConcreteDataType::null_datatype(),
-        )));
-
-        // can merge with json object:
-        let json = r#"{
-            "hello": "world",
-            "list": [1, 2, 3],
-            "object": {"a": 1}
-        }"#;
-        let expected =
-            r#"Json<Struct<"hello": String, "list": List<Int64>, "object": Struct<"a": Int64>>>"#;
-        test(json, json_type, Ok(expected))?;
-
-        // cannot merge with other non-object json values:
-        let jsons = [r#""s""#, "1", "[1]"];
-        let expects = [
-            r#"Failed to merge JSON datatype: datatypes have conflict, this: Struct<"hello": String, "list": List<Int64>, "object": Struct<"a": Int64>>, that: String"#,
-            r#"Failed to merge JSON datatype: datatypes have conflict, this: Struct<"hello": String, "list": List<Int64>, "object": Struct<"a": Int64>>, that: Int64"#,
-            r#"Failed to merge JSON datatype: datatypes have conflict, this: Struct<"hello": String, "list": List<Int64>, "object": Struct<"a": Int64>>, that: List<Int64>"#,
-        ];
-        for (json, expect) in jsons.into_iter().zip(expects.into_iter()) {
-            test(json, json_type, Err(expect))?;
-        }
-
-        // cannot merge with other json object with conflict field datatype:
-        let json = r#"{
-            "hello": 1,
-            "float": 0.123,
-            "no": 42
-        }"#;
-        let expected =
-            r#"Failed to merge JSON datatype: datatypes have conflict, this: String, that: Int64"#;
-        test(json, json_type, Err(expected))?;
-
-        // can merge with another json object:
-        let json = r#"{
-            "hello": "greptime",
-            "float": 0.123,
-            "int": 42
-        }"#;
-        let expected = r#"Json<Struct<"float": Float64, "hello": String, "int": Int64, "list": List<Int64>, "object": Struct<"a": Int64>>>"#;
-        test(json, json_type, Ok(expected))?;
-
-        // can merge with some complex nested json object:
-        let json = r#"{
-            "list": [4],
-            "object": {"foo": "bar", "l": ["x"], "o": {"key": "value"}},
-            "float": 0.456,
-            "int": 0
-        }"#;
-        let expected = r#"Json<Struct<"float": Float64, "hello": String, "int": Int64, "list": List<Int64>, "object": Struct<"a": Int64, "foo": String, "l": List<String>, "o": Struct<"key": String>>>>"#;
-        test(json, json_type, Ok(expected))?;
-
-        Ok(())
-    }
-}
--- a/src/datatypes/src/types/struct_type.rs
+++ b/src/datatypes/src/types/struct_type.rs
@@ -52,7 +52,7 @@ impl DataType for StructType {
            "Struct<{}>",
            self.fields
                .iter()
-                .map(|f| format!(r#""{}": {}"#, f.name(), f.data_type()))
+                .map(|f| f.name())
                .collect::<Vec<_>>()
                .join(", ")
        )
--- a/src/flow/src/adapter/refill.rs
+++ b/src/flow/src/adapter/refill.rs
@@ -18,7 +18,6 @@ use std::collections::BTreeSet;
 use std::sync::Arc;

 use catalog::CatalogManagerRef;
-use client::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
 use common_error::ext::BoxedError;
 use common_meta::key::flow::FlowMetadataManagerRef;
 use common_recordbatch::{RecordBatch, RecordBatches, SendableRecordBatchStream};
@@ -397,8 +396,8 @@ impl RefillTask {
        // we don't need information from query context in this query so a default query context is enough
        let query_ctx = Arc::new(
            QueryContextBuilder::default()
-                .current_catalog(DEFAULT_CATALOG_NAME.to_string())
-                .current_schema(DEFAULT_SCHEMA_NAME.to_string())
+                .current_catalog("greptime".to_string())
+                .current_schema("public".to_string())
                .build(),
        );

--- a/src/frontend/src/frontend.rs
+++ b/src/frontend/src/frontend.rs
@@ -45,7 +45,6 @@ use crate::service_config::{
 pub struct FrontendOptions {
    pub node_id: Option<String>,
    pub default_timezone: Option<String>,
-    pub default_column_prefix: Option<String>,
    pub heartbeat: HeartbeatOptions,
    pub http: HttpOptions,
    pub grpc: GrpcOptions,
@@ -78,7 +77,6 @@ impl Default for FrontendOptions {
        Self {
            node_id: None,
            default_timezone: None,
-            default_column_prefix: None,
            heartbeat: HeartbeatOptions::frontend_default(),
            http: HttpOptions::default(),
            grpc: GrpcOptions::default(),
--- a/src/frontend/src/heartbeat.rs
+++ b/src/frontend/src/heartbeat.rs
@@ -104,9 +104,6 @@ impl HeartbeatTask {
                match resp_stream.message().await {
                    Ok(Some(resp)) => {
                        debug!("Receiving heartbeat response: {:?}", resp);
-                        if let Some(message) = &resp.mailbox_message {
-                            info!("Received mailbox message: {message:?}");
-                        }
                        let ctx = HeartbeatResponseHandlerContext::new(mailbox.clone(), resp);
                        if let Err(e) = capture_self.handle_response(ctx).await {
                            error!(e; "Error while handling heartbeat response");
--- a/src/frontend/src/instance/jaeger.rs
+++ b/src/frontend/src/instance/jaeger.rs
@@ -17,9 +17,7 @@ use std::sync::Arc;

 use async_trait::async_trait;
 use catalog::CatalogManagerRef;
-use common_catalog::consts::{
-    TRACE_TABLE_NAME, trace_operations_table_name, trace_services_table_name,
-};
+use common_catalog::consts::{TRACE_TABLE_NAME, trace_services_table_name};
 use common_function::function::FunctionRef;
 use common_function::scalars::json::json_get::{
    JsonGetBool, JsonGetFloat, JsonGetInt, JsonGetString,
@@ -78,6 +76,8 @@ impl JaegerQueryHandler for Instance {
        ctx: QueryContextRef,
        service_name: &str,
        span_kind: Option<&str>,
+        start_time: Option<i64>,
+        end_time: Option<i64>,
    ) -> ServerResult<Output> {
        let mut filters = vec![col(SERVICE_NAME_COLUMN).eq(lit(service_name))];

@@ -89,6 +89,16 @@ impl JaegerQueryHandler for Instance {
            ))));
        }

+        // Microseconds to nanoseconds; saturating so an extreme bound cannot overflow `i64`.
+        if let Some(start_nanos) = start_time.map(|micros| micros.saturating_mul(1_000)) {
+            filters.push(col(TIMESTAMP_COLUMN).gt_eq(lit_timestamp_nano(start_nanos)));
+        }
+
+        // Microseconds to nanoseconds; saturating so an extreme bound cannot overflow `i64`.
+        if let Some(end_nanos) = end_time.map(|micros| micros.saturating_mul(1_000)) {
+            filters.push(col(TIMESTAMP_COLUMN).lt_eq(lit_timestamp_nano(end_nanos)));
+        }
+
        // It's equivalent to the following SQL query:
        //
        // ```
@@ -97,6 +107,8 @@ impl JaegerQueryHandler for Instance {
        //   {db}.{trace_table}
        // WHERE
        //   service_name = '{service_name}' AND
+        //   timestamp >= {start_time} AND
+        //   timestamp <= {end_time} AND
        //   span_kind = '{span_kind}'
        // ORDER BY
        //   span_name ASC
@@ -289,18 +301,12 @@ async fn query_trace_table(
        .unwrap_or(TRACE_TABLE_NAME);

    // If only select services, use the trace services table.
-    // If querying operations (distinct by span_name and span_kind), use the trace operations table.
    let table_name = {
        if match selects.as_slice() {
            [SelectExpr::Expression(x)] => x == &col(SERVICE_NAME_COLUMN),
            _ => false,
        } {
            &trace_services_table_name(trace_table_name)
-        } else if !distincts.is_empty()
-            && distincts.contains(&col(SPAN_NAME_COLUMN))
-            && distincts.contains(&col(SPAN_KIND_COLUMN))
-        {
-            &trace_operations_table_name(trace_table_name)
        } else {
            trace_table_name
        }
--- a/src/meta-srv/Cargo.toml
+++ b/src/meta-srv/Cargo.toml
@@ -45,6 +45,7 @@ common-time.workspace = true
 common-version.workspace = true
 common-wal.workspace = true
 common-workload.workspace = true
+partition.workspace = true
 dashmap.workspace = true
 datatypes.workspace = true
 deadpool = { workspace = true, optional = true }
--- a/src/meta-srv/src/error.rs
+++ b/src/meta-srv/src/error.rs
@@ -17,12 +17,14 @@ use common_error::ext::{BoxedError, ErrorExt};
 use common_error::status_code::StatusCode;
 use common_macro::stack_trace_debug;
 use common_meta::DatanodeId;
+use common_procedure::ProcedureId;
 use common_runtime::JoinError;
 use snafu::{Location, Snafu};
 use store_api::storage::RegionId;
 use table::metadata::TableId;
 use tokio::sync::mpsc::error::SendError;
 use tonic::codegen::http;
+use uuid::Uuid;

 use crate::metasrv::SelectTarget;
 use crate::pubsub::Message;
@@ -774,6 +776,129 @@ pub enum Error {
        location: Location,
    },

+    #[snafu(display("Failed to create repartition subtasks"))]
+    RepartitionCreateSubtasks {
+        source: partition::error::Error,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Failed to serialize partition expression"))]
+    RepartitionSerializePartitionExpr {
+        source: partition::error::Error,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display(
+        "Source partition expression '{}' does not match any existing region",
+        expr
+    ))]
+    RepartitionSourceExprMismatch {
+        expr: String,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Repartition group {} is missing a source region id", group_id))]
+    RepartitionMissingSourceRegionId {
+        group_id: Uuid,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display(
+        "Repartition group {} cannot find route for source region {}",
+        group_id,
+        region_id
+    ))]
+    RepartitionSourceRegionRouteMissing {
+        group_id: Uuid,
+        region_id: RegionId,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Repartition group {} has no source regions after planning", group_id))]
+    RepartitionNoSourceRegions {
+        group_id: Uuid,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display(
+        "Repartition group {} target {} is missing an allocated region id",
+        group_id,
+        target_index
+    ))]
+    RepartitionMissingTargetRegionId {
+        group_id: Uuid,
+        target_index: usize,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Route for target region {} not found", region_id))]
+    RepartitionTargetRegionRouteMissing {
+        region_id: RegionId,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Repartition group {} is missing prepare context", group_id))]
+    RepartitionMissingPrepareContext {
+        group_id: Uuid,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Repartition group {} has no registered subprocedure", group_id))]
+    RepartitionSubprocedureUnknown {
+        group_id: Uuid,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display(
+        "Failed to fetch state for repartition group {} subprocedure {}",
+        group_id,
+        procedure_id
+    ))]
+    RepartitionSubprocedureStateFetch {
+        group_id: Uuid,
+        procedure_id: ProcedureId,
+        #[snafu(source)]
+        source: common_procedure::Error,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display(
+        "Repartition group {} subprocedure {} state missing",
+        group_id,
+        procedure_id
+    ))]
+    RepartitionSubprocedureStateMissing {
+        group_id: Uuid,
+        procedure_id: ProcedureId,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display(
+        "Repartition group {} subprocedure {} failed: {}",
+        group_id,
+        procedure_id,
+        reason
+    ))]
+    RepartitionSubprocedureFailed {
+        group_id: Uuid,
+        procedure_id: ProcedureId,
+        reason: String,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
    #[snafu(display("Unsupported operation {}", operation))]
    Unsupported {
        operation: String,
@@ -997,6 +1122,11 @@ impl Error {
        matches!(self, Error::RetryLater { .. })
            || matches!(self, Error::RetryLaterWithSource { .. })
    }
+
+    /// Returns `true` if the error requires cleaning poison records (currently always `false`).
+    pub fn need_clean_poisons(&self) -> bool {
+        false
+    }
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -1012,6 +1142,8 @@ impl ErrorExt for Error {
            | Error::TcpBind { .. }
            | Error::SerializeToJson { .. }
            | Error::DeserializeFromJson { .. }
+            | Error::RepartitionCreateSubtasks { .. }
+            | Error::RepartitionSerializePartitionExpr { .. }
            | Error::NoLeader { .. }
            | Error::LeaderLeaseExpired { .. }
            | Error::LeaderLeaseChanged { .. }
@@ -1032,7 +1164,8 @@ impl ErrorExt for Error {
            | Error::FlowStateHandler { .. }
            | Error::BuildWalOptionsAllocator { .. }
            | Error::BuildPartitionClient { .. }
-            | Error::BuildKafkaClient { .. } => StatusCode::Internal,
+            | Error::BuildKafkaClient { .. }
+            | Error::RepartitionSubprocedureStateFetch { .. } => StatusCode::Internal,

            Error::DeleteRecords { .. }
            | Error::GetOffset { .. }
@@ -1066,7 +1199,14 @@ impl ErrorExt for Error {
            | Error::TooManyPartitions { .. }
            | Error::TomlFormat { .. }
            | Error::HandlerNotFound { .. }
-            | Error::LeaderPeerChanged { .. } => StatusCode::InvalidArguments,
+            | Error::LeaderPeerChanged { .. }
+            | Error::RepartitionSourceExprMismatch { .. }
+            | Error::RepartitionMissingSourceRegionId { .. }
+            | Error::RepartitionSourceRegionRouteMissing { .. }
+            | Error::RepartitionNoSourceRegions { .. }
+            | Error::RepartitionMissingTargetRegionId { .. }
+            | Error::RepartitionTargetRegionRouteMissing { .. }
+            | Error::RepartitionMissingPrepareContext { .. } => StatusCode::InvalidArguments,
            Error::LeaseKeyFromUtf8 { .. }
            | Error::LeaseValueFromUtf8 { .. }
            | Error::InvalidRegionKeyFromUtf8 { .. }
@@ -1080,7 +1220,10 @@ impl ErrorExt for Error {
            | Error::RegionRouteNotFound { .. }
            | Error::MigrationAbort { .. }
            | Error::MigrationRunning { .. }
-            | Error::RegionMigrated { .. } => StatusCode::Unexpected,
+            | Error::RegionMigrated { .. }
+            | Error::RepartitionSubprocedureUnknown { .. }
+            | Error::RepartitionSubprocedureStateMissing { .. }
+            | Error::RepartitionSubprocedureFailed { .. } => StatusCode::Unexpected,
            Error::TableNotFound { .. } => StatusCode::TableNotFound,
            Error::SaveClusterInfo { source, .. }
            | Error::InvalidClusterInfoFormat { source, .. }
--- a/src/meta-srv/src/handler/persist_stats_handler.rs
+++ b/src/meta-srv/src/handler/persist_stats_handler.rs
@@ -77,7 +77,6 @@ struct PersistRegionStat<'a> {
    sst_size: u64,
    write_bytes_delta: u64,
    #[col(
-        // This col name is for the information schema table, so we don't touch it
        name = "greptime_timestamp",
        semantic = "Timestamp",
        datatype = "TimestampMillisecond"
--- a/src/meta-srv/src/procedure.rs
+++ b/src/meta-srv/src/procedure.rs
@@ -19,6 +19,7 @@ use common_procedure::ProcedureManagerRef;
 use snafu::ResultExt;

 pub mod region_migration;
+pub mod repartition;
 #[cfg(any(test, feature = "testing"))]
 pub mod test_util;
 #[cfg(test)]
--- a/src/meta-srv/src/procedure/repartition.rs
+++ b/src/meta-srv/src/procedure/repartition.rs
@@ -0,0 +1,506 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod context;
+mod group;
+mod plan;
+
+use std::collections::HashMap;
+
+use common_meta::ddl::DdlContext;
+use common_meta::key::table_route::PhysicalTableRouteValue;
+use common_meta::lock_key::{CatalogLock, SchemaLock, TableLock};
+use common_procedure::error::{Error as ProcedureError, Result as ProcedureResult, ToJsonSnafu};
+use common_procedure::{
+    Context as ProcedureContext, LockKey, Procedure, ProcedureId, ProcedureWithId, Status,
+};
+use common_telemetry::error;
+use partition::expr::PartitionExpr;
+use partition::subtask::{self, RepartitionSubtask};
+use serde::{Deserialize, Serialize};
+use snafu::{OptionExt, ResultExt};
+use store_api::storage::TableId;
+use strum::AsRefStr;
+use uuid::Uuid;
+
+use self::context::{GroupManifestSummary, RepartitionContext};
+use self::group::RepartitionGroupProcedure;
+use self::plan::{PlanEntry, PlanGroupId, RegionDescriptor, RepartitionPlan, ResourceDemand};
+use crate::error::{self, Result};
+
+/// Task payload passed from the DDL entry point.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RepartitionTask {
+    pub catalog_name: String,
+    pub schema_name: String,
+    pub table_name: String,
+    pub table_id: TableId,
+    /// Partition expressions representing the source regions.
+    pub from_exprs: Vec<PartitionExpr>,
+    /// Partition expressions representing the target regions.
+    pub into_exprs: Vec<PartitionExpr>,
+}
+
+/// Procedure that orchestrates the repartition flow.
+pub struct RepartitionProcedure {
+    context: DdlContext,
+    group_context: RepartitionContext,
+    data: RepartitionData,
+}
+
+impl RepartitionProcedure {
+    pub const TYPE_NAME: &'static str = "metasrv-procedure::Repartition";
+
+    /// Constructs a new procedure instance from a task payload.
+    pub fn new(task: RepartitionTask, context: DdlContext) -> Result<Self> {
+        let group_context = RepartitionContext::new(&context);
+        Ok(Self {
+            context,
+            group_context,
+            data: RepartitionData::new(task),
+        })
+    }
+
+    /// Builds the repartition plan if we have not done so yet.
+    async fn on_prepare(&mut self) -> Result<Status> {
+        if self.data.plan.is_none() {
+            self.build_plan().await?;
+        }
+
+        self.data.state = RepartitionState::AllocateResources;
+        Ok(Status::executing(true))
+    }
+
+    /// Allocates target regions and decides whether the procedure can proceed.
+    async fn on_allocate_resources(&mut self) -> Result<Status> {
+        if !self.data.resource_allocated {
+            let allocated = self.allocate_resources().await?;
+            if !allocated {
+                if let Some(plan) = &self.data.plan {
+                    let failed_groups = plan.entries.iter().map(|entry| entry.group_id);
+                    self.data.failed_groups.extend(failed_groups);
+                }
+                self.data.state = RepartitionState::Finalize;
+                return Ok(Status::executing(true));
+            }
+            self.data.resource_allocated = true;
+        }
+
+        self.data.state = RepartitionState::DispatchSubprocedures;
+        Ok(Status::executing(true))
+    }
+
+    /// Spawns group subprocedures for every pending plan entry.
+    async fn on_dispatch_subprocedures(&mut self) -> Result<Status> {
+        let plan = match self.data.plan.as_ref() {
+            Some(plan) => plan,
+            None => {
+                self.data.state = RepartitionState::Finalize;
+                return Ok(Status::executing(true));
+            }
+        };
+
+        let entries_to_schedule: Vec<PlanEntry> = plan
+            .entries
+            .iter()
+            .filter(|entry| {
+                !self.data.succeeded_groups.contains(&entry.group_id)
+                    && !self.data.failed_groups.contains(&entry.group_id)
+            })
+            .cloned()
+            .collect();
+
+        if entries_to_schedule.is_empty() {
+            self.data.state = RepartitionState::Finalize;
+            return Ok(Status::executing(true));
+        }
+
+        let groups_to_schedule: Vec<PlanGroupId> = entries_to_schedule
+            .iter()
+            .map(|entry| entry.group_id)
+            .collect();
+
+        let subprocedures = self.spawn_group_procedures(
+            plan.table_id,
+            plan.route_snapshot.clone(),
+            entries_to_schedule,
+        );
+        self.data.pending_groups = groups_to_schedule;
+        self.data.state = RepartitionState::CollectSubprocedures;
+
+        Ok(Status::suspended(subprocedures, true))
+    }
+
+    /// Records which pending subprocedures succeeded or failed and moves to finalisation.
+    async fn on_collect_subprocedures(&mut self, ctx: &ProcedureContext) -> Result<Status> {
+        let pending = std::mem::take(&mut self.data.pending_groups);
+        let mut first_error: Option<error::Error> = None;
+        let mut succeeded = Vec::new();
+
+        for group_id in pending {
+            let procedure_id = match self.data.group_subprocedures.remove(&group_id) {
+                Some(id) => id,
+                None => {
+                    let err = error::RepartitionSubprocedureUnknownSnafu { group_id }.build();
+                    self.data.failed_groups.push(group_id);
+                    if first_error.is_none() {
+                        first_error = Some(err);
+                    }
+                    continue;
+                }
+            };
+
+            let state_opt = ctx.provider.procedure_state(procedure_id).await.context(
+                error::RepartitionSubprocedureStateFetchSnafu {
+                    group_id,
+                    procedure_id,
+                },
+            )?;
+
+            let state = match state_opt {
+                Some(state) => state,
+                None => {
+                    let err = error::RepartitionSubprocedureStateMissingSnafu {
+                        group_id,
+                        procedure_id,
+                    }
+                    .build();
+                    self.data.failed_groups.push(group_id);
+                    if first_error.is_none() {
+                        first_error = Some(err);
+                    }
+                    continue;
+                }
+            };
+
+            if state.is_done() {
+                succeeded.push(group_id);
+                continue;
+            }
+
+            let reason = state
+                .error()
+                .map(|err| err.to_string())
+                .unwrap_or_else(|| format!("subprocedure state {}", state.as_str_name()));
+            let err = error::RepartitionSubprocedureFailedSnafu {
+                group_id,
+                procedure_id,
+                reason,
+            }
+            .build();
+            self.data.failed_groups.push(group_id);
+            if first_error.is_none() {
+                first_error = Some(err);
+            }
+        }
+
+        self.data.succeeded_groups.extend(succeeded);
+        self.data.state = RepartitionState::Finalize;
+
+        if let Some(err) = first_error {
+            return Err(err);
+        }
+
+        Ok(Status::executing(true))
+    }
+
+    /// Deallocates resources, then stores the summary of succeeded and failed groups.
+    async fn on_finalize(&mut self) -> Result<Status> {
+        self.deallocate_resources().await?;
+
+        self.data.summary = Some(RepartitionSummary {
+            succeeded_groups: self.data.succeeded_groups.clone(),
+            failed_groups: self.data.failed_groups.clone(),
+            manifest_summaries: self.group_context.manifest_summaries(),
+        });
+        self.group_context.clear_group_records();
+        self.data.state = RepartitionState::Finished;
+        Ok(Status::done())
+    }
+
+    /// Constructs the repartition plan from the task specification.
+    async fn build_plan(&mut self) -> Result<()> {
+        let table_id = self.data.task.table_id;
+        let from_exprs = &self.data.task.from_exprs;
+        let into_exprs = &self.data.task.into_exprs;
+
+        let (physical_table_id, physical_route) = self
+            .context
+            .table_metadata_manager
+            .table_route_manager()
+            .get_physical_table_route(table_id)
+            .await
+            .context(error::TableMetadataManagerSnafu)?;
+
+        let src_descriptors = Self::source_region_descriptors(from_exprs, &physical_route)?;
+        let subtasks = subtask::create_subtasks(from_exprs, into_exprs)
+            .context(error::RepartitionCreateSubtasksSnafu)?;
+        let entries = Self::build_plan_entries(subtasks, &src_descriptors, into_exprs);
+
+        let demand = ResourceDemand::from_plan_entries(&entries);
+        let plan = RepartitionPlan::new(physical_table_id, entries, demand, physical_route.clone());
+        self.data.plan = Some(plan);
+
+        Ok(())
+    }
+
+    fn source_region_descriptors(
+        from_exprs: &[PartitionExpr],
+        physical_route: &PhysicalTableRouteValue,
+    ) -> Result<Vec<RegionDescriptor>> {
+        let existing_regions = physical_route
+            .region_routes
+            .iter()
+            .map(|route| (route.region.id, route.region.partition_expr()))
+            .collect::<Vec<_>>();
+
+        let descriptors = from_exprs
+            .iter()
+            .map(|expr| {
+                let expr_json = expr
+                    .as_json_str()
+                    .context(error::RepartitionSerializePartitionExprSnafu)?;
+
+                let matched_region_id = existing_regions
+                    .iter()
+                    .find_map(|(region_id, existing_expr)| {
+                        (existing_expr == &expr_json).then_some(*region_id)
+                    })
+                    .with_context(|| error::RepartitionSourceExprMismatchSnafu {
+                        expr: expr_json,
+                    })?;
+
+                Ok(RegionDescriptor {
+                    region_id: Some(matched_region_id),
+                    partition_expr: expr.clone(),
+                })
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        Ok(descriptors)
+    }
+
+    fn build_plan_entries(
+        subtasks: Vec<RepartitionSubtask>,
+        source_index: &[RegionDescriptor],
+        target_exprs: &[PartitionExpr],
+    ) -> Vec<PlanEntry> {
+        let plan_entries = subtasks
+            .into_iter()
+            .map(|subtask| {
+                let group_id = Uuid::new_v4();
+                let sources = subtask
+                    .from_expr_indices
+                    .iter()
+                    .map(|&idx| source_index[idx].clone())
+                    .collect::<Vec<_>>();
+
+                let targets = subtask
+                    .to_expr_indices
+                    .iter()
+                    .map(|&idx| RegionDescriptor {
+                        region_id: None, // will be assigned later
+                        partition_expr: target_exprs[idx].clone(),
+                    })
+                    .collect::<Vec<_>>();
+
+                PlanEntry::new(group_id, subtask, sources, targets)
+            })
+            .collect::<Vec<_>>();
+
+        plan_entries
+    }
+
+    /// Allocates resources required by the plan. Returning `false`
+    /// indicates that the procedure should abort.
+    async fn allocate_resources(&mut self) -> Result<bool> {
+        todo!("allocate resources");
+    }
+
+    async fn deallocate_resources(&mut self) -> Result<()> {
+        if !self.data.resource_allocated {
+            return Ok(());
+        }
+        self.data.resource_allocated = false;
+
+        todo!("deallocate resources");
+    }
+
+    /// Builds the child procedure list for the provided plan groups.
+    fn spawn_group_procedures(
+        &mut self,
+        table_id: TableId,
+        route_snapshot: PhysicalTableRouteValue,
+        entries: Vec<PlanEntry>,
+    ) -> Vec<ProcedureWithId> {
+        let mut id_map = HashMap::new();
+
+        let procedures = entries
+            .into_iter()
+            .map(|entry| {
+                let group_id = entry.group_id;
+                let group_procedure = RepartitionGroupProcedure::new(
+                    entry,
+                    table_id,
+                    route_snapshot.clone(),
+                    self.data.task.catalog_name.clone(),
+                    self.data.task.schema_name.clone(),
+                    self.group_context.clone(),
+                );
+                let procedure = ProcedureWithId::with_random_id(Box::new(group_procedure));
+                id_map.insert(group_id, procedure.id);
+                procedure
+            })
+            .collect::<Vec<_>>();
+
+        self.data.group_subprocedures = id_map;
+        procedures
+    }
+
+    /// Composes the set of locks required to safely mutate table metadata.
+    fn table_lock_key(&self) -> Vec<common_procedure::StringKey> {
+        let mut lock_key = Vec::with_capacity(3);
+        let catalog = self.data.task.catalog_name.as_str();
+        let schema = self.data.task.schema_name.as_str();
+        lock_key.push(CatalogLock::Read(catalog).into());
+        lock_key.push(SchemaLock::read(catalog, schema).into());
+        lock_key.push(TableLock::Write(self.data.task.table_id).into());
+
+        lock_key
+    }
+
+    async fn trigger_group_rollbacks(&mut self) {
+        if self.data.rollback_triggered {
+            return;
+        }
+
+        match self.group_context.rollback_registered_groups().await {
+            Ok(_) => {
+                self.data.rollback_triggered = true;
+            }
+            Err(err) => {
+                error!(err; "repartition: rollback of successful groups failed");
+                self.data.rollback_triggered = true;
+            }
+        }
+    }
+}
+
+#[async_trait::async_trait]
+impl Procedure for RepartitionProcedure {
+    fn type_name(&self) -> &str {
+        Self::TYPE_NAME
+    }
+
+    async fn execute(&mut self, ctx: &ProcedureContext) -> ProcedureResult<Status> {
+        let state = self.data.state;
+        let status = match state {
+            RepartitionState::Prepare => self.on_prepare().await,
+            RepartitionState::AllocateResources => self.on_allocate_resources().await,
+            RepartitionState::DispatchSubprocedures => self.on_dispatch_subprocedures().await,
+            RepartitionState::CollectSubprocedures => self.on_collect_subprocedures(ctx).await,
+            RepartitionState::Finalize => self.on_finalize().await,
+            RepartitionState::Finished => Ok(Status::done()),
+        };
+
+        match status {
+            Ok(status) => Ok(status),
+            Err(err) => {
+                self.trigger_group_rollbacks().await;
+                if let Err(dealloc_err) = self.deallocate_resources().await {
+                    error!(dealloc_err; "repartition: deallocating resources after failure failed");
+                }
+                Err(map_repartition_error(err))
+            }
+        }
+    }
+
+    fn dump(&self) -> ProcedureResult<String> {
+        serde_json::to_string(&self.data).context(ToJsonSnafu)
+    }
+
+    fn lock_key(&self) -> LockKey {
+        LockKey::new(self.table_lock_key())
+    }
+}
+
+/// Serialized data of the repartition procedure.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct RepartitionData {
+    state: RepartitionState,
+    task: RepartitionTask,
+    #[serde(default)]
+    plan: Option<RepartitionPlan>,
+    #[serde(default)]
+    resource_allocated: bool,
+    #[serde(default)]
+    pending_groups: Vec<PlanGroupId>,
+    #[serde(default)]
+    succeeded_groups: Vec<PlanGroupId>,
+    #[serde(default)]
+    failed_groups: Vec<PlanGroupId>,
+    #[serde(default)]
+    summary: Option<RepartitionSummary>,
+    #[serde(default)]
+    rollback_triggered: bool,
+    #[serde(default)]
+    group_subprocedures: HashMap<PlanGroupId, ProcedureId>,
+}
+
+impl RepartitionData {
+    /// Initialise the procedure data for a fresh run.
+    fn new(task: RepartitionTask) -> Self {
+        Self {
+            state: RepartitionState::Prepare,
+            task,
+            plan: None,
+            resource_allocated: false,
+            pending_groups: Vec::new(),
+            succeeded_groups: Vec::new(),
+            failed_groups: Vec::new(),
+            summary: None,
+            rollback_triggered: false,
+            group_subprocedures: HashMap::new(),
+        }
+    }
+}
+
+pub(super) fn map_repartition_error(err: error::Error) -> ProcedureError {
+    match (err.is_retryable(), err.need_clean_poisons()) {
+        (true, true) => ProcedureError::retry_later_and_clean_poisons(err),
+        (true, false) => ProcedureError::retry_later(err),
+        (false, true) => ProcedureError::external_and_clean_poisons(err),
+        (false, false) => ProcedureError::external(err),
+    }
+}
+
+/// High level states of the repartition procedure.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, AsRefStr)]
+enum RepartitionState {
+    Prepare,
+    AllocateResources,
+    DispatchSubprocedures,
+    CollectSubprocedures,
+    Finalize,
+    Finished,
+}
+
+/// Information returned to the caller after the procedure finishes.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+struct RepartitionSummary {
+    succeeded_groups: Vec<PlanGroupId>,
+    failed_groups: Vec<PlanGroupId>,
+    #[serde(default)]
+    manifest_summaries: Vec<GroupManifestSummary>,
+}
--- a/src/meta-srv/src/procedure/repartition/context.rs
+++ b/src/meta-srv/src/procedure/repartition/context.rs
@@ -0,0 +1,351 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+use std::sync::{Arc, Mutex};
+
+use api::region::RegionResponse;
+use api::v1::region::{
+    ApplyStagedManifestRequest, PauseRequest, PublishRegionRuleRequest, RegionRequest,
+    RegionRequestHeader, RemapManifestRequest, ResumeRequest, StageRegionRuleRequest,
+    region_request,
+};
+use common_error::ext::BoxedError;
+use common_meta::ddl::DdlContext;
+use common_meta::key::TableMetadataManagerRef;
+use common_meta::node_manager::NodeManagerRef;
+use common_meta::peer::Peer;
+use common_telemetry::{error, info};
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+use snafu::ResultExt;
+use store_api::storage::RegionId;
+
+use crate::error::{self, Result};
+
+pub const REMAP_MANIFEST_STATS_EXTENSION: &str = "repartition.manifest.stats";
+
+use super::group::{GroupRollbackRecord, RepartitionGroupProcedure};
+use crate::procedure::repartition::plan::PlanGroupId;
+
+/// Track the overall manifest stage for a repartition group.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
+pub enum ManifestStatus {
+    #[default]
+    NotStarted,
+    Staged,
+    Published,
+    Discarded,
+    Skipped,
+    Failed,
+}
+
+/// Per-group status record that is collected by the top-level procedure.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct GroupManifestSummary {
+    pub group_id: PlanGroupId,
+    pub status: ManifestStatus,
+    pub staged_region_count: u64,
+    pub stats: Option<Value>,
+    pub error: Option<String>,
+}
+
+/// Shared context that allows group procedures to interact with metadata and
+/// datanodes. It also aggregates per-group manifest summaries.
+#[derive(Clone)]
+pub struct RepartitionContext {
+    pub table_metadata_manager: TableMetadataManagerRef,
+    pub node_manager: NodeManagerRef,
+    manifest_records: Arc<Mutex<HashMap<PlanGroupId, GroupManifestSummary>>>,
+    rollback_records: Arc<Mutex<HashMap<PlanGroupId, GroupRollbackRecord>>>,
+}
+
+impl RepartitionContext {
+    pub fn new(context: &DdlContext) -> Self {
+        Self {
+            table_metadata_manager: context.table_metadata_manager.clone(),
+            node_manager: context.node_manager.clone(),
+            manifest_records: Arc::new(Mutex::new(HashMap::new())),
+            rollback_records: Arc::new(Mutex::new(HashMap::new())),
+        }
+    }
+
+    /// Send a pause request to the region leader so that local IO is quiesced.
+    pub async fn pause_region_on_datanode(&self, peer: &Peer, region_id: RegionId) -> Result<()> {
+        info!(
+            "requesting pause to datanode {} for region {}",
+            peer.id, region_id
+        );
+        let datanode = self.node_manager.datanode(peer).await;
+        let request = RegionRequest {
+            header: Some(RegionRequestHeader::default()),
+            body: Some(region_request::Body::Pause(PauseRequest {
+                region_id: region_id.as_u64(),
+            })),
+        };
+        datanode
+            .handle(request)
+            .await
+            .map_err(BoxedError::new)
+            .context(error::RetryLaterWithSourceSnafu {
+                reason: format!(
+                    "failed to pause region {} on datanode {}",
+                    region_id, peer.id
+                ),
+            })?;
+        Ok(())
+    }
+
+    /// Resume a previously paused region.
+    pub async fn resume_region_on_datanode(&self, peer: &Peer, region_id: RegionId) -> Result<()> {
+        info!(
+            "requesting resume to datanode {} for region {}",
+            peer.id, region_id
+        );
+        let datanode = self.node_manager.datanode(peer).await;
+        let request = RegionRequest {
+            header: Some(RegionRequestHeader::default()),
+            body: Some(region_request::Body::Resume(ResumeRequest {
+                region_id: region_id.as_u64(),
+                rule_version: String::new(),
+            })),
+        };
+        datanode
+            .handle(request)
+            .await
+            .map_err(BoxedError::new)
+            .context(error::RetryLaterWithSourceSnafu {
+                reason: format!(
+                    "failed to resume region {} on datanode {}",
+                    region_id, peer.id
+                ),
+            })?;
+        Ok(())
+    }
+
+    /// Stage the provided rule version on the datanode.
+    pub async fn stage_region_rule_on_datanode(
+        &self,
+        peer: &Peer,
+        region_id: RegionId,
+        rule_version: &str,
+    ) -> Result<()> {
+        info!(
+            "requesting region rule staging to datanode {} for region {}",
+            peer.id, region_id
+        );
+        let datanode = self.node_manager.datanode(peer).await;
+        let request = RegionRequest {
+            header: Some(RegionRequestHeader::default()),
+            body: Some(region_request::Body::StageRegionRule(
+                StageRegionRuleRequest {
+                    region_id: region_id.as_u64(),
+                    rule_version: rule_version.to_string(),
+                },
+            )),
+        };
+        datanode
+            .handle(request)
+            .await
+            .map_err(BoxedError::new)
+            .context(error::RetryLaterWithSourceSnafu {
+                reason: format!(
+                    "failed to stage region rule for region {} on datanode {}",
+                    region_id, peer.id
+                ),
+            })?;
+        Ok(())
+    }
+
+    /// Ask `peer` to publish `rule_version` for `region_id`, wrapping RPC failures as retry-later.
+    pub async fn publish_region_rule_on_datanode(
+        &self,
+        peer: &Peer,
+        region_id: RegionId,
+        rule_version: &str,
+    ) -> Result<()> {
+        info!(
+            "requesting region rule publish to datanode {} for region {}",
+            peer.id, region_id
+        );
+        let datanode = self.node_manager.datanode(peer).await;
+        let body = region_request::Body::PublishRegionRule(PublishRegionRuleRequest {
+            region_id: region_id.as_u64(),
+            rule_version: rule_version.to_string(),
+        });
+        let request = RegionRequest {
+            header: Some(RegionRequestHeader::default()),
+            body: Some(body),
+        };
+        datanode
+            .handle(request)
+            .await
+            .map_err(BoxedError::new)
+            // Build the message lazily so the success path does not allocate it.
+            .with_context(|_| error::RetryLaterWithSourceSnafu {
+                reason: format!(
+                    "failed to publish region rule for region {} on datanode {}",
+                    region_id, peer.id
+                ),
+            })?;
+        Ok(())
+    }
+
+    /// Clear the staged rule on `peer` by staging an empty `rule_version` (used during rollback).
+    pub async fn clear_region_rule_stage_on_datanode(
+        &self,
+        peer: &Peer,
+        region_id: RegionId,
+    ) -> Result<()> {
+        info!(
+            "requesting region rule stage clear to datanode {} for region {}",
+            peer.id, region_id
+        );
+        let datanode = self.node_manager.datanode(peer).await;
+        let body = region_request::Body::StageRegionRule(StageRegionRuleRequest {
+            region_id: region_id.as_u64(),
+            rule_version: String::new(),
+        });
+        let request = RegionRequest {
+            header: Some(RegionRequestHeader::default()),
+            body: Some(body),
+        };
+        datanode
+            .handle(request)
+            .await
+            .map_err(BoxedError::new)
+            // Build the message lazily so the success path does not allocate it.
+            .with_context(|_| error::RetryLaterWithSourceSnafu {
+                reason: format!(
+                    "failed to clear staged region rule for region {} on datanode {}",
+                    region_id, peer.id
+                ),
+            })?;
+        Ok(())
+    }
+
+    /// Send `manifest_request` to `peer` as a `RemapManifest` request and return its response.
+    pub async fn remap_manifests_on_datanode(
+        &self,
+        peer: &Peer,
+        manifest_request: RemapManifestRequest,
+    ) -> Result<RegionResponse> {
+        let table_id = manifest_request.table_id;
+        let group_id = manifest_request.group_id.clone();
+        info!(
+            "requesting manifest remap to datanode {} for table {} in group {}",
+            peer.id, table_id, group_id
+        );
+        let datanode = self.node_manager.datanode(peer).await;
+        let region_request = RegionRequest {
+            header: Some(RegionRequestHeader::default()),
+            body: Some(region_request::Body::RemapManifest(manifest_request)),
+        };
+        datanode
+            .handle(region_request)
+            .await
+            .map_err(BoxedError::new)
+            // Build the message lazily so the success path does not allocate it.
+            .with_context(|_| error::RetryLaterWithSourceSnafu {
+                reason: format!(
+                    "failed to remap manifests for group {} on datanode {}",
+                    group_id, peer.id
+                ),
+            })
+    }
+
+    /// Publish or discard the manifests previously staged for a group on `peer`.
+    ///
+    /// `manifest_request.publish` selects the action; here it only affects the log and error
+    /// wording, the request itself is forwarded to the datanode unchanged.
+    /// Returns the datanode's response; failures are wrapped with `RetryLaterWithSourceSnafu`.
+    pub async fn apply_staged_manifests_on_datanode(
+        &self,
+        peer: &Peer,
+        manifest_request: ApplyStagedManifestRequest,
+    ) -> Result<RegionResponse> {
+        let publish = manifest_request.publish;
+        let action = if publish { "publish" } else { "discard" };
+        let table_id = manifest_request.table_id;
+        let group_id = manifest_request.group_id.clone();
+        info!(
+            "requesting manifest {} on datanode {} for table {} in group {}",
+            action, peer.id, table_id, group_id
+        );
+        let datanode = self.node_manager.datanode(peer).await;
+        let region_request = RegionRequest {
+            header: Some(RegionRequestHeader::default()),
+            body: Some(region_request::Body::ApplyStagedManifest(manifest_request)),
+        };
+        datanode
+            .handle(region_request)
+            .await
+            .map_err(BoxedError::new)
+            // Build the message lazily so the success path does not allocate it.
+            .with_context(|_| error::RetryLaterWithSourceSnafu {
+                reason: format!(
+                    "failed to {} staged manifests for group {} on datanode {}",
+                    action, group_id, peer.id
+                ),
+            })
+    }
+
+    /// Record `summary` in `manifest_records`, keyed by the summary's own group id.
+    pub fn record_manifest_summary(&self, summary: GroupManifestSummary) {
+        let group_id = summary.group_id;
+        self.manifest_records.lock().unwrap().insert(group_id, summary);
+    }
+
+    /// Keep `record` so that `rollback_registered_groups` can later roll this group back.
+    pub fn register_group_success(&self, record: GroupRollbackRecord) {
+        let mut registered = self.rollback_records.lock().unwrap();
+        registered.insert(record.group_id, record);
+    }
+
+    /// Roll back every group previously registered through `register_group_success`.
+    /// All records are attempted even when one fails; only the first error is returned.
+    /// NOTE(review): records are drained up front, so a failed group is not kept for a retry.
+    pub async fn rollback_registered_groups(&self) -> Result<()> {
+        // Take the records inside a block so the guard is dropped before any `.await`.
+        let pending: Vec<GroupRollbackRecord> = {
+            let mut registered = self.rollback_records.lock().unwrap();
+            registered.drain().map(|(_, record)| record).collect()
+        };
+
+        let mut first_failure: Option<error::Error> = None;
+        for record in pending {
+            let group_id = record.group_id;
+            let outcome = RepartitionGroupProcedure::execute_rollback(self.clone(), record).await;
+            if let Err(err) = outcome {
+                error!(err; "repartition: rollback of group {:?} failed", group_id);
+                first_failure = first_failure.or(Some(err));
+            }
+        }
+
+        match first_failure {
+            Some(err) => Err(err),
+            None => Ok(()),
+        }
+    }
+
+    pub fn clear_group_records(&self) {
+        self.rollback_records.lock().unwrap().clear(); // drops the records; nothing is rolled back
+    }
+
+    /// Return every manifest summary recorded so far.
+    /// The summaries are cloned, so the returned vector is independent of later updates.
+    pub fn manifest_summaries(&self) -> Vec<GroupManifestSummary> {
+        self.manifest_records.lock().unwrap().values().cloned().collect()
+    }
+}
--- a/src/meta-srv/src/procedure/repartition/group.rs
+++ b/src/meta-srv/src/procedure/repartition/group.rs
--- a/src/meta-srv/src/procedure/repartition/plan.rs
+++ b/src/meta-srv/src/procedure/repartition/plan.rs
@@ -0,0 +1,95 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use common_meta::key::table_route::PhysicalTableRouteValue;
+use partition::expr::PartitionExpr;
+use partition::subtask::RepartitionSubtask;
+use serde::{Deserialize, Serialize};
+use store_api::storage::{RegionId, TableId};
+use uuid::Uuid;
+
+/// Identifier of a plan group; an alias for [`Uuid`].
+pub type PlanGroupId = Uuid;
+
+/// Logical description of a repartition plan for a single table.
+///
+/// The plan is persisted by the procedure framework, so its serialized form
+/// must stay readable across versions (hence the serde derives below).
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub struct RepartitionPlan {
+    pub table_id: TableId, // presumably the table being repartitioned — TODO confirm
+    pub entries: Vec<PlanEntry>, // each entry carries its own `group_id`
+    pub resource_demand: ResourceDemand, // currently an empty placeholder type
+    pub route_snapshot: PhysicalTableRouteValue, // presumably the route at planning time — TODO confirm
+}
+
+impl RepartitionPlan {
+    pub fn new(
+        table_id: TableId,
+        entries: Vec<PlanEntry>,
+        resource_demand: ResourceDemand,
+        route_snapshot: PhysicalTableRouteValue,
+    ) -> Self {
+        Self {
+            table_id, // every argument is stored as given; nothing is validated or derived
+            entries,
+            resource_demand,
+            route_snapshot,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct PlanEntry {
+    pub group_id: PlanGroupId, // identifier of the plan group this entry describes
+    pub subtask: RepartitionSubtask, // subtask produced by the `partition` crate
+    pub sources: Vec<RegionDescriptor>, // presumably the regions data is taken from — TODO confirm
+    pub targets: Vec<RegionDescriptor>, // presumably the regions data ends up in — TODO confirm
+}
+
+impl PlanEntry {
+    /// Build a plan entry from its parts; the arguments are stored as given.
+    /// Per the original note, an entry represents a connected component returned by the planner.
+    pub fn new(
+        group_id: PlanGroupId,
+        subtask: RepartitionSubtask,
+        sources: Vec<RegionDescriptor>,
+        targets: Vec<RegionDescriptor>,
+    ) -> Self {
+        Self {
+            group_id,
+            subtask,
+            sources,
+            targets,
+        }
+    }
+}
+
+/// Metadata describing a region involved in the plan: an optional id plus its partition expression.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct RegionDescriptor {
+    pub region_id: Option<RegionId>, // NOTE(review): `None` presumably means "not allocated yet" — confirm
+    pub partition_expr: PartitionExpr,
+}
+
+/// Auxiliary information about resources required to execute the plan (no fields yet).
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
+pub struct ResourceDemand {}
+
+impl ResourceDemand {
+    pub fn from_plan_entries(_entries: &[PlanEntry]) -> Self {
+        // Placeholder: `_entries` is currently ignored and an empty demand is returned.
+        Self::default()
+    }
+}
--- a/src/meta-srv/src/service/admin/heartbeat.rs
+++ b/src/meta-srv/src/service/admin/heartbeat.rs
@@ -254,7 +254,7 @@ mod tests {
        assert_eq!(status, http::StatusCode::OK);
        assert_eq!(
            body,
-            "[[{\"timestamp_millis\":3,\"id\":0,\"addr\":\"127.0.0.1:3001\",\"rcus\":0,\"wcus\":0,\"region_num\":0,\"region_stats\":[],\"topic_stats\":[],\"node_epoch\":0,\"datanode_workloads\":{\"types\":[]},\"gc_stat\":null}]]"
+            "[[{\"timestamp_millis\":3,\"id\":0,\"addr\":\"127.0.0.1:3001\",\"rcus\":0,\"wcus\":0,\"region_num\":0,\"region_stats\":[],\"topic_stats\":[],\"node_epoch\":0,\"datanode_workloads\":{\"types\":[]}}]]"
        );
    }
 }
--- a/src/metric-engine/src/data_region.rs
+++ b/src/metric-engine/src/data_region.rs
@@ -240,7 +240,6 @@ impl DataRegion {

 #[cfg(test)]
 mod test {
-    use common_query::prelude::{greptime_timestamp, greptime_value};
    use datatypes::prelude::ConcreteDataType;
    use datatypes::schema::ColumnSchema;

@@ -301,8 +300,8 @@ mod test {
            .map(|c| &c.column_schema.name)
            .collect::<Vec<_>>();
        let expected = vec![
-            greptime_timestamp(),
-            greptime_value(),
+            "greptime_timestamp",
+            "greptime_value",
            "__table_id",
            "__tsid",
            "job",
--- a/src/metric-engine/src/engine/alter.rs
+++ b/src/metric-engine/src/engine/alter.rs
@@ -224,7 +224,6 @@ mod test {
    use api::v1::SemanticType;
    use common_meta::ddl::test_util::assert_column_name_and_id;
    use common_meta::ddl::utils::{parse_column_metadatas, parse_manifest_infos_from_extensions};
-    use common_query::prelude::{greptime_timestamp, greptime_value};
    use store_api::metric_engine_consts::ALTER_PHYSICAL_EXTENSION_KEY;
    use store_api::region_engine::RegionEngine;
    use store_api::region_request::{
@@ -296,7 +295,7 @@ mod test {
            .unwrap();
        assert_eq!(semantic_type, SemanticType::Tag);
        let timestamp_index = metadata_region
-            .column_semantic_type(physical_region_id, logical_region_id, greptime_timestamp())
+            .column_semantic_type(physical_region_id, logical_region_id, "greptime_timestamp")
            .await
            .unwrap()
            .unwrap();
@@ -306,8 +305,8 @@ mod test {
        assert_column_name_and_id(
            &column_metadatas,
            &[
-                (greptime_timestamp(), 0),
-                (greptime_value(), 1),
+                ("greptime_timestamp", 0),
+                ("greptime_value", 1),
                ("__table_id", ReservedColumnId::table_id()),
                ("__tsid", ReservedColumnId::tsid()),
                ("job", 2),
@@ -365,8 +364,8 @@ mod test {
        assert_column_name_and_id(
            &column_metadatas,
            &[
-                (greptime_timestamp(), 0),
-                (greptime_value(), 1),
+                ("greptime_timestamp", 0),
+                ("greptime_value", 1),
                ("__table_id", ReservedColumnId::table_id()),
                ("__tsid", ReservedColumnId::tsid()),
                ("job", 2),
--- a/src/metric-engine/src/engine/create.rs
+++ b/src/metric-engine/src/engine/create.rs
@@ -619,7 +619,6 @@ pub(crate) fn region_options_for_metadata_region(
 mod test {
    use common_meta::ddl::test_util::assert_column_name_and_id;
    use common_meta::ddl::utils::{parse_column_metadatas, parse_manifest_infos_from_extensions};
-    use common_query::prelude::{greptime_timestamp, greptime_value};
    use store_api::metric_engine_consts::{METRIC_ENGINE_NAME, PHYSICAL_TABLE_METADATA_KEY};
    use store_api::region_request::BatchRegionDdlRequest;

@@ -857,8 +856,8 @@ mod test {
        assert_column_name_and_id(
            &column_metadatas,
            &[
-                (greptime_timestamp(), 0),
-                (greptime_value(), 1),
+                ("greptime_timestamp", 0),
+                ("greptime_value", 1),
                ("__table_id", ReservedColumnId::table_id()),
                ("__tsid", ReservedColumnId::tsid()),
                ("job", 2),
--- a/src/metric-engine/src/engine/sync.rs
+++ b/src/metric-engine/src/engine/sync.rs
@@ -110,7 +110,6 @@ mod tests {
    use std::collections::HashMap;

    use api::v1::SemanticType;
-    use common_query::prelude::greptime_timestamp;
    use common_telemetry::info;
    use datatypes::data_type::ConcreteDataType;
    use datatypes::schema::ColumnSchema;
@@ -244,7 +243,7 @@ mod tests {
            .unwrap();
        assert_eq!(semantic_type, SemanticType::Tag);
        let timestamp_index = metadata_region
-            .column_semantic_type(physical_region_id, logical_region_id, greptime_timestamp())
+            .column_semantic_type(physical_region_id, logical_region_id, "greptime_timestamp")
            .await
            .unwrap()
            .unwrap();
--- a/src/metric-engine/src/test_util.rs
+++ b/src/metric-engine/src/test_util.rs
@@ -17,7 +17,6 @@
 use api::v1::value::ValueData;
 use api::v1::{ColumnDataType, ColumnSchema as PbColumnSchema, Row, SemanticType, Value};
 use common_meta::ddl::utils::parse_column_metadatas;
-use common_query::prelude::{greptime_timestamp, greptime_value};
 use common_telemetry::debug;
 use datatypes::prelude::ConcreteDataType;
 use datatypes::schema::ColumnSchema;
@@ -133,7 +132,7 @@ impl TestEnv {
                    column_id: 0,
                    semantic_type: SemanticType::Timestamp,
                    column_schema: ColumnSchema::new(
-                        greptime_timestamp(),
+                        "greptime_timestamp",
                        ConcreteDataType::timestamp_millisecond_datatype(),
                        false,
                    ),
@@ -142,7 +141,7 @@ impl TestEnv {
                    column_id: 1,
                    semantic_type: SemanticType::Field,
                    column_schema: ColumnSchema::new(
-                        greptime_value(),
+                        "greptime_value",
                        ConcreteDataType::float64_datatype(),
                        false,
                    ),
@@ -205,8 +204,8 @@ impl TestEnv {
        assert_eq!(
            column_names,
            vec![
-                greptime_timestamp(),
-                greptime_value(),
+                "greptime_timestamp",
+                "greptime_value",
                "__table_id",
                "__tsid",
                "job",
@@ -301,7 +300,7 @@ pub fn create_logical_region_request(
            column_id: 0,
            semantic_type: SemanticType::Timestamp,
            column_schema: ColumnSchema::new(
-                greptime_timestamp(),
+                "greptime_timestamp",
                ConcreteDataType::timestamp_millisecond_datatype(),
                false,
            ),
@@ -310,7 +309,7 @@ pub fn create_logical_region_request(
            column_id: 1,
            semantic_type: SemanticType::Field,
            column_schema: ColumnSchema::new(
-                greptime_value(),
+                "greptime_value",
                ConcreteDataType::float64_datatype(),
                false,
            ),
@@ -373,14 +372,14 @@ pub fn alter_logical_region_request(tags: &[&str]) -> RegionAlterRequest {
 pub fn row_schema_with_tags(tags: &[&str]) -> Vec<PbColumnSchema> {
    let mut schema = vec![
        PbColumnSchema {
-            column_name: greptime_timestamp().to_string(),
+            column_name: "greptime_timestamp".to_string(),
            datatype: ColumnDataType::TimestampMillisecond as i32,
            semantic_type: SemanticType::Timestamp as _,
            datatype_extension: None,
            options: None,
        },
        PbColumnSchema {
-            column_name: greptime_value().to_string(),
+            column_name: "greptime_value".to_string(),
            datatype: ColumnDataType::Float64 as i32,
            semantic_type: SemanticType::Field as _,
            datatype_extension: None,
--- a/src/mito-codec/Cargo.toml
+++ b/src/mito-codec/Cargo.toml
@@ -15,7 +15,6 @@ common-base.workspace = true
 common-decimal.workspace = true
 common-error.workspace = true
 common-macro.workspace = true
-common-query.workspace = true
 common-recordbatch.workspace = true
 common-telemetry.workspace = true
 common-time.workspace = true
--- a/src/mito-codec/src/primary_key_filter.rs
+++ b/src/mito-codec/src/primary_key_filter.rs
@@ -154,7 +154,6 @@ mod tests {
    use std::sync::Arc;

    use api::v1::SemanticType;
-    use common_query::prelude::{greptime_timestamp, greptime_value};
    use datafusion_common::Column;
    use datafusion_expr::{BinaryExpr, Expr, Literal, Operator};
    use datatypes::prelude::ConcreteDataType;
@@ -194,7 +193,7 @@ mod tests {
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
-                    greptime_value(),
+                    "greptime_value",
                    ConcreteDataType::float64_datatype(),
                    false,
                ),
@@ -203,7 +202,7 @@ mod tests {
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
-                    greptime_timestamp(),
+                    "greptime_timestamp",
                    ConcreteDataType::timestamp_nanosecond_datatype(),
                    false,
                ),
--- a/src/mito-codec/src/row_converter/sparse.rs
+++ b/src/mito-codec/src/row_converter/sparse.rs
@@ -385,7 +385,6 @@ mod tests {
    use std::sync::Arc;

    use api::v1::SemanticType;
-    use common_query::prelude::{greptime_timestamp, greptime_value};
    use common_time::Timestamp;
    use common_time::timestamp::TimeUnit;
    use datatypes::schema::ColumnSchema;
@@ -462,7 +461,7 @@ mod tests {
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
-                    greptime_value(),
+                    "greptime_value",
                    ConcreteDataType::float64_datatype(),
                    false,
                ),
@@ -471,7 +470,7 @@ mod tests {
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
-                    greptime_timestamp(),
+                    "greptime_timestamp",
                    ConcreteDataType::timestamp_nanosecond_datatype(),
                    false,
                ),
--- a/src/mito2/src/access_layer.rs
+++ b/src/mito2/src/access_layer.rs
@@ -72,7 +72,7 @@ pub struct Metrics {
 }

 impl Metrics {
-    pub fn new(write_type: WriteType) -> Self {
+    pub(crate) fn new(write_type: WriteType) -> Self {
        Self {
            write_type,
            iter_source: Default::default(),
@@ -255,12 +255,12 @@ impl AccessLayer {
        &self,
        request: SstWriteRequest,
        write_opts: &WriteOptions,
-        metrics: &mut Metrics,
-    ) -> Result<SstInfoArray> {
+        write_type: WriteType,
+    ) -> Result<(SstInfoArray, Metrics)> {
        let region_id = request.metadata.region_id;
        let cache_manager = request.cache_manager.clone();

-        let sst_info = if let Some(write_cache) = cache_manager.write_cache() {
+        let (sst_info, metrics) = if let Some(write_cache) = cache_manager.write_cache() {
            // Write to the write cache.
            write_cache
                .write_and_upload_sst(
@@ -273,7 +273,7 @@ impl AccessLayer {
                        remote_store: self.object_store.clone(),
                    },
                    write_opts,
-                    metrics,
+                    write_type,
                )
                .await?
        } else {
@@ -303,11 +303,11 @@ impl AccessLayer {
                request.index_config,
                indexer_builder,
                path_provider,
-                metrics,
+                Metrics::new(write_type),
            )
            .await
            .with_file_cleaner(cleaner);
-            match request.source {
+            let ssts = match request.source {
                Either::Left(source) => {
                    writer
                        .write_all(source, request.max_sequence, write_opts)
@@ -316,7 +316,9 @@ impl AccessLayer {
                Either::Right(flat_source) => {
                    writer.write_all_flat(flat_source, write_opts).await?
                }
-            }
+            };
+            let metrics = writer.into_metrics();
+            (ssts, metrics)
        };

        // Put parquet metadata to cache manager.
@@ -331,7 +333,7 @@ impl AccessLayer {
            }
        }

-        Ok(sst_info)
+        Ok((sst_info, metrics))
    }

    /// Puts encoded SST bytes to the write cache (if enabled) and uploads it to the object store.
--- a/src/mito2/src/cache/index/bloom_filter_index.rs
+++ b/src/mito2/src/cache/index/bloom_filter_index.rs
@@ -15,7 +15,7 @@
 use std::ops::Range;
 use std::sync::Arc;

-use api::v1::index::{BloomFilterLoc, BloomFilterMeta};
+use api::v1::index::BloomFilterMeta;
 use async_trait::async_trait;
 use bytes::Bytes;
 use index::bloom_filter::error::Result;
@@ -60,17 +60,11 @@ impl BloomFilterIndexCache {
 /// Calculates weight for bloom filter index metadata.
 fn bloom_filter_index_metadata_weight(
    k: &(FileId, ColumnId, Tag),
-    meta: &Arc<BloomFilterMeta>,
+    _: &Arc<BloomFilterMeta>,
 ) -> u32 {
-    let base = k.0.as_bytes().len()
+    (k.0.as_bytes().len()
        + std::mem::size_of::<ColumnId>()
-        + std::mem::size_of::<Tag>()
-        + std::mem::size_of::<BloomFilterMeta>();
-
-    let vec_estimated = meta.segment_loc_indices.len() * std::mem::size_of::<u64>()
-        + meta.bloom_filter_locs.len() * std::mem::size_of::<BloomFilterLoc>();
-
-    (base + vec_estimated) as u32
+        + std::mem::size_of::<BloomFilterMeta>()) as u32
 }

 /// Calculates weight for bloom filter index content.
@@ -177,45 +171,6 @@ mod test {

    const FUZZ_REPEAT_TIMES: usize = 100;

-    #[test]
-    fn bloom_filter_metadata_weight_counts_vec_contents() {
-        let file_id = FileId::parse_str("00000000-0000-0000-0000-000000000001").unwrap();
-        let column_id: ColumnId = 42;
-        let tag = Tag::Skipping;
-
-        let meta = BloomFilterMeta {
-            rows_per_segment: 128,
-            segment_count: 2,
-            row_count: 256,
-            bloom_filter_size: 1024,
-            segment_loc_indices: vec![0, 64, 128, 192],
-            bloom_filter_locs: vec![
-                BloomFilterLoc {
-                    offset: 0,
-                    size: 512,
-                    element_count: 1000,
-                },
-                BloomFilterLoc {
-                    offset: 512,
-                    size: 512,
-                    element_count: 1000,
-                },
-            ],
-        };
-
-        let weight =
-            bloom_filter_index_metadata_weight(&(file_id, column_id, tag), &Arc::new(meta.clone()));
-
-        let base = file_id.as_bytes().len()
-            + std::mem::size_of::<ColumnId>()
-            + std::mem::size_of::<Tag>()
-            + std::mem::size_of::<BloomFilterMeta>();
-        let expected_dynamic = meta.segment_loc_indices.len() * std::mem::size_of::<u64>()
-            + meta.bloom_filter_locs.len() * std::mem::size_of::<BloomFilterLoc>();
-
-        assert_eq!(weight as usize, base + expected_dynamic);
-    }
-
    #[test]
    fn fuzz_index_calculation() {
        let mut rng = rand::rng();
--- a/src/mito2/src/cache/write_cache.rs
+++ b/src/mito2/src/cache/write_cache.rs
@@ -169,8 +169,8 @@ impl WriteCache {
        write_request: SstWriteRequest,
        upload_request: SstUploadRequest,
        write_opts: &WriteOptions,
-        metrics: &mut Metrics,
-    ) -> Result<SstInfoArray> {
+        write_type: WriteType,
+    ) -> Result<(SstInfoArray, Metrics)> {
        let region_id = write_request.metadata.region_id;

        let store = self.file_cache.local_store();
@@ -197,7 +197,7 @@ impl WriteCache {
            write_request.index_config,
            indexer,
            path_provider.clone(),
-            metrics,
+            Metrics::new(write_type),
        )
        .await
        .with_file_cleaner(cleaner);
@@ -210,10 +210,11 @@ impl WriteCache {
            }
            either::Right(flat_source) => writer.write_all_flat(flat_source, write_opts).await?,
        };
+        let mut metrics = writer.into_metrics();

        // Upload sst file to remote object store.
        if sst_info.is_empty() {
-            return Ok(sst_info);
+            return Ok((sst_info, metrics));
        }

        let mut upload_tracker = UploadTracker::new(region_id);
@@ -255,7 +256,7 @@ impl WriteCache {
            return Err(err);
        }

-        Ok(sst_info)
+        Ok((sst_info, metrics))
    }

    /// Removes a file from the cache by `index_key`.
@@ -558,9 +559,8 @@ mod tests {
        };

        // Write to cache and upload sst to mock remote store
-        let mut metrics = Metrics::new(WriteType::Flush);
-        let mut sst_infos = write_cache
-            .write_and_upload_sst(write_request, upload_request, &write_opts, &mut metrics)
+        let (mut sst_infos, _) = write_cache
+            .write_and_upload_sst(write_request, upload_request, &write_opts, WriteType::Flush)
            .await
            .unwrap();
        let sst_info = sst_infos.remove(0);
@@ -655,9 +655,8 @@ mod tests {
            remote_store: mock_store.clone(),
        };

-        let mut metrics = Metrics::new(WriteType::Flush);
-        let mut sst_infos = write_cache
-            .write_and_upload_sst(write_request, upload_request, &write_opts, &mut metrics)
+        let (mut sst_infos, _) = write_cache
+            .write_and_upload_sst(write_request, upload_request, &write_opts, WriteType::Flush)
            .await
            .unwrap();
        let sst_info = sst_infos.remove(0);
@@ -736,9 +735,8 @@ mod tests {
            remote_store: mock_store.clone(),
        };

-        let mut metrics = Metrics::new(WriteType::Flush);
        write_cache
-            .write_and_upload_sst(write_request, upload_request, &write_opts, &mut metrics)
+            .write_and_upload_sst(write_request, upload_request, &write_opts, WriteType::Flush)
            .await
            .unwrap_err();
        let atomic_write_dir = write_cache_dir.path().join(ATOMIC_WRITE_DIR);
--- a/src/mito2/src/compaction/compactor.rs
+++ b/src/mito2/src/compaction/compactor.rs
@@ -30,9 +30,7 @@ use store_api::metadata::RegionMetadataRef;
 use store_api::region_request::PathType;
 use store_api::storage::RegionId;

-use crate::access_layer::{
-    AccessLayer, AccessLayerRef, Metrics, OperationType, SstWriteRequest, WriteType,
-};
+use crate::access_layer::{AccessLayer, AccessLayerRef, OperationType, SstWriteRequest, WriteType};
 use crate::cache::{CacheManager, CacheManagerRef};
 use crate::compaction::picker::{PickerOutput, new_picker};
 use crate::compaction::{CompactionSstReaderBuilder, find_ttl};
@@ -389,8 +387,7 @@ impl Compactor for DefaultCompactor {
                    let reader = builder.build_sst_reader().await?;
                    either::Left(Source::Reader(reader))
                };
-                let mut metrics = Metrics::new(WriteType::Compaction);
-                let sst_infos = sst_layer
+                let (sst_infos, metrics) = sst_layer
                    .write_sst(
                        SstWriteRequest {
                            op_type: OperationType::Compact,
@@ -406,7 +403,7 @@ impl Compactor for DefaultCompactor {
                            bloom_filter_index_config,
                        },
                        &write_opts,
-                        &mut metrics,
+                        WriteType::Compaction,
                    )
                    .await?;
                // Convert partition expression once outside the map
--- a/src/mito2/src/config.rs
+++ b/src/mito2/src/config.rs
@@ -25,7 +25,6 @@ use serde::{Deserialize, Serialize};
 use serde_with::serde_as;

 use crate::error::Result;
-use crate::gc::GcConfig;
 use crate::memtable::MemtableConfig;
 use crate::sst::DEFAULT_WRITE_BUFFER_SIZE;

@@ -149,8 +148,6 @@ pub struct MitoConfig {
    /// Whether to enable experimental flat format as the default format.
    /// When enabled, forces using BulkMemtable and BulkMemtableBuilder.
    pub default_experimental_flat_format: bool,
-
-    pub gc: GcConfig,
 }

 impl Default for MitoConfig {
@@ -189,7 +186,6 @@ impl Default for MitoConfig {
            memtable: MemtableConfig::default(),
            min_compaction_interval: Duration::from_secs(0),
            default_experimental_flat_format: false,
-            gc: GcConfig::default(),
        };

        // Adjust buffer and cache size according to system memory if we can.
--- a/src/mito2/src/engine.rs
+++ b/src/mito2/src/engine.rs
@@ -102,7 +102,7 @@ use store_api::region_engine::{
 };
 use store_api::region_request::{AffectedRows, RegionOpenRequest, RegionRequest};
 use store_api::sst_entry::{ManifestSstEntry, PuffinIndexMetaEntry, StorageSstEntry};
-use store_api::storage::{FileId, FileRefsManifest, RegionId, ScanRequest, SequenceNumber};
+use store_api::storage::{FileId, RegionId, ScanRequest, SequenceNumber};
 use tokio::sync::{Semaphore, oneshot};

 use crate::access_layer::RegionFilePathFactory;
@@ -115,7 +115,6 @@ use crate::error::{
 };
 #[cfg(feature = "enterprise")]
 use crate::extension::BoxedExtensionRangeProviderFactory;
-use crate::gc::GcLimiterRef;
 use crate::manifest::action::RegionEdit;
 use crate::memtable::MemtableStats;
 use crate::metrics::HANDLE_REQUEST_ELAPSED;
@@ -262,33 +261,6 @@ impl MitoEngine {
        self.inner.workers.file_ref_manager()
    }

-    pub fn gc_limiter(&self) -> GcLimiterRef {
-        self.inner.workers.gc_limiter()
-    }
-
-    /// Get all tmp ref files for given region ids, excluding files that's already in manifest.
-    pub async fn get_snapshot_of_unmanifested_refs(
-        &self,
-        region_ids: impl IntoIterator<Item = RegionId>,
-    ) -> Result<FileRefsManifest> {
-        let file_ref_mgr = self.file_ref_manager();
-
-        let region_ids = region_ids.into_iter().collect::<Vec<_>>();
-
-        // Convert region IDs to MitoRegionRef objects, error if any region doesn't exist
-        let regions: Vec<MitoRegionRef> = region_ids
-            .into_iter()
-            .map(|region_id| {
-                self.find_region(region_id)
-                    .with_context(|| RegionNotFoundSnafu { region_id })
-            })
-            .collect::<Result<_>>()?;
-
-        file_ref_mgr
-            .get_snapshot_of_unmanifested_refs(regions)
-            .await
-    }
-
    /// Returns true if the specific region exists.
    pub fn is_region_exists(&self, region_id: RegionId) -> bool {
        self.inner.workers.is_region_exists(region_id)
@@ -385,7 +357,7 @@ impl MitoEngine {
        self.find_region(id)
    }

-    pub fn find_region(&self, region_id: RegionId) -> Option<MitoRegionRef> {
+    pub(crate) fn find_region(&self, region_id: RegionId) -> Option<MitoRegionRef> {
        self.inner.workers.get_region(region_id)
    }

--- a/src/mito2/src/error.rs
+++ b/src/mito2/src/error.rs
@@ -1121,12 +1121,6 @@ pub enum Error {
        #[snafu(implicit)]
        location: Location,
    },
-
-    #[snafu(display("GC job permit exhausted"))]
-    TooManyGcJobs {
-        #[snafu(implicit)]
-        location: Location,
-    },
 }

 pub type Result<T, E = Error> = std::result::Result<T, E>;
@@ -1297,7 +1291,7 @@ impl ErrorExt for Error {

            InconsistentTimestampLength { .. } => StatusCode::InvalidArguments,

-            TooManyFilesToRead { .. } | TooManyGcJobs { .. } => StatusCode::RateLimited,
+            TooManyFilesToRead { .. } => StatusCode::RateLimited,
        }
    }

--- a/src/mito2/src/flush.rs
+++ b/src/mito2/src/flush.rs
@@ -525,19 +525,21 @@ impl RegionFlushTask {
                let source = Either::Left(source);
                let write_request = self.new_write_request(version, max_sequence, source);

-                let mut metrics = Metrics::new(WriteType::Flush);
-                let ssts_written = self
+                let (ssts_written, metrics) = self
                    .access_layer
-                    .write_sst(write_request, &write_opts, &mut metrics)
+                    .write_sst(write_request, &write_opts, WriteType::Flush)
                    .await?;
                if ssts_written.is_empty() {
                    // No data written.
                    continue;
                }

-                debug!(
+                common_telemetry::debug!(
                    "Region {} flush one memtable, num_mem_ranges: {}, num_rows: {}, metrics: {:?}",
-                    self.region_id, num_mem_ranges, num_mem_rows, metrics
+                    self.region_id,
+                    num_mem_ranges,
+                    num_mem_rows,
+                    metrics
                );

                flush_metrics = flush_metrics.merge(metrics);
@@ -589,11 +591,9 @@ impl RegionFlushTask {
            let semaphore = self.flush_semaphore.clone();
            let task = common_runtime::spawn_global(async move {
                let _permit = semaphore.acquire().await.unwrap();
-                let mut metrics = Metrics::new(WriteType::Flush);
-                let ssts = access_layer
-                    .write_sst(write_request, &write_opts, &mut metrics)
-                    .await?;
-                Ok((ssts, metrics))
+                access_layer
+                    .write_sst(write_request, &write_opts, WriteType::Flush)
+                    .await
            });
            tasks.push(task);
        }
--- a/src/mito2/src/gc.rs
+++ b/src/mito2/src/gc.rs
@@ -22,17 +22,14 @@
 //!

 use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
-use std::sync::Arc;
 use std::time::Duration;

-use common_meta::datanode::GcStat;
-use common_telemetry::{debug, error, info, warn};
+use common_telemetry::{error, info, warn};
 use common_time::Timestamp;
 use object_store::{Entry, Lister};
 use serde::{Deserialize, Serialize};
 use snafu::{OptionExt, ResultExt as _, ensure};
-use store_api::storage::{FileId, FileRefsManifest, GcReport, RegionId};
-use tokio::sync::{OwnedSemaphorePermit, TryAcquireError};
+use store_api::storage::{FileId, RegionId};
 use tokio_stream::StreamExt;

 use crate::access_layer::AccessLayerRef;
@@ -40,64 +37,26 @@ use crate::cache::CacheManagerRef;
 use crate::config::MitoConfig;
 use crate::error::{
    DurationOutOfRangeSnafu, EmptyRegionDirSnafu, JoinSnafu, OpenDalSnafu, RegionNotFoundSnafu,
-    Result, TooManyGcJobsSnafu, UnexpectedSnafu,
+    Result, UnexpectedSnafu,
 };
 use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions, RemoveFileOptions};
 use crate::manifest::storage::manifest_compress_type;
-use crate::metrics::GC_DEL_FILE_CNT;
+use crate::metrics::GC_FILE_CNT;
 use crate::region::opener::new_manifest_dir;
 use crate::sst::file::delete_files;
+use crate::sst::file_ref::TableFileRefsManifest;
 use crate::sst::location::{self, region_dir_from_table_dir};

-/// Limit the amount of concurrent GC jobs on the datanode
-pub struct GcLimiter {
-    pub gc_job_limit: Arc<tokio::sync::Semaphore>,
-    gc_concurrency: usize,
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct GcReport {
+    /// deleted files per region
+    pub deleted_files: HashMap<RegionId, Vec<FileId>>,
+    /// Regions that need retry in next gc round, usually because their tmp ref files are outdated
+    pub need_retry_regions: HashSet<RegionId>,
 }

-pub type GcLimiterRef = Arc<GcLimiter>;
-
-impl GcLimiter {
-    pub fn new(gc_concurrency: usize) -> Self {
-        Self {
-            gc_job_limit: Arc::new(tokio::sync::Semaphore::new(gc_concurrency)),
-            gc_concurrency,
-        }
-    }
-
-    pub fn running_gc_tasks(&self) -> u32 {
-        (self.gc_concurrency - self.gc_job_limit.available_permits()) as u32
-    }
-
-    pub fn gc_concurrency(&self) -> u32 {
-        self.gc_concurrency as u32
-    }
-
-    pub fn gc_stat(&self) -> GcStat {
-        GcStat::new(self.running_gc_tasks(), self.gc_concurrency())
-    }
-
-    /// Try to acquire a permit for a GC job.
-    ///
-    /// If no permit is available, returns an `TooManyGcJobs` error.
-    pub fn permit(&self) -> Result<OwnedSemaphorePermit> {
-        self.gc_job_limit
-            .clone()
-            .try_acquire_owned()
-            .map_err(|e| match e {
-                TryAcquireError::Closed => UnexpectedSnafu {
-                    reason: format!("Failed to acquire gc permit: {e}"),
-                }
-                .build(),
-                TryAcquireError::NoPermits => TooManyGcJobsSnafu {}.build(),
-            })
-    }
-}
-
-#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
-pub struct GcConfig {
-    /// Whether GC is enabled.
-    pub enable: bool,
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct FileGcOption {
    /// Lingering time before deleting files.
    /// Should be long enough to allow long running queries to finish.
    ///
@@ -114,22 +73,16 @@ pub struct GcConfig {
    /// Maximum concurrent list operations per GC job.
    /// This is used to limit the number of concurrent listing operations and speed up listing.
    pub max_concurrent_lister_per_gc_job: usize,
-    /// Maximum concurrent GC jobs.
-    /// This is used to limit the number of concurrent GC jobs running on the datanode
-    /// to prevent too many concurrent GC jobs from overwhelming the datanode.
-    pub max_concurrent_gc_job: usize,
 }

-impl Default for GcConfig {
+impl Default for FileGcOption {
    fn default() -> Self {
        Self {
-            enable: false,
            // expect long running queries to be finished within a reasonable time
            lingering_time: Duration::from_secs(60 * 5),
            // 6 hours, for unknown expel time, which is when this file get removed from manifest, it should rarely happen, can keep it longer
            unknown_file_lingering_time: Duration::from_secs(60 * 60 * 6),
            max_concurrent_lister_per_gc_job: 32,
-            max_concurrent_gc_job: 4,
        }
    }
 }
@@ -139,23 +92,13 @@ pub struct LocalGcWorker {
    pub(crate) cache_manager: Option<CacheManagerRef>,
    pub(crate) manifest_mgrs: HashMap<RegionId, RegionManifestManager>,
    /// Lingering time before deleting files.
-    pub(crate) opt: GcConfig,
+    pub(crate) opt: FileGcOption,
    pub(crate) manifest_open_config: ManifestOpenConfig,
    /// Tmp ref files manifest, used to determine which files are still in use by ongoing queries.
    ///
    /// Also contains manifest versions of regions when the tmp ref files are generated.
    /// Used to determine whether the tmp ref files are outdated.
-    pub(crate) file_ref_manifest: FileRefsManifest,
-    _permit: OwnedSemaphorePermit,
-    /// Whether to perform full file listing during GC.
-    /// When set to false, GC will only delete files that are tracked in the manifest's removed_files,
-    /// which can significantly improve performance by avoiding expensive list operations.
-    /// When set to true, GC will perform a full listing to find and delete orphan files
-    /// (files not tracked in the manifest).
-    ///
-    /// Set to false for regular GC operations to optimize performance.
-    /// Set to true periodically or when you need to clean up orphan files.
-    pub full_file_listing: bool,
+    pub(crate) file_ref_manifest: TableFileRefsManifest,
 }

 pub struct ManifestOpenConfig {
@@ -182,16 +125,13 @@ impl LocalGcWorker {
    /// Create a new LocalGcWorker, with `regions_to_gc` regions to GC.
    /// The regions are specified by their `RegionId` and should all belong to the same table.
    ///
-    #[allow(clippy::too_many_arguments)]
    pub async fn try_new(
        access_layer: AccessLayerRef,
        cache_manager: Option<CacheManagerRef>,
        regions_to_gc: BTreeSet<RegionId>,
-        opt: GcConfig,
+        opt: FileGcOption,
        manifest_open_config: ManifestOpenConfig,
-        file_ref_manifest: FileRefsManifest,
-        limiter: &GcLimiterRef,
-        full_file_listing: bool,
+        file_ref_manifest: TableFileRefsManifest,
    ) -> Result<Self> {
        let table_id = regions_to_gc
            .first()
@@ -199,7 +139,6 @@ impl LocalGcWorker {
                reason: "Expect at least one region, found none",
            })?
            .table_id();
-        let permit = limiter.permit()?;
        let mut zelf = Self {
            access_layer,
            cache_manager,
@@ -207,8 +146,6 @@ impl LocalGcWorker {
            opt,
            manifest_open_config,
            file_ref_manifest,
-            _permit: permit,
-            full_file_listing,
        };

        // dedup just in case
@@ -256,15 +193,15 @@ impl LocalGcWorker {
        // TODO(discord9): verify manifest version before reading tmp ref files

        let mut tmp_ref_files = HashMap::new();
-        for (region_id, file_refs) in &self.file_ref_manifest.file_refs {
-            if outdated_regions.contains(region_id) {
+        for file_ref in &self.file_ref_manifest.file_refs {
+            if outdated_regions.contains(&file_ref.region_id) {
                // skip outdated regions
                continue;
            }
            tmp_ref_files
-                .entry(*region_id)
+                .entry(file_ref.region_id)
                .or_insert_with(HashSet::new)
-                .extend(file_refs.clone());
+                .insert(file_ref.file_id);
        }

        Ok(tmp_ref_files)
@@ -283,14 +220,14 @@ impl LocalGcWorker {
        let mut deleted_files = HashMap::new();
        let tmp_ref_files = self.read_tmp_ref_files(&mut outdated_regions).await?;
        for region_id in self.manifest_mgrs.keys() {
-            debug!("Doing gc for region {}", region_id);
+            info!("Doing gc for region {}", region_id);
            let tmp_ref_files = tmp_ref_files
                .get(region_id)
                .cloned()
                .unwrap_or_else(HashSet::new);
            let files = self.do_region_gc(*region_id, &tmp_ref_files).await?;
            deleted_files.insert(*region_id, files);
-            debug!("Gc for region {} finished", region_id);
+            info!("Gc for region {} finished", region_id);
        }
        info!(
            "LocalGcWorker finished after {} secs.",
@@ -307,7 +244,7 @@ impl LocalGcWorker {
 impl LocalGcWorker {
    /// concurrency of listing files per region.
    /// This is used to limit the number of concurrent listing operations and speed up listing
-    pub const CONCURRENCY_LIST_PER_FILES: usize = 1024;
+    pub const CONCURRENCY_LIST_PER_FILES: usize = 512;

    /// Perform GC for the region.
    /// 1. Get all the removed files in delta manifest files and their expel times
@@ -322,7 +259,7 @@ impl LocalGcWorker {
        region_id: RegionId,
        tmp_ref_files: &HashSet<FileId>,
    ) -> Result<Vec<FileId>> {
-        debug!("Doing gc for region {}", region_id);
+        info!("Doing gc for region {}", region_id);
        let manifest = self
            .manifest_mgrs
            .get(&region_id)
@@ -335,10 +272,10 @@ impl LocalGcWorker {

        if recently_removed_files.is_empty() {
            // no files to remove, skip
-            debug!("No recently removed files to gc for region {}", region_id);
+            info!("No recently removed files to gc for region {}", region_id);
        }

-        debug!(
+        info!(
            "Found {} recently removed files sets for region {}",
            recently_removed_files.len(),
            region_id
@@ -354,20 +291,27 @@ impl LocalGcWorker {
            .chain(tmp_ref_files.clone().into_iter())
            .collect();

+        let true_tmp_ref_files = tmp_ref_files
+            .iter()
+            .filter(|f| !current_files.contains_key(f))
+            .collect::<HashSet<_>>();
+
+        info!("True tmp ref files: {:?}", true_tmp_ref_files);
+
        let unused_files = self
            .list_to_be_deleted_files(region_id, in_used, recently_removed_files, concurrency)
            .await?;

        let unused_len = unused_files.len();

-        debug!(
+        info!(
            "Found {} unused files to delete for region {}",
            unused_len, region_id
        );

        self.delete_files(region_id, &unused_files).await?;

-        debug!(
+        info!(
            "Successfully deleted {} unused files for region {}",
            unused_len, region_id
        );
@@ -385,8 +329,7 @@ impl LocalGcWorker {
        )
        .await?;

-        // FIXME(discord9): if files are already deleted before calling delete_files, the metric will be inaccurate, no clean way to fix it now
-        GC_DEL_FILE_CNT.add(file_ids.len() as i64);
+        GC_FILE_CNT.add(file_ids.len() as i64);

        Ok(())
    }
@@ -548,7 +491,7 @@ impl LocalGcWorker {
        entries: Vec<Entry>,
        in_use_filenames: &HashSet<&FileId>,
        may_linger_filenames: &HashSet<&FileId>,
-        eligible_for_removal: &HashSet<&FileId>,
+        all_files_appear_in_delta_manifests: &HashSet<&FileId>,
        unknown_file_may_linger_until: chrono::DateTime<chrono::Utc>,
    ) -> (Vec<FileId>, HashSet<FileId>) {
        let mut all_unused_files_ready_for_delete = vec![];
@@ -572,7 +515,7 @@ impl LocalGcWorker {
            let should_delete = !in_use_filenames.contains(&file_id)
                && !may_linger_filenames.contains(&file_id)
                && {
-                    if !eligible_for_removal.contains(&file_id) {
+                    if !all_files_appear_in_delta_manifests.contains(&file_id) {
                        // if the file's expel time is unknown(because not appear in delta manifest), we keep it for a while
                        // using it's last modified time
                        // notice unknown files use a different lingering time
@@ -598,11 +541,6 @@ impl LocalGcWorker {
    /// Concurrently list unused files in the region dir
    /// because there may be a lot of files in the region dir
    /// and listing them may take a long time.
-    ///
-    /// When `full_file_listing` is false, this method will only delete files tracked in
-    /// `recently_removed_files` without performing expensive list operations, which significantly
-    /// improves performance. When `full_file_listing` is true, it performs a full listing to
-    /// find and delete orphan files.
    pub async fn list_to_be_deleted_files(
        &self,
        region_id: RegionId,
@@ -610,7 +548,6 @@ impl LocalGcWorker {
        recently_removed_files: BTreeMap<Timestamp, HashSet<FileId>>,
        concurrency: usize,
    ) -> Result<Vec<FileId>> {
-        let start = tokio::time::Instant::now();
        let now = chrono::Utc::now();
        let may_linger_until = now
            - chrono::Duration::from_std(self.opt.lingering_time).with_context(|_| {
@@ -632,7 +569,7 @@ impl LocalGcWorker {
        let may_linger_files = recently_removed_files.split_off(&threshold);
        let may_linger_filenames = may_linger_files.values().flatten().collect::<HashSet<_>>();

-        let eligible_for_removal = recently_removed_files
+        let all_files_appear_in_delta_manifests = recently_removed_files
            .values()
            .flatten()
            .collect::<HashSet<_>>();
@@ -640,56 +577,23 @@ impl LocalGcWorker {
        // in use filenames, include sst and index files
        let in_use_filenames = in_used.iter().collect::<HashSet<_>>();

-        // When full_file_listing is false, skip expensive list operations and only delete
-        // files that are tracked in recently_removed_files
-        if !self.full_file_listing {
-            // Only delete files that:
-            // 1. Are in recently_removed_files (tracked in manifest)
-            // 2. Are not in use
-            // 3. Have passed the lingering time
-            let files_to_delete: Vec<FileId> = eligible_for_removal
-                .iter()
-                .filter(|file_id| !in_use_filenames.contains(*file_id))
-                .map(|&f| *f)
-                .collect();
-
-            info!(
-                "gc: fast mode (no full listing) cost {} secs for region {}, found {} files to delete from manifest",
-                start.elapsed().as_secs_f64(),
-                region_id,
-                files_to_delete.len()
-            );
-
-            return Ok(files_to_delete);
-        }
-
-        // Full file listing mode: perform expensive list operations to find orphan files
        // Step 1: Create partitioned listers for concurrent processing
        let listers = self.partition_region_files(region_id, concurrency).await?;
-        let lister_cnt = listers.len();

        // Step 2: Concurrently list all files in the region directory
        let all_entries = self.list_region_files_concurrent(listers).await?;

-        let cnt = all_entries.len();
-
        // Step 3: Filter files to determine which ones can be deleted
        let (all_unused_files_ready_for_delete, all_in_exist_linger_files) = self
            .filter_deletable_files(
                all_entries,
                &in_use_filenames,
                &may_linger_filenames,
-                &eligible_for_removal,
+                &all_files_appear_in_delta_manifests,
                unknown_file_may_linger_until,
            );

-        info!(
-            "gc: full listing mode cost {} secs using {lister_cnt} lister for {cnt} files in region {}, found {} unused files to delete",
-            start.elapsed().as_secs_f64(),
-            region_id,
-            all_unused_files_ready_for_delete.len()
-        );
-        debug!("All in exist linger files: {:?}", all_in_exist_linger_files);
+        info!("All in exist linger files: {:?}", all_in_exist_linger_files);

        Ok(all_unused_files_ready_for_delete)
    }
--- a/src/mito2/src/lib.rs
+++ b/src/mito2/src/lib.rs
@@ -47,7 +47,7 @@ pub mod schedule;
 pub mod sst;
 mod time_provider;
 pub mod wal;
-pub mod worker;
+mod worker;

 #[cfg_attr(doc, aquamarine::aquamarine)]
 /// # Mito developer document
--- a/src/mito2/src/memtable/partition_tree.rs
+++ b/src/mito2/src/memtable/partition_tree.rs
@@ -384,7 +384,6 @@ mod tests {
    use api::v1::helper::{field_column_schema, row, tag_column_schema, time_index_column_schema};
    use api::v1::value::ValueData;
    use api::v1::{Mutation, OpType, Rows, SemanticType};
-    use common_query::prelude::{greptime_timestamp, greptime_value};
    use common_time::Timestamp;
    use datafusion_common::Column;
    use datafusion_expr::{BinaryExpr, Expr, Literal, Operator};
@@ -695,7 +694,7 @@ mod tests {
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
-                    greptime_timestamp(),
+                    "greptime_timestamp",
                    ConcreteDataType::timestamp_millisecond_datatype(),
                    false,
                ),
@@ -704,7 +703,7 @@ mod tests {
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
-                    greptime_value(),
+                    "greptime_value",
                    ConcreteDataType::float64_datatype(),
                    true,
                ),
--- a/src/mito2/src/memtable/time_series.rs
+++ b/src/mito2/src/memtable/time_series.rs
@@ -922,9 +922,7 @@ impl ValueBuilder {
                            )
                        };
                    mutable_vector.push_nulls(num_rows - 1);
-                    mutable_vector
-                        .push(field_value)
-                        .unwrap_or_else(|e| panic!("unexpected field value: {e:?}"));
+                    let _ = mutable_vector.push(field_value);
                    self.fields[idx] = Some(mutable_vector);
                    MEMTABLE_ACTIVE_FIELD_BUILDER_COUNT.inc();
                }
--- a/src/mito2/src/metrics.rs
+++ b/src/mito2/src/metrics.rs
@@ -437,7 +437,7 @@ lazy_static! {
            "mito stalled write request in each worker",
            &[WORKER_LABEL]
        ).unwrap();
-    /// Number of ref files
+    /// Number of ref files per table
    pub static ref GC_REF_FILE_CNT: IntGauge = register_int_gauge!(
            "greptime_gc_ref_file_count",
            "gc ref file count",
@@ -458,9 +458,9 @@ lazy_static! {
        .unwrap();

    /// Counter for the number of files deleted by the GC worker.
-    pub static ref GC_DEL_FILE_CNT: IntGauge =
+    pub static ref GC_FILE_CNT: IntGauge =
        register_int_gauge!(
-            "greptime_mito_gc_delete_file_count",
+            "greptime_mito_gc_file_count",
            "mito gc deleted file count",
        ).unwrap();
 }
--- a/src/mito2/src/read/scan_region.rs
+++ b/src/mito2/src/read/scan_region.rs
@@ -1106,8 +1106,9 @@ impl ScanInput {
        rows
    }

-    pub(crate) fn predicate_group(&self) -> &PredicateGroup {
-        &self.predicate
+    /// Returns table predicate of all exprs.
+    pub(crate) fn predicate(&self) -> Option<&Predicate> {
+        self.predicate.predicate()
    }

    /// Returns number of memtables to scan.
--- a/src/mito2/src/read/seq_scan.rs
+++ b/src/mito2/src/read/seq_scan.rs
@@ -632,12 +632,8 @@ impl RegionScanner for SeqScan {
        Ok(())
    }

-    fn has_predicate_without_region(&self) -> bool {
-        let predicate = self
-            .stream_ctx
-            .input
-            .predicate_group()
-            .predicate_without_region();
+    fn has_predicate(&self) -> bool {
+        let predicate = self.stream_ctx.input.predicate();
        predicate.map(|p| !p.exprs().is_empty()).unwrap_or(false)
    }

--- a/src/mito2/src/read/series_scan.rs
+++ b/src/mito2/src/read/series_scan.rs
@@ -314,12 +314,8 @@ impl RegionScanner for SeriesScan {
        Ok(())
    }

-    fn has_predicate_without_region(&self) -> bool {
-        let predicate = self
-            .stream_ctx
-            .input
-            .predicate_group()
-            .predicate_without_region();
+    fn has_predicate(&self) -> bool {
+        let predicate = self.stream_ctx.input.predicate();
        predicate.map(|p| !p.exprs().is_empty()).unwrap_or(false)
    }

--- a/src/mito2/src/read/unordered_scan.rs
+++ b/src/mito2/src/read/unordered_scan.rs
@@ -427,14 +427,8 @@ impl RegionScanner for UnorderedScan {
            .map_err(BoxedError::new)
    }

-    /// If this scanner have predicate other than region partition exprs
-    fn has_predicate_without_region(&self) -> bool {
-        let predicate = self
-            .stream_ctx
-            .input
-            .predicate_group()
-            .predicate_without_region();
-
+    fn has_predicate(&self) -> bool {
+        let predicate = self.stream_ctx.input.predicate();
        predicate.map(|p| !p.exprs().is_empty()).unwrap_or(false)
    }

--- a/src/mito2/src/region.rs
+++ b/src/mito2/src/region.rs
@@ -565,10 +565,6 @@ impl MitoRegion {
        Ok(())
    }

-    pub fn access_layer(&self) -> AccessLayerRef {
-        self.access_layer.clone()
-    }
-
    /// Returns the SST entries of the region.
    pub async fn manifest_sst_entries(&self) -> Vec<ManifestSstEntry> {
        let table_dir = self.table_dir();
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Zhenchi	72a6b9ff66	wip Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>	2025-10-31 05:03:38 +00:00
Zhenchi	1286d4ca74	Merge remote-tracking branch 'origin/main' into zhongzc/repartition-procedure-scaffold	2025-10-26 10:56:09 +00:00
Zhenchi	3c1d7fcb89	feat: add group procedure Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>	2025-10-23 10:19:06 +00:00
Zhenchi	5be4987fd7	Merge remote-tracking branch 'origin/main' into zhongzc/repartition-procedure-scaffold	2025-10-22 08:50:16 +00:00
Zhenchi	db11022cff	Merge remote-tracking branch 'origin/main' into zhongzc/repartition-procedure-scaffold	2025-10-13 06:45:56 +00:00
Zhenchi	15935ee89a	fix Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>	2025-10-13 06:45:37 +00:00
Zhenchi	d0877997a2	feat: scaffold repartition procedure with plan/resource stubs Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>	2025-10-13 02:41:02 +00:00