Compare commits


1 Commit

Author: luofucong
SHA1: e06ee80057
Message: feat: ingest jsonbench data through pipeline
Signed-off-by: luofucong <luofc@foxmail.com>
Date: 2025-11-28 20:07:26 +08:00
241 changed files with 2724 additions and 14091 deletions

View File

@@ -49,9 +49,14 @@ on:
description: Do not run integration tests during the build
type: boolean
default: true
build_linux_artifacts:
build_linux_amd64_artifacts:
type: boolean
description: Build linux artifacts (both amd64 and arm64)
description: Build linux-amd64 artifacts
required: false
default: false
build_linux_arm64_artifacts:
type: boolean
description: Build linux-arm64 artifacts
required: false
default: false
build_macos_artifacts:
@@ -139,7 +144,7 @@ jobs:
./.github/scripts/check-version.sh "${{ steps.create-version.outputs.version }}"
- name: Allocate linux-amd64 runner
if: ${{ inputs.build_linux_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
uses: ./.github/actions/start-runner
id: start-linux-amd64-runner
with:
@@ -153,7 +158,7 @@ jobs:
subnet-id: ${{ vars.EC2_RUNNER_SUBNET_ID }}
- name: Allocate linux-arm64 runner
if: ${{ inputs.build_linux_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
if: ${{ inputs.build_linux_arm64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
uses: ./.github/actions/start-runner
id: start-linux-arm64-runner
with:
@@ -168,7 +173,7 @@ jobs:
build-linux-amd64-artifacts:
name: Build linux-amd64 artifacts
if: ${{ inputs.build_linux_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [
allocate-runners,
]
@@ -190,7 +195,7 @@ jobs:
build-linux-arm64-artifacts:
name: Build linux-arm64 artifacts
if: ${{ inputs.build_linux_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
if: ${{ inputs.build_linux_arm64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [
allocate-runners,
]
@@ -212,7 +217,7 @@ jobs:
run-multi-lang-tests:
name: Run Multi-language SDK Tests
if: ${{ inputs.build_linux_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [
allocate-runners,
build-linux-amd64-artifacts,
@@ -381,18 +386,7 @@ jobs:
publish-github-release:
name: Create GitHub release and upload artifacts
# Use always() to run even when optional jobs (macos, windows) are skipped.
# Then check that required jobs succeeded and optional jobs didn't fail.
if: |
always() &&
(inputs.publish_github_release || github.event_name == 'push' || github.event_name == 'schedule') &&
needs.allocate-runners.result == 'success' &&
(needs.build-linux-amd64-artifacts.result == 'success' || needs.build-linux-amd64-artifacts.result == 'skipped') &&
(needs.build-linux-arm64-artifacts.result == 'success' || needs.build-linux-arm64-artifacts.result == 'skipped') &&
(needs.build-macos-artifacts.result == 'success' || needs.build-macos-artifacts.result == 'skipped') &&
(needs.build-windows-artifacts.result == 'success' || needs.build-windows-artifacts.result == 'skipped') &&
(needs.release-images-to-dockerhub.result == 'success' || needs.release-images-to-dockerhub.result == 'skipped') &&
(needs.run-multi-lang-tests.result == 'success' || needs.run-multi-lang-tests.result == 'skipped')
if: ${{ inputs.publish_github_release || github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [ # The job has to wait for all the artifacts to be built.
allocate-runners,
build-linux-amd64-artifacts,

View File

@@ -2,41 +2,41 @@
## Individual Committers (in alphabetical order)
- [apdong2022](https://github.com/apdong2022)
- [beryl678](https://github.com/beryl678)
- [CookiePieWw](https://github.com/CookiePieWw)
- [etolbakov](https://github.com/etolbakov)
- [irenjj](https://github.com/irenjj)
- [KKould](https://github.com/KKould)
- [Lanqing Yang](https://github.com/lyang24)
- [nicecui](https://github.com/nicecui)
- [NiwakaDev](https://github.com/NiwakaDev)
- [paomian](https://github.com/paomian)
- [tisonkun](https://github.com/tisonkun)
- [Wenjie0329](https://github.com/Wenjie0329)
- [zhaoyingnan01](https://github.com/zhaoyingnan01)
- [zhongzc](https://github.com/zhongzc)
- [ZonaHex](https://github.com/ZonaHex)
- [zyy17](https://github.com/zyy17)
* [CookiePieWw](https://github.com/CookiePieWw)
* [etolbakov](https://github.com/etolbakov)
* [irenjj](https://github.com/irenjj)
* [KKould](https://github.com/KKould)
* [Lanqing Yang](https://github.com/lyang24)
* [NiwakaDev](https://github.com/NiwakaDev)
* [tisonkun](https://github.com/tisonkun)
## Team Members (in alphabetical order)
- [daviderli614](https://github.com/daviderli614)
- [discord9](https://github.com/discord9)
- [evenyag](https://github.com/evenyag)
- [fengjiachun](https://github.com/fengjiachun)
- [fengys1996](https://github.com/fengys1996)
- [GrepTime](https://github.com/GrepTime)
- [holalengyu](https://github.com/holalengyu)
- [killme2008](https://github.com/killme2008)
- [MichaelScofield](https://github.com/MichaelScofield)
- [shuiyisong](https://github.com/shuiyisong)
- [sunchanglong](https://github.com/sunchanglong)
- [sunng87](https://github.com/sunng87)
- [v0y4g3r](https://github.com/v0y4g3r)
- [waynexia](https://github.com/waynexia)
- [WenyXu](https://github.com/WenyXu)
- [xtang](https://github.com/xtang)
* [apdong2022](https://github.com/apdong2022)
* [beryl678](https://github.com/beryl678)
* [daviderli614](https://github.com/daviderli614)
* [discord9](https://github.com/discord9)
* [evenyag](https://github.com/evenyag)
* [fengjiachun](https://github.com/fengjiachun)
* [fengys1996](https://github.com/fengys1996)
* [GrepTime](https://github.com/GrepTime)
* [holalengyu](https://github.com/holalengyu)
* [killme2008](https://github.com/killme2008)
* [MichaelScofield](https://github.com/MichaelScofield)
* [nicecui](https://github.com/nicecui)
* [paomian](https://github.com/paomian)
* [shuiyisong](https://github.com/shuiyisong)
* [sunchanglong](https://github.com/sunchanglong)
* [sunng87](https://github.com/sunng87)
* [v0y4g3r](https://github.com/v0y4g3r)
* [waynexia](https://github.com/waynexia)
* [Wenjie0329](https://github.com/Wenjie0329)
* [WenyXu](https://github.com/WenyXu)
* [xtang](https://github.com/xtang)
* [zhaoyingnan01](https://github.com/zhaoyingnan01)
* [zhongzc](https://github.com/zhongzc)
* [ZonaHex](https://github.com/ZonaHex)
* [zyy17](https://github.com/zyy17)
## All Contributors

Cargo.lock generated
View File

@@ -3741,9 +3741,9 @@ dependencies = [
[[package]]
name = "datafusion-pg-catalog"
version = "0.12.2"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "755393864c0c2dd95575ceed4b25e348686028e1b83d06f8f39914209999f821"
checksum = "15824c98ff2009c23b0398d441499b147f7c5ac0e5ee993e7a473d79040e3626"
dependencies = [
"async-trait",
"datafusion",
@@ -7514,11 +7514,9 @@ dependencies = [
"common-test-util",
"common-time",
"common-wal",
"criterion 0.4.0",
"datafusion",
"datatypes",
"futures-util",
"fxhash",
"humantime-serde",
"itertools 0.14.0",
"lazy_static",
@@ -8364,7 +8362,6 @@ dependencies = [
"common-macro",
"common-telemetry",
"common-test-util",
"derive_builder 0.20.2",
"futures",
"humantime-serde",
"lazy_static",
@@ -9203,9 +9200,9 @@ dependencies = [
[[package]]
name = "pgwire"
version = "0.36.3"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70a2bcdcc4b20a88e0648778ecf00415bbd5b447742275439c22176835056f99"
checksum = "d331bb0eef5bc83a221c0a85b1f205bccf094d4f72a26ae1d68a1b1c535123b7"
dependencies = [
"async-trait",
"base64 0.22.1",
@@ -9503,7 +9500,6 @@ name = "plugins"
version = "1.0.0-beta.2"
dependencies = [
"auth",
"catalog",
"clap 4.5.40",
"cli",
"common-base",
@@ -9512,7 +9508,6 @@ dependencies = [
"datanode",
"flow",
"frontend",
"meta-client",
"meta-srv",
"serde",
"snafu 0.8.6",
@@ -13069,7 +13064,6 @@ dependencies = [
"loki-proto",
"meta-client",
"meta-srv",
"mito2",
"moka",
"mysql_async",
"object-store",
@@ -13082,6 +13076,7 @@ dependencies = [
"prost 0.13.5",
"query",
"rand 0.9.1",
"regex",
"rstest",
"rstest_reuse",
"sea-query",

View File

@@ -131,7 +131,7 @@ datafusion-functions = "50"
datafusion-functions-aggregate-common = "50"
datafusion-optimizer = "50"
datafusion-orc = "0.5"
datafusion-pg-catalog = "0.12.2"
datafusion-pg-catalog = "0.12.1"
datafusion-physical-expr = "50"
datafusion-physical-plan = "50"
datafusion-sql = "50"

View File

@@ -294,6 +294,7 @@
| `meta_client` | -- | -- | The metasrv client options. |
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
| `meta_client.timeout` | String | `3s` | Operation timeout. |
| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |
@@ -456,6 +457,7 @@
| `meta_client` | -- | -- | The metasrv client options. |
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
| `meta_client.timeout` | String | `3s` | Operation timeout. |
| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |
@@ -627,6 +629,7 @@
| `meta_client` | -- | -- | The metasrv client options. |
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
| `meta_client.timeout` | String | `3s` | Operation timeout. |
| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |

View File

@@ -99,6 +99,9 @@ metasrv_addrs = ["127.0.0.1:3002"]
## Operation timeout.
timeout = "3s"
## Heartbeat timeout.
heartbeat_timeout = "500ms"
## DDL timeout.
ddl_timeout = "10s"

View File

@@ -78,6 +78,9 @@ metasrv_addrs = ["127.0.0.1:3002"]
## Operation timeout.
timeout = "3s"
## Heartbeat timeout.
heartbeat_timeout = "500ms"
## DDL timeout.
ddl_timeout = "10s"

View File

@@ -226,6 +226,9 @@ metasrv_addrs = ["127.0.0.1:3002"]
## Operation timeout.
timeout = "3s"
## Heartbeat timeout.
heartbeat_timeout = "500ms"
## DDL timeout.
ddl_timeout = "10s"

View File

@@ -894,7 +894,7 @@ pub fn is_column_type_value_eq(
.unwrap_or(false)
}
fn encode_json_value(value: JsonValue) -> v1::JsonValue {
pub fn encode_json_value(value: JsonValue) -> v1::JsonValue {
fn helper(json: JsonVariant) -> v1::JsonValue {
let value = match json {
JsonVariant::Null => None,

View File

@@ -17,8 +17,8 @@ use std::collections::HashMap;
use arrow_schema::extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY};
use datatypes::schema::{
COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema, FULLTEXT_KEY, FulltextAnalyzer,
FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY, SkippingIndexOptions,
SkippingIndexType,
FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, Metadata, SKIPPING_INDEX_KEY,
SkippingIndexOptions, SkippingIndexType,
};
use greptime_proto::v1::{
Analyzer, FulltextBackend as PbFulltextBackend, SkippingIndexType as PbSkippingIndexType,
@@ -131,6 +131,31 @@ pub fn try_as_column_def(column_schema: &ColumnSchema, is_primary_key: bool) ->
})
}
/// Collect the [ColumnOptions] into the [Metadata] that can be used in, for example, [ColumnSchema].
pub fn collect_column_options(column_options: Option<&ColumnOptions>) -> Metadata {
let mut metadata = Metadata::default();
let Some(ColumnOptions { options }) = column_options else {
return metadata;
};
if let Some(v) = options.get(FULLTEXT_GRPC_KEY) {
metadata.insert(FULLTEXT_KEY.to_string(), v.clone());
}
if let Some(v) = options.get(INVERTED_INDEX_GRPC_KEY) {
metadata.insert(INVERTED_INDEX_KEY.to_string(), v.clone());
}
if let Some(v) = options.get(SKIPPING_INDEX_GRPC_KEY) {
metadata.insert(SKIPPING_INDEX_KEY.to_string(), v.clone());
}
if let Some(v) = options.get(EXTENSION_TYPE_NAME_KEY) {
metadata.insert(EXTENSION_TYPE_NAME_KEY.to_string(), v.clone());
}
if let Some(v) = options.get(EXTENSION_TYPE_METADATA_KEY) {
metadata.insert(EXTENSION_TYPE_METADATA_KEY.to_string(), v.clone());
}
metadata
}
/// Constructs a `ColumnOptions` from the given `ColumnSchema`.
pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option<ColumnOptions> {
let mut options = ColumnOptions::default();
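
A minimal usage sketch for the new helper (module paths and the `Metadata` map type are assumed from this hunk, not confirmed by it): options harvested from a `ColumnSchema` can be collected back into schema metadata.

```rust
// Sketch only: paths are assumed; `collect_column_options` and
// `options_from_column_schema` are the two helpers shown in the hunk above.
use api::helper::{collect_column_options, options_from_column_schema};
use datatypes::schema::ColumnSchema;

fn roundtrip_metadata(column_schema: &ColumnSchema) {
    // Harvest gRPC column options (fulltext / inverted / skipping index,
    // extension type name + metadata) from the schema ...
    let options = options_from_column_schema(column_schema);
    // ... and collect them into a Metadata map usable on a ColumnSchema.
    let metadata = collect_column_options(options.as_ref());
    // Only the five known keys are ever carried over.
    assert!(metadata.len() <= 5);
}
```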

View File

@@ -211,7 +211,6 @@ struct InformationSchemaPartitionsBuilder {
partition_names: StringVectorBuilder,
partition_ordinal_positions: Int64VectorBuilder,
partition_expressions: StringVectorBuilder,
partition_descriptions: StringVectorBuilder,
create_times: TimestampSecondVectorBuilder,
partition_ids: UInt64VectorBuilder,
}
@@ -232,7 +231,6 @@ impl InformationSchemaPartitionsBuilder {
partition_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
partition_ordinal_positions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
partition_expressions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
partition_descriptions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
create_times: TimestampSecondVectorBuilder::with_capacity(INIT_CAPACITY),
partition_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
}
@@ -321,21 +319,6 @@ impl InformationSchemaPartitionsBuilder {
return;
}
// Get partition column names (shared by all partitions)
// In MySQL, PARTITION_EXPRESSION is the partitioning function expression (e.g., column name)
let partition_columns: String = table_info
.meta
.partition_column_names()
.cloned()
.collect::<Vec<_>>()
.join(", ");
let partition_expr_str = if partition_columns.is_empty() {
None
} else {
Some(partition_columns)
};
for (index, partition) in partitions.iter().enumerate() {
let partition_name = format!("p{index}");
@@ -345,12 +328,8 @@ impl InformationSchemaPartitionsBuilder {
self.partition_names.push(Some(&partition_name));
self.partition_ordinal_positions
.push(Some((index + 1) as i64));
// PARTITION_EXPRESSION: partition column names (same for all partitions)
self.partition_expressions
.push(partition_expr_str.as_deref());
// PARTITION_DESCRIPTION: partition boundary expression (different for each partition)
let description = partition.partition_expr.as_ref().map(|e| e.to_string());
self.partition_descriptions.push(description.as_deref());
let expression = partition.partition_expr.as_ref().map(|e| e.to_string());
self.partition_expressions.push(expression.as_deref());
self.create_times.push(Some(TimestampSecond::from(
table_info.meta.created_on.timestamp(),
)));
@@ -390,7 +369,7 @@ impl InformationSchemaPartitionsBuilder {
null_string_vector.clone(),
Arc::new(self.partition_expressions.finish()),
null_string_vector.clone(),
Arc::new(self.partition_descriptions.finish()),
null_string_vector.clone(),
// TODO(dennis): rows and index statistics info
null_i64_vector.clone(),
null_i64_vector.clone(),

View File

@@ -163,7 +163,7 @@ impl ObjbenchCommand {
available_indexes: Default::default(),
indexes: Default::default(),
index_file_size: 0,
index_version: 0,
index_file_id: None,
num_rows,
num_row_groups,
sequence: None,
@@ -565,7 +565,6 @@ fn new_noop_file_purger() -> FilePurgerRef {
struct Noop;
impl FilePurger for Noop {
fn remove_file(&self, _file_meta: FileMeta, _is_delete: bool) {}
fn update_index(&self, _file_meta: FileMeta, _version: store_api::storage::IndexVersion) {}
}
Arc::new(Noop)
}

View File

@@ -18,6 +18,7 @@ use std::sync::Arc;
use std::time::Duration;
use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
use catalog::CatalogManagerRef;
use catalog::information_extension::DistributedInformationExtension;
use catalog::kvbackend::{CachedKvBackendBuilder, KvBackendCatalogManagerBuilder, MetaKvBackend};
use clap::Parser;
@@ -25,12 +26,14 @@ use client::client_manager::NodeClients;
use common_base::Plugins;
use common_config::{Configurable, DEFAULT_DATA_HOME};
use common_grpc::channel_manager::ChannelConfig;
use common_meta::FlownodeId;
use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
use common_meta::heartbeat::handler::HandlerGroupExecutor;
use common_meta::heartbeat::handler::invalidate_table_cache::InvalidateCacheHandler;
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
use common_meta::key::TableMetadataManager;
use common_meta::key::flow::FlowMetadataManager;
use common_meta::kv_backend::KvBackendRef;
use common_stat::ResourceStatImpl;
use common_telemetry::info;
use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions};
@@ -40,7 +43,6 @@ use flow::{
get_flow_auth_options,
};
use meta_client::{MetaClientOptions, MetaClientType};
use plugins::flownode::context::GrpcConfigureContext;
use servers::configurator::GrpcBuilderConfiguratorRef;
use snafu::{OptionExt, ResultExt, ensure};
use tracing_appender::non_blocking::WorkerGuard;
@@ -433,3 +435,11 @@ impl StartCommand {
Ok(Instance::new(flownode, guard))
}
}
/// The context for [`GrpcBuilderConfiguratorRef`] in flownode.
pub struct GrpcConfigureContext {
pub kv_backend: KvBackendRef,
pub fe_client: Arc<FrontendClient>,
pub flownode_id: FlownodeId,
pub catalog_manager: CatalogManagerRef,
}

View File

@@ -45,10 +45,7 @@ use frontend::frontend::Frontend;
use frontend::heartbeat::HeartbeatTask;
use frontend::instance::builder::FrontendBuilder;
use frontend::server::Services;
use meta_client::{MetaClientOptions, MetaClientType};
use plugins::frontend::context::{
CatalogManagerConfigureContext, DistributedCatalogManagerConfigureContext,
};
use meta_client::{MetaClientOptions, MetaClientRef, MetaClientType};
use servers::addrs;
use servers::grpc::GrpcOptions;
use servers::tls::{TlsMode, TlsOption};
@@ -426,11 +423,9 @@ impl StartCommand {
let builder = if let Some(configurator) =
plugins.get::<CatalogManagerConfiguratorRef<CatalogManagerConfigureContext>>()
{
let ctx = DistributedCatalogManagerConfigureContext {
let ctx = CatalogManagerConfigureContext {
meta_client: meta_client.clone(),
};
let ctx = CatalogManagerConfigureContext::Distributed(ctx);
configurator
.configure(builder, ctx)
.await
@@ -487,6 +482,11 @@ impl StartCommand {
}
}
/// The context for [`CatalogManagerConfiguratorRef`] in frontend.
pub struct CatalogManagerConfigureContext {
pub meta_client: MetaClientRef,
}
#[cfg(test)]
mod tests {
use std::io::Write;

View File

@@ -32,7 +32,7 @@ use common_meta::cache::LayeredCacheRegistryBuilder;
use common_meta::ddl::flow_meta::FlowMetadataAllocator;
use common_meta::ddl::table_meta::TableMetadataAllocator;
use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl};
use common_meta::ddl_manager::{DdlManager, DdlManagerConfiguratorRef};
use common_meta::ddl_manager::{DdlManager, DdlManagerConfiguratorRef, DdlManagerConfigureContext};
use common_meta::key::flow::FlowMetadataManager;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
@@ -58,10 +58,6 @@ use frontend::instance::StandaloneDatanodeManager;
use frontend::instance::builder::FrontendBuilder;
use frontend::server::Services;
use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
use plugins::frontend::context::{
CatalogManagerConfigureContext, StandaloneCatalogManagerConfigureContext,
};
use plugins::standalone::context::DdlManagerConfigureContext;
use servers::tls::{TlsMode, TlsOption};
use snafu::ResultExt;
use standalone::StandaloneInformationExtension;
@@ -418,10 +414,9 @@ impl StartCommand {
let builder = if let Some(configurator) =
plugins.get::<CatalogManagerConfiguratorRef<CatalogManagerConfigureContext>>()
{
let ctx = StandaloneCatalogManagerConfigureContext {
let ctx = CatalogManagerConfigureContext {
fe_client: frontend_client.clone(),
};
let ctx = CatalogManagerConfigureContext::Standalone(ctx);
configurator
.configure(builder, ctx)
.await
@@ -511,13 +506,9 @@ impl StartCommand {
let ddl_manager = DdlManager::try_new(ddl_context, procedure_manager.clone(), true)
.context(error::InitDdlManagerSnafu)?;
let ddl_manager = if let Some(configurator) =
plugins.get::<DdlManagerConfiguratorRef<DdlManagerConfigureContext>>()
{
let ddl_manager = if let Some(configurator) = plugins.get::<DdlManagerConfiguratorRef>() {
let ctx = DdlManagerConfigureContext {
kv_backend: kv_backend.clone(),
fe_client: frontend_client.clone(),
catalog_manager: catalog_manager.clone(),
};
configurator
.configure(ddl_manager, ctx)
@@ -604,6 +595,11 @@ impl StartCommand {
}
}
/// The context for [`CatalogManagerConfiguratorRef`] in standalone.
pub struct CatalogManagerConfigureContext {
pub fe_client: Arc<FrontendClient>,
}
#[cfg(test)]
mod tests {
use std::default::Default;

View File

@@ -52,6 +52,7 @@ fn test_load_datanode_example_config() {
meta_client: Some(MetaClientOptions {
metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
timeout: Duration::from_secs(3),
heartbeat_timeout: Duration::from_millis(500),
ddl_timeout: Duration::from_secs(10),
connect_timeout: Duration::from_secs(1),
tcp_nodelay: true,
@@ -117,6 +118,7 @@ fn test_load_frontend_example_config() {
meta_client: Some(MetaClientOptions {
metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
timeout: Duration::from_secs(3),
heartbeat_timeout: Duration::from_millis(500),
ddl_timeout: Duration::from_secs(10),
connect_timeout: Duration::from_secs(1),
tcp_nodelay: true,
@@ -239,6 +241,7 @@ fn test_load_flownode_example_config() {
meta_client: Some(MetaClientOptions {
metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
timeout: Duration::from_secs(3),
heartbeat_timeout: Duration::from_millis(500),
ddl_timeout: Duration::from_secs(10),
connect_timeout: Duration::from_secs(1),
tcp_nodelay: true,

View File

@@ -32,12 +32,7 @@ impl Plugins {
pub fn insert<T: 'static + Send + Sync>(&self, value: T) {
let last = self.write().insert(value);
if last.is_some() {
panic!(
"Plugin of type {} already exists",
std::any::type_name::<T>()
);
}
assert!(last.is_none(), "each type of plugins must be one and only");
}
pub fn get<T: 'static + Send + Sync + Clone>(&self) -> Option<T> {
@@ -145,7 +140,7 @@ mod tests {
}
#[test]
#[should_panic(expected = "Plugin of type i32 already exists")]
#[should_panic(expected = "each type of plugins must be one and only")]
fn test_plugin_uniqueness() {
let plugins = Plugins::new();
plugins.insert(1i32);

View File

@@ -14,7 +14,6 @@
mod binary;
mod ctx;
mod if_func;
mod is_null;
mod unary;
@@ -23,7 +22,6 @@ pub use ctx::EvalContext;
pub use unary::scalar_unary_op;
use crate::function_registry::FunctionRegistry;
use crate::scalars::expression::if_func::IfFunction;
use crate::scalars::expression::is_null::IsNullFunction;
pub(crate) struct ExpressionFunction;
@@ -31,6 +29,5 @@ pub(crate) struct ExpressionFunction;
impl ExpressionFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register_scalar(IsNullFunction::default());
registry.register_scalar(IfFunction::default());
}
}

View File

@@ -1,404 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt;
use std::fmt::Display;
use arrow::array::ArrowNativeTypeOp;
use arrow::datatypes::ArrowPrimitiveType;
use datafusion::arrow::array::{Array, ArrayRef, AsArray, BooleanArray, PrimitiveArray};
use datafusion::arrow::compute::kernels::zip::zip;
use datafusion::arrow::datatypes::DataType;
use datafusion_common::DataFusionError;
use datafusion_expr::type_coercion::binary::comparison_coercion;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
use crate::function::Function;
const NAME: &str = "if";
/// MySQL-compatible IF function: IF(condition, true_value, false_value)
///
/// Returns true_value if condition is TRUE (not NULL and not 0),
/// otherwise returns false_value.
///
/// MySQL truthy rules:
/// - NULL -> false
/// - 0 (numeric zero) -> false
/// - Any non-zero numeric -> true
/// - Boolean true/false -> use directly
#[derive(Clone, Debug)]
pub struct IfFunction {
signature: Signature,
}
impl Default for IfFunction {
fn default() -> Self {
Self {
signature: Signature::any(3, Volatility::Immutable),
}
}
}
impl Display for IfFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
impl Function for IfFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, input_types: &[DataType]) -> datafusion_common::Result<DataType> {
// Return the common type of true_value and false_value (args[1] and args[2])
if input_types.len() < 3 {
return Err(DataFusionError::Plan(format!(
"{} requires 3 arguments, got {}",
NAME,
input_types.len()
)));
}
let true_type = &input_types[1];
let false_type = &input_types[2];
// Use comparison_coercion to find common type
comparison_coercion(true_type, false_type).ok_or_else(|| {
DataFusionError::Plan(format!(
"Cannot find common type for IF function between {:?} and {:?}",
true_type, false_type
))
})
}
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
&self,
args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
if args.args.len() != 3 {
return Err(DataFusionError::Plan(format!(
"{} requires exactly 3 arguments, got {}",
NAME,
args.args.len()
)));
}
let condition = &args.args[0];
let true_value = &args.args[1];
let false_value = &args.args[2];
// Convert condition to boolean array using MySQL truthy rules
let bool_array = to_boolean_array(condition, args.number_rows)?;
// Convert true and false values to arrays
let true_array = true_value.to_array(args.number_rows)?;
let false_array = false_value.to_array(args.number_rows)?;
// Use zip to select values based on condition
// zip expects &dyn Datum, and ArrayRef (Arc<dyn Array>) implements Datum
let result = zip(&bool_array, &true_array, &false_array)?;
Ok(ColumnarValue::Array(result))
}
}
/// Convert a ColumnarValue to a BooleanArray using MySQL truthy rules:
/// - NULL -> false
/// - 0 (any numeric zero) -> false
/// - Non-zero numeric -> true
/// - Boolean -> use directly
fn to_boolean_array(
value: &ColumnarValue,
num_rows: usize,
) -> datafusion_common::Result<BooleanArray> {
let array = value.to_array(num_rows)?;
array_to_bool(array)
}
/// Convert an integer PrimitiveArray to BooleanArray using MySQL truthy rules:
/// NULL -> false, 0 -> false, non-zero -> true
fn int_array_to_bool<T>(array: &PrimitiveArray<T>) -> BooleanArray
where
T: ArrowPrimitiveType,
T::Native: ArrowNativeTypeOp,
{
BooleanArray::from_iter(
array
.iter()
.map(|opt| Some(opt.is_some_and(|v| !v.is_zero()))),
)
}
/// Convert a float PrimitiveArray to BooleanArray using MySQL truthy rules:
/// NULL -> false, 0 (including -0.0) -> false, NaN -> true, other non-zero -> true
fn float_array_to_bool<T>(array: &PrimitiveArray<T>) -> BooleanArray
where
T: ArrowPrimitiveType,
T::Native: ArrowNativeTypeOp + num_traits::Float,
{
use num_traits::Float;
BooleanArray::from_iter(
array
.iter()
.map(|opt| Some(opt.is_some_and(|v| v.is_nan() || !v.is_zero()))),
)
}
/// Convert an Array to BooleanArray using MySQL truthy rules
fn array_to_bool(array: ArrayRef) -> datafusion_common::Result<BooleanArray> {
use arrow::datatypes::*;
match array.data_type() {
DataType::Boolean => {
let bool_array = array.as_boolean();
Ok(BooleanArray::from_iter(
bool_array.iter().map(|opt| Some(opt.unwrap_or(false))),
))
}
DataType::Int8 => Ok(int_array_to_bool(array.as_primitive::<Int8Type>())),
DataType::Int16 => Ok(int_array_to_bool(array.as_primitive::<Int16Type>())),
DataType::Int32 => Ok(int_array_to_bool(array.as_primitive::<Int32Type>())),
DataType::Int64 => Ok(int_array_to_bool(array.as_primitive::<Int64Type>())),
DataType::UInt8 => Ok(int_array_to_bool(array.as_primitive::<UInt8Type>())),
DataType::UInt16 => Ok(int_array_to_bool(array.as_primitive::<UInt16Type>())),
DataType::UInt32 => Ok(int_array_to_bool(array.as_primitive::<UInt32Type>())),
DataType::UInt64 => Ok(int_array_to_bool(array.as_primitive::<UInt64Type>())),
// Float16 needs special handling since half::f16 doesn't implement num_traits::Float
DataType::Float16 => {
let typed_array = array.as_primitive::<Float16Type>();
Ok(BooleanArray::from_iter(typed_array.iter().map(|opt| {
Some(opt.is_some_and(|v| {
let f = v.to_f32();
f.is_nan() || !f.is_zero()
}))
})))
}
DataType::Float32 => Ok(float_array_to_bool(array.as_primitive::<Float32Type>())),
DataType::Float64 => Ok(float_array_to_bool(array.as_primitive::<Float64Type>())),
// Null type is always false.
// Note: NullArray::is_null() returns false (physical null), so we must handle it explicitly.
// See: https://github.com/apache/arrow-rs/issues/4840
DataType::Null => Ok(BooleanArray::from(vec![false; array.len()])),
// For other types, treat non-null as true
_ => {
let len = array.len();
Ok(BooleanArray::from_iter(
(0..len).map(|i| Some(!array.is_null(i))),
))
}
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use arrow_schema::Field;
use datafusion_common::ScalarValue;
use datafusion_common::arrow::array::{AsArray, Int32Array, StringArray};
use super::*;
#[test]
fn test_if_function_basic() {
let if_func = IfFunction::default();
assert_eq!("if", if_func.name());
// Test IF(true, 'yes', 'no') -> 'yes'
let result = if_func
.invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(ScalarValue::Boolean(Some(true))),
ColumnarValue::Scalar(ScalarValue::Utf8(Some("yes".to_string()))),
ColumnarValue::Scalar(ScalarValue::Utf8(Some("no".to_string()))),
],
arg_fields: vec![],
number_rows: 1,
return_field: Arc::new(Field::new("", DataType::Utf8, true)),
config_options: Arc::new(Default::default()),
})
.unwrap();
if let ColumnarValue::Array(arr) = result {
let str_arr = arr.as_string::<i32>();
assert_eq!(str_arr.value(0), "yes");
} else {
panic!("Expected Array result");
}
}
#[test]
fn test_if_function_false() {
let if_func = IfFunction::default();
// Test IF(false, 'yes', 'no') -> 'no'
let result = if_func
.invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(ScalarValue::Boolean(Some(false))),
ColumnarValue::Scalar(ScalarValue::Utf8(Some("yes".to_string()))),
ColumnarValue::Scalar(ScalarValue::Utf8(Some("no".to_string()))),
],
arg_fields: vec![],
number_rows: 1,
return_field: Arc::new(Field::new("", DataType::Utf8, true)),
config_options: Arc::new(Default::default()),
})
.unwrap();
if let ColumnarValue::Array(arr) = result {
let str_arr = arr.as_string::<i32>();
assert_eq!(str_arr.value(0), "no");
} else {
panic!("Expected Array result");
}
}
#[test]
fn test_if_function_null_is_false() {
let if_func = IfFunction::default();
// Test IF(NULL, 'yes', 'no') -> 'no' (NULL is treated as false)
// Using Boolean(None) - typed null
let result = if_func
.invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(ScalarValue::Boolean(None)),
ColumnarValue::Scalar(ScalarValue::Utf8(Some("yes".to_string()))),
ColumnarValue::Scalar(ScalarValue::Utf8(Some("no".to_string()))),
],
arg_fields: vec![],
number_rows: 1,
return_field: Arc::new(Field::new("", DataType::Utf8, true)),
config_options: Arc::new(Default::default()),
})
.unwrap();
if let ColumnarValue::Array(arr) = result {
let str_arr = arr.as_string::<i32>();
assert_eq!(str_arr.value(0), "no");
} else {
panic!("Expected Array result");
}
// Test IF(NULL, 'yes', 'no') -> 'no' using ScalarValue::Null (untyped null from SQL NULL literal)
let result = if_func
.invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(ScalarValue::Null),
ColumnarValue::Scalar(ScalarValue::Utf8(Some("yes".to_string()))),
ColumnarValue::Scalar(ScalarValue::Utf8(Some("no".to_string()))),
],
arg_fields: vec![],
number_rows: 1,
return_field: Arc::new(Field::new("", DataType::Utf8, true)),
config_options: Arc::new(Default::default()),
})
.unwrap();
if let ColumnarValue::Array(arr) = result {
let str_arr = arr.as_string::<i32>();
assert_eq!(str_arr.value(0), "no");
} else {
panic!("Expected Array result");
}
}
#[test]
fn test_if_function_numeric_truthy() {
let if_func = IfFunction::default();
// Test IF(1, 'yes', 'no') -> 'yes' (non-zero is true)
let result = if_func
.invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(ScalarValue::Int32(Some(1))),
ColumnarValue::Scalar(ScalarValue::Utf8(Some("yes".to_string()))),
ColumnarValue::Scalar(ScalarValue::Utf8(Some("no".to_string()))),
],
arg_fields: vec![],
number_rows: 1,
return_field: Arc::new(Field::new("", DataType::Utf8, true)),
config_options: Arc::new(Default::default()),
})
.unwrap();
if let ColumnarValue::Array(arr) = result {
let str_arr = arr.as_string::<i32>();
assert_eq!(str_arr.value(0), "yes");
} else {
panic!("Expected Array result");
}
// Test IF(0, 'yes', 'no') -> 'no' (zero is false)
let result = if_func
.invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(ScalarValue::Int32(Some(0))),
ColumnarValue::Scalar(ScalarValue::Utf8(Some("yes".to_string()))),
ColumnarValue::Scalar(ScalarValue::Utf8(Some("no".to_string()))),
],
arg_fields: vec![],
number_rows: 1,
return_field: Arc::new(Field::new("", DataType::Utf8, true)),
config_options: Arc::new(Default::default()),
})
.unwrap();
if let ColumnarValue::Array(arr) = result {
let str_arr = arr.as_string::<i32>();
assert_eq!(str_arr.value(0), "no");
} else {
panic!("Expected Array result");
}
}
#[test]
fn test_if_function_with_arrays() {
let if_func = IfFunction::default();
// Test with array condition
let condition = Int32Array::from(vec![Some(1), Some(0), None, Some(5)]);
let true_val = StringArray::from(vec!["yes", "yes", "yes", "yes"]);
let false_val = StringArray::from(vec!["no", "no", "no", "no"]);
let result = if_func
.invoke_with_args(ScalarFunctionArgs {
args: vec![
ColumnarValue::Array(Arc::new(condition)),
ColumnarValue::Array(Arc::new(true_val)),
ColumnarValue::Array(Arc::new(false_val)),
],
arg_fields: vec![],
number_rows: 4,
return_field: Arc::new(Field::new("", DataType::Utf8, true)),
config_options: Arc::new(Default::default()),
})
.unwrap();
if let ColumnarValue::Array(arr) = result {
let str_arr = arr.as_string::<i32>();
assert_eq!(str_arr.value(0), "yes"); // 1 is true
assert_eq!(str_arr.value(1), "no"); // 0 is false
assert_eq!(str_arr.value(2), "no"); // NULL is false
assert_eq!(str_arr.value(3), "yes"); // 5 is true
} else {
panic!("Expected Array result");
}
}
}

View File

@@ -17,7 +17,7 @@ use std::sync::Arc;
use common_catalog::consts::{
DEFAULT_PRIVATE_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, PG_CATALOG_NAME,
};
use datafusion::arrow::array::{ArrayRef, StringArray, StringBuilder, as_boolean_array};
use datafusion::arrow::array::{ArrayRef, StringArray, as_boolean_array};
use datafusion::catalog::TableFunction;
use datafusion::common::ScalarValue;
use datafusion::common::utils::SingleRowListArrayBuilder;
@@ -34,15 +34,10 @@ const CURRENT_SCHEMA_FUNCTION_NAME: &str = "current_schema";
const CURRENT_SCHEMAS_FUNCTION_NAME: &str = "current_schemas";
const SESSION_USER_FUNCTION_NAME: &str = "session_user";
const CURRENT_DATABASE_FUNCTION_NAME: &str = "current_database";
const OBJ_DESCRIPTION_FUNCTION_NAME: &str = "obj_description";
const COL_DESCRIPTION_FUNCTION_NAME: &str = "col_description";
const SHOBJ_DESCRIPTION_FUNCTION_NAME: &str = "shobj_description";
const PG_MY_TEMP_SCHEMA_FUNCTION_NAME: &str = "pg_my_temp_schema";
define_nullary_udf!(CurrentSchemaFunction);
define_nullary_udf!(SessionUserFunction);
define_nullary_udf!(CurrentDatabaseFunction);
define_nullary_udf!(PgMyTempSchemaFunction);
impl Function for CurrentDatabaseFunction {
fn name(&self) -> &str {
@@ -178,175 +173,6 @@ impl Function for CurrentSchemasFunction {
}
}
/// PostgreSQL obj_description - returns NULL for compatibility
#[derive(Display, Debug, Clone)]
#[display("{}", self.name())]
pub(super) struct ObjDescriptionFunction {
signature: Signature,
}
impl ObjDescriptionFunction {
pub fn new() -> Self {
Self {
signature: Signature::one_of(
vec![
TypeSignature::Exact(vec![DataType::Int64, DataType::Utf8]),
TypeSignature::Exact(vec![DataType::UInt32, DataType::Utf8]),
TypeSignature::Exact(vec![DataType::Int64]),
TypeSignature::Exact(vec![DataType::UInt32]),
],
Volatility::Stable,
),
}
}
}
impl Function for ObjDescriptionFunction {
fn name(&self) -> &str {
OBJ_DESCRIPTION_FUNCTION_NAME
}
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8)
}
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
&self,
args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
let num_rows = args.number_rows;
let mut builder = StringBuilder::with_capacity(num_rows, 0);
for _ in 0..num_rows {
builder.append_null();
}
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
}
}
/// PostgreSQL col_description - returns NULL for compatibility
#[derive(Display, Debug, Clone)]
#[display("{}", self.name())]
pub(super) struct ColDescriptionFunction {
signature: Signature,
}
impl ColDescriptionFunction {
pub fn new() -> Self {
Self {
signature: Signature::one_of(
vec![
TypeSignature::Exact(vec![DataType::Int64, DataType::Int32]),
TypeSignature::Exact(vec![DataType::UInt32, DataType::Int32]),
TypeSignature::Exact(vec![DataType::Int64, DataType::Int64]),
TypeSignature::Exact(vec![DataType::UInt32, DataType::Int64]),
],
Volatility::Stable,
),
}
}
}
impl Function for ColDescriptionFunction {
fn name(&self) -> &str {
COL_DESCRIPTION_FUNCTION_NAME
}
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8)
}
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
&self,
args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
let num_rows = args.number_rows;
let mut builder = StringBuilder::with_capacity(num_rows, 0);
for _ in 0..num_rows {
builder.append_null();
}
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
}
}
/// PostgreSQL shobj_description - returns NULL for compatibility
#[derive(Display, Debug, Clone)]
#[display("{}", self.name())]
pub(super) struct ShobjDescriptionFunction {
signature: Signature,
}
impl ShobjDescriptionFunction {
pub fn new() -> Self {
Self {
signature: Signature::one_of(
vec![
TypeSignature::Exact(vec![DataType::Int64, DataType::Utf8]),
TypeSignature::Exact(vec![DataType::UInt64, DataType::Utf8]),
TypeSignature::Exact(vec![DataType::Int32, DataType::Utf8]),
TypeSignature::Exact(vec![DataType::UInt32, DataType::Utf8]),
],
Volatility::Stable,
),
}
}
}
impl Function for ShobjDescriptionFunction {
fn name(&self) -> &str {
SHOBJ_DESCRIPTION_FUNCTION_NAME
}
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8)
}
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
&self,
args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
let num_rows = args.number_rows;
let mut builder = StringBuilder::with_capacity(num_rows, 0);
for _ in 0..num_rows {
builder.append_null();
}
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
}
}
/// PostgreSQL pg_my_temp_schema - returns 0 (no temp schema) for compatibility
impl Function for PgMyTempSchemaFunction {
fn name(&self) -> &str {
PG_MY_TEMP_SCHEMA_FUNCTION_NAME
}
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt32)
}
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
&self,
_args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
Ok(ColumnarValue::Scalar(ScalarValue::UInt32(Some(0))))
}
}
pub(super) struct PGCatalogFunction;
impl PGCatalogFunction {
@@ -386,98 +212,5 @@ impl PGCatalogFunction {
registry.register(pg_catalog::create_pg_total_relation_size_udf());
registry.register(pg_catalog::create_pg_stat_get_numscans());
registry.register(pg_catalog::create_pg_get_constraintdef());
registry.register(pg_catalog::create_pg_get_partition_ancestors_udf());
registry.register_scalar(ObjDescriptionFunction::new());
registry.register_scalar(ColDescriptionFunction::new());
registry.register_scalar(ShobjDescriptionFunction::new());
registry.register_scalar(PgMyTempSchemaFunction::default());
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use arrow_schema::Field;
use datafusion::arrow::array::Array;
use datafusion_common::ScalarValue;
use datafusion_expr::ColumnarValue;
use super::*;
fn create_test_args(args: Vec<ColumnarValue>, number_rows: usize) -> ScalarFunctionArgs {
ScalarFunctionArgs {
args,
arg_fields: vec![],
number_rows,
return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
config_options: Arc::new(Default::default()),
}
}
#[test]
fn test_obj_description_function() {
let func = ObjDescriptionFunction::new();
assert_eq!("obj_description", func.name());
assert_eq!(DataType::Utf8, func.return_type(&[]).unwrap());
let args = create_test_args(
vec![
ColumnarValue::Scalar(ScalarValue::Int64(Some(1234))),
ColumnarValue::Scalar(ScalarValue::Utf8(Some("pg_class".to_string()))),
],
1,
);
let result = func.invoke_with_args(args).unwrap();
if let ColumnarValue::Array(arr) = result {
assert_eq!(1, arr.len());
assert!(arr.is_null(0));
} else {
panic!("Expected Array result");
}
}
#[test]
fn test_col_description_function() {
let func = ColDescriptionFunction::new();
assert_eq!("col_description", func.name());
assert_eq!(DataType::Utf8, func.return_type(&[]).unwrap());
let args = create_test_args(
vec![
ColumnarValue::Scalar(ScalarValue::Int64(Some(1234))),
ColumnarValue::Scalar(ScalarValue::Int64(Some(1))),
],
1,
);
let result = func.invoke_with_args(args).unwrap();
if let ColumnarValue::Array(arr) = result {
assert_eq!(1, arr.len());
assert!(arr.is_null(0));
} else {
panic!("Expected Array result");
}
}
#[test]
fn test_shobj_description_function() {
let func = ShobjDescriptionFunction::new();
assert_eq!("shobj_description", func.name());
assert_eq!(DataType::Utf8, func.return_type(&[]).unwrap());
let args = create_test_args(
vec![
ColumnarValue::Scalar(ScalarValue::Int64(Some(1))),
ColumnarValue::Scalar(ScalarValue::Utf8(Some("pg_database".to_string()))),
],
1,
);
let result = func.invoke_with_args(args).unwrap();
if let ColumnarValue::Array(arr) = result {
assert_eq!(1, arr.len());
assert!(arr.is_null(0));
} else {
panic!("Expected Array result");
}
}
}

View File

@@ -46,16 +46,13 @@ pub struct DoPutResponse {
request_id: i64,
/// The successfully ingested rows number.
affected_rows: AffectedRows,
/// The elapsed time in seconds for handling the bulk insert.
elapsed_secs: f64,
}
impl DoPutResponse {
pub fn new(request_id: i64, affected_rows: AffectedRows, elapsed_secs: f64) -> Self {
pub fn new(request_id: i64, affected_rows: AffectedRows) -> Self {
Self {
request_id,
affected_rows,
elapsed_secs,
}
}
@@ -66,10 +63,6 @@ impl DoPutResponse {
pub fn affected_rows(&self) -> AffectedRows {
self.affected_rows
}
pub fn elapsed_secs(&self) -> f64 {
self.elapsed_secs
}
}
impl TryFrom<PutResult> for DoPutResponse {
@@ -93,11 +86,8 @@ mod tests {
#[test]
fn test_serde_do_put_response() {
let x = DoPutResponse::new(42, 88, 0.123);
let x = DoPutResponse::new(42, 88);
let serialized = serde_json::to_string(&x).unwrap();
assert_eq!(
serialized,
r#"{"request_id":42,"affected_rows":88,"elapsed_secs":0.123}"#
);
assert_eq!(serialized, r#"{"request_id":42,"affected_rows":88}"#);
}
}

View File

@@ -46,6 +46,7 @@ use crate::error::{
use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
use crate::kv_backend::KvBackendRef;
use crate::procedure_executor::ExecutorContext;
#[cfg(feature = "enterprise")]
use crate::rpc::ddl::DdlTask::CreateTrigger;
@@ -69,16 +70,20 @@ use crate::rpc::router::RegionRoute;
/// A configurator that customizes or enhances a [`DdlManager`].
#[async_trait::async_trait]
pub trait DdlManagerConfigurator<C>: Send + Sync {
pub trait DdlManagerConfigurator: Send + Sync {
/// Configures the given [`DdlManager`] using the provided [`DdlManagerConfigureContext`].
async fn configure(
&self,
ddl_manager: DdlManager,
ctx: C,
ctx: DdlManagerConfigureContext,
) -> std::result::Result<DdlManager, BoxedError>;
}
pub type DdlManagerConfiguratorRef<C> = Arc<dyn DdlManagerConfigurator<C>>;
pub type DdlManagerConfiguratorRef = Arc<dyn DdlManagerConfigurator>;
pub struct DdlManagerConfigureContext {
pub kv_backend: KvBackendRef,
}
pub type DdlManagerRef = Arc<DdlManager>;
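
A hypothetical implementation against the de-generified trait, matching the new signature in this hunk (the struct name and body are illustrative, not part of the commit):

```rust
// Illustrative only: a no-op configurator for the reworked trait.
struct AuditConfigurator;

#[async_trait::async_trait]
impl DdlManagerConfigurator for AuditConfigurator {
    async fn configure(
        &self,
        ddl_manager: DdlManager,
        ctx: DdlManagerConfigureContext,
    ) -> std::result::Result<DdlManager, BoxedError> {
        // The context now carries only the kv backend; callers that previously
        // passed fe_client/catalog_manager (see the standalone hunk above) no
        // longer can.
        let _kv_backend = ctx.kv_backend;
        Ok(ddl_manager)
    }
}
```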

View File

@@ -41,17 +41,6 @@ pub const POSTGRES_KEEP_ALIVE_SECS: u64 = 30;
/// In a lease, there are two opportunities for renewal.
pub const META_KEEP_ALIVE_INTERVAL_SECS: u64 = META_LEASE_SECS / 2;
/// The timeout of the heartbeat request.
pub const HEARTBEAT_TIMEOUT: Duration = Duration::from_secs(META_KEEP_ALIVE_INTERVAL_SECS + 1);
/// The keep-alive interval of the heartbeat channel.
pub const HEARTBEAT_CHANNEL_KEEP_ALIVE_INTERVAL_SECS: Duration =
Duration::from_secs(META_KEEP_ALIVE_INTERVAL_SECS + 1);
/// The keep-alive timeout of the heartbeat channel.
pub const HEARTBEAT_CHANNEL_KEEP_ALIVE_TIMEOUT_SECS: Duration =
Duration::from_secs(META_KEEP_ALIVE_INTERVAL_SECS + 1);
/// The default mailbox round-trip timeout.
pub const MAILBOX_RTT_SECS: u64 = 1;

View File

@@ -339,16 +339,6 @@ pub struct FlushRegions {
pub error_strategy: FlushErrorStrategy,
}
impl Display for FlushRegions {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(
f,
"FlushRegions(region_ids={:?}, strategy={:?}, error_strategy={:?})",
self.region_ids, self.strategy, self.error_strategy
)
}
}
impl FlushRegions {
/// Create synchronous single-region flush
pub fn sync_single(region_id: RegionId) -> Self {

View File

@@ -246,6 +246,14 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Loader for {type_name} is not implemented: {reason}"))]
ProcedureLoaderNotImplemented {
#[snafu(implicit)]
location: Location,
type_name: String,
reason: String,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -266,7 +274,8 @@ impl ErrorExt for Error {
Error::ToJson { .. }
| Error::DeleteState { .. }
| Error::FromJson { .. }
| Error::WaitWatcher { .. } => StatusCode::Internal,
| Error::WaitWatcher { .. }
| Error::ProcedureLoaderNotImplemented { .. } => StatusCode::Internal,
Error::RetryTimesExceeded { .. }
| Error::RollbackTimesExceeded { .. }

View File

@@ -320,15 +320,4 @@ mod tests {
assert!(flush_reply.results[0].1.is_ok());
assert!(flush_reply.results[1].1.is_err());
}
#[test]
fn test_flush_regions_display() {
let region_id = RegionId::new(1024, 1);
let flush_regions = FlushRegions::sync_single(region_id);
let display = format!("{}", flush_regions);
assert_eq!(
display,
"FlushRegions(region_ids=[4398046511105(1024, 1)], strategy=Sync, error_strategy=FailFast)"
);
}
}

View File

@@ -1200,8 +1200,7 @@ impl RegionServerInner {
| RegionRequest::Flush(_)
| RegionRequest::Compact(_)
| RegionRequest::Truncate(_)
| RegionRequest::BuildIndex(_)
| RegionRequest::EnterStaging(_) => RegionChange::None,
| RegionRequest::BuildIndex(_) => RegionChange::None,
RegionRequest::Catchup(_) => RegionChange::Catchup,
};
@@ -1261,6 +1260,7 @@ impl RegionServerInner {
.with_context(|_| HandleRegionRequestSnafu { region_id })?
.new_opened_logical_region_ids()
else {
warn!("No new opened logical regions");
return Ok(());
};

View File

@@ -24,8 +24,8 @@ use common_query::Output;
use common_runtime::Runtime;
use common_runtime::runtime::{BuilderBuild, RuntimeTrait};
use datafusion::catalog::TableFunction;
use datafusion::dataframe::DataFrame;
use datafusion_expr::{AggregateUDF, LogicalPlan};
use query::dataframe::DataFrame;
use query::planner::LogicalPlanner;
use query::query_engine::{DescribeResult, QueryEngineState};
use query::{QueryEngine, QueryEngineContext};

View File

@@ -13,7 +13,7 @@
// limitations under the License.
use std::collections::BTreeMap;
use std::fmt::{Display, Formatter};
use std::fmt::{Debug, Display, Formatter};
use std::str::FromStr;
use std::sync::Arc;
@@ -133,28 +133,24 @@ impl From<&ConcreteDataType> for JsonNativeType {
impl Display for JsonNativeType {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
JsonNativeType::Null => write!(f, "Null"),
JsonNativeType::Bool => write!(f, "Bool"),
JsonNativeType::Number(t) => {
write!(f, "Number({t:?})")
}
JsonNativeType::String => write!(f, "String"),
JsonNativeType::Array(item_type) => {
write!(f, "Array[{}]", item_type)
}
JsonNativeType::Object(object) => {
write!(
f,
"Object{{{}}}",
fn to_serde_value(t: &JsonNativeType) -> serde_json::Value {
match t {
JsonNativeType::Null => serde_json::Value::String("<Null>".to_string()),
JsonNativeType::Bool => serde_json::Value::String("<Bool>".to_string()),
JsonNativeType::Number(_) => serde_json::Value::String("<Number>".to_string()),
JsonNativeType::String => serde_json::Value::String("<String>".to_string()),
JsonNativeType::Array(item_type) => {
serde_json::Value::Array(vec![to_serde_value(item_type)])
}
JsonNativeType::Object(object) => serde_json::Value::Object(
object
.iter()
.map(|(k, v)| format!(r#""{k}": {v}"#))
.collect::<Vec<_>>()
.join(", ")
)
.map(|(k, v)| (k.clone(), to_serde_value(v)))
.collect(),
),
}
}
write!(f, "{}", to_serde_value(self))
}
}
@@ -183,7 +179,11 @@ impl JsonType {
}
}
pub(crate) fn native_type(&self) -> &JsonNativeType {
pub fn is_native_type(&self) -> bool {
matches!(self.format, JsonFormat::Native(_))
}
pub fn native_type(&self) -> &JsonNativeType {
match &self.format {
JsonFormat::Jsonb => &JsonNativeType::String,
JsonFormat::Native(x) => x.as_ref(),

View File

@@ -12,9 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::pin::Pin;
use std::sync::Arc;
use std::time::Instant;
use api::helper::from_pb_time_ranges;
use api::v1::ddl_request::{Expr as DdlExpr, Expr};
@@ -24,18 +22,16 @@ use api::v1::{
DeleteRequests, DropFlowExpr, InsertIntoPlan, InsertRequests, RowDeleteRequests,
RowInsertRequests,
};
use async_stream::try_stream;
use async_trait::async_trait;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
use common_base::AffectedRows;
use common_error::ext::BoxedError;
use common_grpc::flight::do_put::DoPutResponse;
use common_grpc::FlightData;
use common_grpc::flight::FlightDecoder;
use common_query::Output;
use common_query::logical_plan::add_insert_to_logical_plan;
use common_telemetry::tracing::{self};
use datafusion::datasource::DefaultTableSource;
use futures::Stream;
use futures::stream::StreamExt;
use query::parser::PromQuery;
use servers::interceptor::{GrpcQueryInterceptor, GrpcQueryInterceptorRef};
use servers::query_handler::grpc::GrpcQueryHandler;
@@ -244,8 +240,10 @@ impl GrpcQueryHandler for Instance {
async fn put_record_batch(
&self,
request: servers::grpc::flight::PutRecordBatchRequest,
table_name: &TableName,
table_ref: &mut Option<TableRef>,
decoder: &mut FlightDecoder,
data: FlightData,
ctx: QueryContextRef,
) -> Result<AffectedRows> {
let table = if let Some(table) = table_ref {
@@ -254,15 +252,15 @@ impl GrpcQueryHandler for Instance {
let table = self
.catalog_manager()
.table(
&request.table_name.catalog_name,
&request.table_name.schema_name,
&request.table_name.table_name,
&table_name.catalog_name,
&table_name.schema_name,
&table_name.table_name,
None,
)
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {
table_name: request.table_name.to_string(),
table_name: table_name.to_string(),
})?;
*table_ref = Some(table.clone());
table
@@ -281,77 +279,10 @@ impl GrpcQueryHandler for Instance {
// do we check limit for bulk insert?
self.inserter
.handle_bulk_insert(
table,
request.flight_data,
request.record_batch,
request.schema_bytes,
)
.handle_bulk_insert(table, decoder, data)
.await
.context(TableOperationSnafu)
}
fn handle_put_record_batch_stream(
&self,
mut stream: servers::grpc::flight::PutRecordBatchRequestStream,
ctx: QueryContextRef,
) -> Pin<Box<dyn Stream<Item = Result<DoPutResponse>> + Send>> {
// Resolve table once for the stream
// Clone all necessary data to make it 'static
let catalog_manager = self.catalog_manager().clone();
let plugins = self.plugins.clone();
let inserter = self.inserter.clone();
let table_name = stream.table_name().clone();
let ctx = ctx.clone();
Box::pin(try_stream! {
plugins
.get::<PermissionCheckerRef>()
.as_ref()
.check_permission(ctx.current_user(), PermissionReq::BulkInsert)
.context(PermissionSnafu)?;
// Cache for resolved table reference - resolve once and reuse
let table_ref = catalog_manager
.table(
&table_name.catalog_name,
&table_name.schema_name,
&table_name.table_name,
None,
)
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {
table_name: table_name.to_string(),
})?;
// Check permissions once for the stream
let interceptor_ref = plugins.get::<GrpcQueryInterceptorRef<Error>>();
let interceptor = interceptor_ref.as_ref();
interceptor.pre_bulk_insert(table_ref.clone(), ctx.clone())?;
// Process each request in the stream
while let Some(request_result) = stream.next().await {
let request = request_result.map_err(|e| {
let error_msg = format!("Stream error: {:?}", e);
IncompleteGrpcRequestSnafu { err_msg: error_msg }.build()
})?;
let request_id = request.request_id;
let start = Instant::now();
let rows = inserter
.handle_bulk_insert(
table_ref.clone(),
request.flight_data,
request.record_batch,
request.schema_bytes,
)
.await
.context(TableOperationSnafu)?;
let elapsed_secs = start.elapsed().as_secs_f64();
yield DoPutResponse::new(request_id, rows, elapsed_secs);
}
})
}
}
fn fill_catalog_and_schema_from_context(ddl_expr: &mut DdlExpr, ctx: &QueryContextRef) {

View File

@@ -136,7 +136,7 @@ impl Instance {
table_name: format_full_table_name(ctx.current_catalog(), &table_schema, &metric),
})?;
let scan_plan = dataframe.into_unoptimized_plan();
let scan_plan = dataframe.into_logical_plan();
let filter_conditions =
PromPlanner::matchers_to_expr(Matchers::new(matchers), scan_plan.schema())
.context(PrometheusLabelValuesQueryPlanSnafu)?;

View File

@@ -22,7 +22,6 @@ use common_telemetry::info;
use meta_client::MetaClientOptions;
use servers::error::Error as ServerError;
use servers::grpc::builder::GrpcServerBuilder;
use servers::grpc::flight::FlightCraftRef;
use servers::grpc::frontend_grpc_handler::FrontendGrpcHandler;
use servers::grpc::greptime_handler::GreptimeRequestHandler;
use servers::grpc::{GrpcOptions, GrpcServer};
@@ -53,7 +52,6 @@ where
grpc_server_builder: Option<GrpcServerBuilder>,
http_server_builder: Option<HttpServerBuilder>,
plugins: Plugins,
flight_handler: Option<FlightCraftRef>,
}
impl<T> Services<T>
@@ -67,7 +65,6 @@ where
grpc_server_builder: None,
http_server_builder: None,
plugins,
flight_handler: None,
}
}
@@ -142,13 +139,6 @@ where
}
}
pub fn with_flight_handler(self, flight_handler: FlightCraftRef) -> Self {
Self {
flight_handler: Some(flight_handler),
..self
}
}
fn build_grpc_server(
&mut self,
grpc: &GrpcOptions,
@@ -183,12 +173,6 @@ where
grpc.flight_compression,
);
// Use custom flight handler if provided, otherwise use the default GreptimeRequestHandler
let flight_handler = self
.flight_handler
.clone()
.unwrap_or_else(|| Arc::new(greptime_request_handler.clone()) as FlightCraftRef);
let grpc_server = builder
.name(name)
.database_handler(greptime_request_handler.clone())
@@ -197,7 +181,7 @@ where
self.instance.clone(),
user_provider.clone(),
))
.flight_handler(flight_handler);
.flight_handler(Arc::new(greptime_request_handler));
let grpc_server = if !external {
let frontend_grpc_handler =

View File

@@ -21,7 +21,7 @@ use itertools::Itertools;
use crate::Bytes;
use crate::bloom_filter::error::Result;
use crate::bloom_filter::reader::{BloomFilterReadMetrics, BloomFilterReader};
use crate::bloom_filter::reader::BloomFilterReader;
/// `InListPredicate` contains a list of acceptable values. A value needs to match at least
/// one of the elements (logical OR semantic) for the predicate to be satisfied.
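The OR semantics described above, as a tiny self-contained sketch (simplified types, not the crate's):

```rust
use std::collections::HashSet;

struct InListPredicate {
    list: HashSet<Vec<u8>>,
}

impl InListPredicate {
    // Logical OR across the list: a probed value satisfies the predicate
    // iff it equals at least one listed value.
    fn matches(&self, value: &[u8]) -> bool {
        self.list.contains(value)
    }
}

fn main() {
    let p = InListPredicate {
        list: HashSet::from([b"a".to_vec(), b"b".to_vec()]),
    };
    assert!(p.matches(b"a")); // matches one element -> satisfied
    assert!(!p.matches(b"c")); // matches none -> not satisfied
}
```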
@@ -38,7 +38,7 @@ pub struct BloomFilterApplier {
impl BloomFilterApplier {
pub async fn new(reader: Box<dyn BloomFilterReader + Send>) -> Result<Self> {
let meta = reader.metadata(None).await?;
let meta = reader.metadata().await?;
Ok(Self { reader, meta })
}
@@ -50,7 +50,6 @@ impl BloomFilterApplier {
&mut self,
predicates: &[InListPredicate],
search_ranges: &[Range<usize>],
metrics: Option<&mut BloomFilterReadMetrics>,
) -> Result<Vec<Range<usize>>> {
if predicates.is_empty() {
// If no predicates, return empty result
@@ -58,7 +57,7 @@ impl BloomFilterApplier {
}
let segments = self.row_ranges_to_segments(search_ranges);
let (seg_locations, bloom_filters) = self.load_bloom_filters(&segments, metrics).await?;
let (seg_locations, bloom_filters) = self.load_bloom_filters(&segments).await?;
let matching_row_ranges = self.find_matching_rows(seg_locations, bloom_filters, predicates);
Ok(intersect_ranges(search_ranges, &matching_row_ranges))
}
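// Hedged sketch of what `intersect_ranges` (not shown in this hunk) is
// expected to compute: the pairwise overlap of the caller's search ranges
// with the row ranges that matched the bloom filters.
fn intersect_ranges_sketch(
    search: &[std::ops::Range<usize>],
    matching: &[std::ops::Range<usize>],
) -> Vec<std::ops::Range<usize>> {
    let mut out = Vec::new();
    for s in search {
        for m in matching {
            let (start, end) = (s.start.max(m.start), s.end.min(m.end));
            if start < end {
                // Keep only the overlapping part of the two ranges.
                out.push(start..end);
            }
        }
    }
    out
}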
@@ -96,7 +95,6 @@ impl BloomFilterApplier {
async fn load_bloom_filters(
&mut self,
segments: &[usize],
metrics: Option<&mut BloomFilterReadMetrics>,
) -> Result<(Vec<(u64, usize)>, Vec<BloomFilter>)> {
let segment_locations = segments
.iter()
@@ -110,10 +108,7 @@ impl BloomFilterApplier {
.map(|i| self.meta.bloom_filter_locs[i as usize])
.collect::<Vec<_>>();
let bloom_filters = self
.reader
.bloom_filter_vec(&bloom_filter_locs, metrics)
.await?;
let bloom_filters = self.reader.bloom_filter_vec(&bloom_filter_locs).await?;
Ok((segment_locations, bloom_filters))
}
@@ -427,10 +422,7 @@ mod tests {
];
for (predicates, search_range, expected) in cases {
let result = applier
.search(&predicates, &[search_range], None)
.await
.unwrap();
let result = applier.search(&predicates, &[search_range]).await.unwrap();
assert_eq!(
result, expected,
"Expected {:?}, got {:?}",

View File

@@ -13,7 +13,6 @@
// limitations under the License.
use std::ops::{Range, Rem};
use std::time::{Duration, Instant};
use async_trait::async_trait;
use bytemuck::try_cast_slice;
@@ -35,72 +34,6 @@ const BLOOM_META_LEN_SIZE: u64 = 4;
/// Default prefetch size of bloom filter meta.
pub const DEFAULT_PREFETCH_SIZE: u64 = 8192; // 8KiB
/// Metrics for bloom filter read operations.
#[derive(Default, Clone)]
pub struct BloomFilterReadMetrics {
/// Total byte size to read.
pub total_bytes: u64,
/// Total number of ranges to read.
pub total_ranges: usize,
/// Elapsed time to fetch data.
pub fetch_elapsed: Duration,
/// Number of cache hits.
pub cache_hit: usize,
/// Number of cache misses.
pub cache_miss: usize,
}
impl std::fmt::Debug for BloomFilterReadMetrics {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self {
total_bytes,
total_ranges,
fetch_elapsed,
cache_hit,
cache_miss,
} = self;
// If both total_bytes and cache_hit are 0, we didn't read anything.
if *total_bytes == 0 && *cache_hit == 0 {
return write!(f, "{{}}");
}
write!(f, "{{")?;
if *total_bytes > 0 {
write!(f, "\"total_bytes\":{}", total_bytes)?;
}
if *cache_hit > 0 {
if *total_bytes > 0 {
write!(f, ", ")?;
}
write!(f, "\"cache_hit\":{}", cache_hit)?;
}
if *total_ranges > 0 {
write!(f, ", \"total_ranges\":{}", total_ranges)?;
}
if !fetch_elapsed.is_zero() {
write!(f, ", \"fetch_elapsed\":\"{:?}\"", fetch_elapsed)?;
}
if *cache_miss > 0 {
write!(f, ", \"cache_miss\":{}", cache_miss)?;
}
write!(f, "}}")
}
}
impl BloomFilterReadMetrics {
/// Merges another metrics into this one.
pub fn merge_from(&mut self, other: &Self) {
self.total_bytes += other.total_bytes;
self.total_ranges += other.total_ranges;
self.fetch_elapsed += other.fetch_elapsed;
self.cache_hit += other.cache_hit;
self.cache_miss += other.cache_miss;
}
}
/// Safely converts bytes to Vec<u64> using bytemuck for optimal performance.
/// Faster than chunking and converting each piece individually.
///
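// A condensed sketch of the conversion described above: reinterpret the
// bytes as u64 words via bytemuck when alignment and length permit, and
// fall back to chunked decoding otherwise (little-endian is an assumption
// here; the real encoding is defined by the writer).
fn bytes_to_u64_vec_sketch(bytes: &[u8]) -> Vec<u64> {
    match bytemuck::try_cast_slice::<u8, u64>(bytes) {
        Ok(words) => words.to_vec(),
        Err(_) => bytes
            .chunks_exact(8)
            .map(|chunk| u64::from_le_bytes(chunk.try_into().unwrap()))
            .collect(),
    }
}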
@@ -146,33 +79,25 @@ pub fn bytes_to_u64_vec(bytes: &Bytes) -> Vec<u64> {
#[async_trait]
pub trait BloomFilterReader: Sync {
/// Reads a range of bytes from the file.
async fn range_read(
&self,
offset: u64,
size: u32,
metrics: Option<&mut BloomFilterReadMetrics>,
) -> Result<Bytes>;
async fn range_read(&self, offset: u64, size: u32) -> Result<Bytes>;
/// Reads a batch of ranges from the file.
async fn read_vec(
&self,
ranges: &[Range<u64>],
metrics: Option<&mut BloomFilterReadMetrics>,
) -> Result<Vec<Bytes>>;
async fn read_vec(&self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
let mut results = Vec::with_capacity(ranges.len());
for range in ranges {
let size = (range.end - range.start) as u32;
let data = self.range_read(range.start, size).await?;
results.push(data);
}
Ok(results)
}
/// Reads the meta information of the bloom filter.
async fn metadata(
&self,
metrics: Option<&mut BloomFilterReadMetrics>,
) -> Result<BloomFilterMeta>;
async fn metadata(&self) -> Result<BloomFilterMeta>;
/// Reads a bloom filter with the given location.
async fn bloom_filter(
&self,
loc: &BloomFilterLoc,
metrics: Option<&mut BloomFilterReadMetrics>,
) -> Result<BloomFilter> {
let bytes = self.range_read(loc.offset, loc.size as _, metrics).await?;
async fn bloom_filter(&self, loc: &BloomFilterLoc) -> Result<BloomFilter> {
let bytes = self.range_read(loc.offset, loc.size as _).await?;
let vec = bytes_to_u64_vec(&bytes);
let bm = BloomFilter::from_vec(vec)
.seed(&SEED)
@@ -180,16 +105,12 @@ pub trait BloomFilterReader: Sync {
Ok(bm)
}
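// Hedged usage sketch of the simplified, metrics-free API above: read the
// metadata once, then batch-load every bloom filter it references.
async fn load_all_sketch(reader: &dyn BloomFilterReader) -> Result<Vec<BloomFilter>> {
    let meta = reader.metadata().await?;
    reader.bloom_filter_vec(&meta.bloom_filter_locs).await
}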
async fn bloom_filter_vec(
&self,
locs: &[BloomFilterLoc],
metrics: Option<&mut BloomFilterReadMetrics>,
) -> Result<Vec<BloomFilter>> {
async fn bloom_filter_vec(&self, locs: &[BloomFilterLoc]) -> Result<Vec<BloomFilter>> {
let ranges = locs
.iter()
.map(|l| l.offset..l.offset + l.size)
.collect::<Vec<_>>();
let bss = self.read_vec(&ranges, metrics).await?;
let bss = self.read_vec(&ranges).await?;
let mut result = Vec::with_capacity(bss.len());
for (bs, loc) in bss.into_iter().zip(locs.iter()) {
@@ -219,59 +140,24 @@ impl<R: RangeReader> BloomFilterReaderImpl<R> {
#[async_trait]
impl<R: RangeReader> BloomFilterReader for BloomFilterReaderImpl<R> {
async fn range_read(
&self,
offset: u64,
size: u32,
metrics: Option<&mut BloomFilterReadMetrics>,
) -> Result<Bytes> {
let start = metrics.as_ref().map(|_| Instant::now());
let result = self
.reader
async fn range_read(&self, offset: u64, size: u32) -> Result<Bytes> {
self.reader
.read(offset..offset + size as u64)
.await
.context(IoSnafu)?;
if let Some(m) = metrics {
m.total_ranges += 1;
m.total_bytes += size as u64;
if let Some(start) = start {
m.fetch_elapsed += start.elapsed();
}
}
Ok(result)
.context(IoSnafu)
}
async fn read_vec(
&self,
ranges: &[Range<u64>],
metrics: Option<&mut BloomFilterReadMetrics>,
) -> Result<Vec<Bytes>> {
let start = metrics.as_ref().map(|_| Instant::now());
let result = self.reader.read_vec(ranges).await.context(IoSnafu)?;
if let Some(m) = metrics {
m.total_ranges += ranges.len();
m.total_bytes += ranges.iter().map(|r| r.end - r.start).sum::<u64>();
if let Some(start) = start {
m.fetch_elapsed += start.elapsed();
}
}
Ok(result)
async fn read_vec(&self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
self.reader.read_vec(ranges).await.context(IoSnafu)
}
async fn metadata(
&self,
metrics: Option<&mut BloomFilterReadMetrics>,
) -> Result<BloomFilterMeta> {
async fn metadata(&self) -> Result<BloomFilterMeta> {
let metadata = self.reader.metadata().await.context(IoSnafu)?;
let file_size = metadata.content_length;
let mut meta_reader =
BloomFilterMetaReader::new(&self.reader, file_size, Some(DEFAULT_PREFETCH_SIZE));
meta_reader.metadata(metrics).await
meta_reader.metadata().await
}
}
@@ -297,10 +183,7 @@ impl<R: RangeReader> BloomFilterMetaReader<R> {
///
/// It will first prefetch some bytes from the end of the file,
/// then parse the metadata from the prefetched bytes.
pub async fn metadata(
&mut self,
metrics: Option<&mut BloomFilterReadMetrics>,
) -> Result<BloomFilterMeta> {
pub async fn metadata(&mut self) -> Result<BloomFilterMeta> {
ensure!(
self.file_size >= BLOOM_META_LEN_SIZE,
FileSizeTooSmallSnafu {
@@ -308,7 +191,6 @@ impl<R: RangeReader> BloomFilterMetaReader<R> {
}
);
let start = metrics.as_ref().map(|_| Instant::now());
let meta_start = self.file_size.saturating_sub(self.prefetch_size);
let suffix = self
.reader
@@ -326,28 +208,8 @@ impl<R: RangeReader> BloomFilterMetaReader<R> {
.read(metadata_start..self.file_size - BLOOM_META_LEN_SIZE)
.await
.context(IoSnafu)?;
if let Some(m) = metrics {
// suffix read + meta read
m.total_ranges += 2;
// Ignores the meta length size to simplify the calculation.
m.total_bytes += self.file_size.min(self.prefetch_size) + length;
if let Some(start) = start {
m.fetch_elapsed += start.elapsed();
}
}
BloomFilterMeta::decode(meta).context(DecodeProtoSnafu)
} else {
if let Some(m) = metrics {
// suffix read only
m.total_ranges += 1;
m.total_bytes += self.file_size.min(self.prefetch_size);
if let Some(start) = start {
m.fetch_elapsed += start.elapsed();
}
}
let metadata_start = self.file_size - length - BLOOM_META_LEN_SIZE - meta_start;
let meta = &suffix[metadata_start as usize..suffix_len - BLOOM_META_LEN_SIZE as usize];
BloomFilterMeta::decode(meta).context(DecodeProtoSnafu)
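// A compact sketch of the two-read footer protocol implemented above: the
// last BLOOM_META_LEN_SIZE bytes hold the metadata length; when the
// prefetched suffix already covers the payload it is parsed in place,
// otherwise exactly one extra read fetches the payload (a valid `length`
// is assumed here).
fn footer_payload_range_sketch(
    file_size: u64,
    prefetch: u64,
    length: u64,
) -> (std::ops::Range<u64>, bool) {
    const LEN_SIZE: u64 = 4; // mirrors BLOOM_META_LEN_SIZE
    let suffix_len = file_size.min(prefetch);
    let payload_end = file_size - LEN_SIZE;
    // `true` means the payload is fully inside the prefetched suffix.
    let covered = length <= suffix_len.saturating_sub(LEN_SIZE);
    (payload_end - length..payload_end, covered)
}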
@@ -428,7 +290,7 @@ mod tests {
for prefetch in [0u64, file_size / 2, file_size, file_size + 10] {
let mut reader =
BloomFilterMetaReader::new(bytes.clone(), file_size as _, Some(prefetch));
let meta = reader.metadata(None).await.unwrap();
let meta = reader.metadata().await.unwrap();
assert_eq!(meta.rows_per_segment, 2);
assert_eq!(meta.segment_count, 2);
@@ -450,11 +312,11 @@ mod tests {
let bytes = mock_bloom_filter_bytes().await;
let reader = BloomFilterReaderImpl::new(bytes);
let meta = reader.metadata(None).await.unwrap();
let meta = reader.metadata().await.unwrap();
assert_eq!(meta.bloom_filter_locs.len(), 2);
let bf = reader
.bloom_filter(&meta.bloom_filter_locs[0], None)
.bloom_filter(&meta.bloom_filter_locs[0])
.await
.unwrap();
assert!(bf.contains(&b"a"));
@@ -463,7 +325,7 @@ mod tests {
assert!(bf.contains(&b"d"));
let bf = reader
.bloom_filter(&meta.bloom_filter_locs[1], None)
.bloom_filter(&meta.bloom_filter_locs[1])
.await
.unwrap();
assert!(bf.contains(&b"e"));

View File

@@ -74,7 +74,7 @@ async fn test_search(
writer.finish().await.unwrap();
let reader = puffin_manager.reader(&file_name).await.unwrap();
let (index_dir, _metrics) = reader.dir(&blob_key).await.unwrap();
let index_dir = reader.dir(&blob_key).await.unwrap();
let searcher = TantivyFulltextIndexSearcher::new(index_dir.path(), config).unwrap();
for (query, expected) in query_expected {
let results = searcher.search(query).await.unwrap();

View File

@@ -15,7 +15,6 @@
use std::collections::VecDeque;
use std::ops::Range;
use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use bytes::Bytes;
@@ -30,115 +29,37 @@ pub use crate::inverted_index::format::reader::blob::InvertedIndexBlobReader;
mod blob;
mod footer;
/// Metrics for inverted index read operations.
#[derive(Default, Clone)]
pub struct InvertedIndexReadMetrics {
/// Total byte size to read.
pub total_bytes: u64,
/// Total number of ranges to read.
pub total_ranges: usize,
/// Elapsed time to fetch data.
pub fetch_elapsed: Duration,
/// Number of cache hits.
pub cache_hit: usize,
/// Number of cache misses.
pub cache_miss: usize,
}
impl std::fmt::Debug for InvertedIndexReadMetrics {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self {
total_bytes,
total_ranges,
fetch_elapsed,
cache_hit,
cache_miss,
} = self;
// If both total_bytes and cache_hit are 0, we didn't read anything.
if *total_bytes == 0 && *cache_hit == 0 {
return write!(f, "{{}}");
}
write!(f, "{{")?;
if *total_bytes > 0 {
write!(f, "\"total_bytes\":{}", total_bytes)?;
}
if *cache_hit > 0 {
if *total_bytes > 0 {
write!(f, ", ")?;
}
write!(f, "\"cache_hit\":{}", cache_hit)?;
}
if *total_ranges > 0 {
write!(f, ", \"total_ranges\":{}", total_ranges)?;
}
if !fetch_elapsed.is_zero() {
write!(f, ", \"fetch_elapsed\":\"{:?}\"", fetch_elapsed)?;
}
if *cache_miss > 0 {
write!(f, ", \"cache_miss\":{}", cache_miss)?;
}
write!(f, "}}")
}
}
impl InvertedIndexReadMetrics {
/// Merges another metrics into this one.
pub fn merge_from(&mut self, other: &Self) {
self.total_bytes += other.total_bytes;
self.total_ranges += other.total_ranges;
self.fetch_elapsed += other.fetch_elapsed;
self.cache_hit += other.cache_hit;
self.cache_miss += other.cache_miss;
}
}
/// InvertedIndexReader defines an asynchronous reader of inverted index data.
#[mockall::automock]
#[async_trait]
pub trait InvertedIndexReader: Send + Sync {
/// Seeks to the given offset and reads exactly `size` bytes.
async fn range_read<'a>(
&self,
offset: u64,
size: u32,
metrics: Option<&'a mut InvertedIndexReadMetrics>,
) -> Result<Vec<u8>>;
async fn range_read(&self, offset: u64, size: u32) -> Result<Vec<u8>>;
/// Reads the bytes in the given ranges.
async fn read_vec<'a>(
&self,
ranges: &[Range<u64>],
metrics: Option<&'a mut InvertedIndexReadMetrics>,
) -> Result<Vec<Bytes>>;
async fn read_vec(&self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
let mut result = Vec::with_capacity(ranges.len());
for range in ranges {
let data = self
.range_read(range.start, (range.end - range.start) as u32)
.await?;
result.push(Bytes::from(data));
}
Ok(result)
}
/// Retrieves metadata of all inverted indices stored within the blob.
async fn metadata<'a>(
&self,
metrics: Option<&'a mut InvertedIndexReadMetrics>,
) -> Result<Arc<InvertedIndexMetas>>;
async fn metadata(&self) -> Result<Arc<InvertedIndexMetas>>;
/// Retrieves the finite state transducer (FST) map from the given offset and size.
async fn fst<'a>(
&self,
offset: u64,
size: u32,
metrics: Option<&'a mut InvertedIndexReadMetrics>,
) -> Result<FstMap> {
let fst_data = self.range_read(offset, size, metrics).await?;
async fn fst(&self, offset: u64, size: u32) -> Result<FstMap> {
let fst_data = self.range_read(offset, size).await?;
FstMap::new(fst_data).context(DecodeFstSnafu)
}
/// Retrieves multiple finite state transducer (FST) maps from the given ranges.
async fn fst_vec<'a>(
&mut self,
ranges: &[Range<u64>],
metrics: Option<&'a mut InvertedIndexReadMetrics>,
) -> Result<Vec<FstMap>> {
self.read_vec(ranges, metrics)
async fn fst_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<FstMap>> {
self.read_vec(ranges)
.await?
.into_iter()
.map(|bytes| FstMap::new(bytes.to_vec()).context(DecodeFstSnafu))
@@ -146,28 +67,19 @@ pub trait InvertedIndexReader: Send + Sync {
}
/// Retrieves the bitmap from the given offset and size.
async fn bitmap<'a>(
&self,
offset: u64,
size: u32,
bitmap_type: BitmapType,
metrics: Option<&'a mut InvertedIndexReadMetrics>,
) -> Result<Bitmap> {
self.range_read(offset, size, metrics)
.await
.and_then(|bytes| {
Bitmap::deserialize_from(&bytes, bitmap_type).context(DecodeBitmapSnafu)
})
async fn bitmap(&self, offset: u64, size: u32, bitmap_type: BitmapType) -> Result<Bitmap> {
self.range_read(offset, size).await.and_then(|bytes| {
Bitmap::deserialize_from(&bytes, bitmap_type).context(DecodeBitmapSnafu)
})
}
/// Retrieves multiple bitmaps from the given ranges.
async fn bitmap_deque<'a>(
async fn bitmap_deque(
&mut self,
ranges: &[(Range<u64>, BitmapType)],
metrics: Option<&'a mut InvertedIndexReadMetrics>,
) -> Result<VecDeque<Bitmap>> {
let (ranges, types): (Vec<_>, Vec<_>) = ranges.iter().cloned().unzip();
let bytes = self.read_vec(&ranges, metrics).await?;
let bytes = self.read_vec(&ranges).await?;
bytes
.into_iter()
.zip(types)

View File

@@ -14,7 +14,6 @@
use std::ops::Range;
use std::sync::Arc;
use std::time::Instant;
use async_trait::async_trait;
use bytes::Bytes;
@@ -24,10 +23,10 @@ use snafu::{ResultExt, ensure};
use crate::inverted_index::error::{CommonIoSnafu, Result, UnexpectedBlobSizeSnafu};
use crate::inverted_index::format::MIN_BLOB_SIZE;
use crate::inverted_index::format::reader::InvertedIndexReader;
use crate::inverted_index::format::reader::footer::{
DEFAULT_PREFETCH_SIZE, InvertedIndexFooterReader,
};
use crate::inverted_index::format::reader::{InvertedIndexReadMetrics, InvertedIndexReader};
/// Inverted index blob reader, implements [`InvertedIndexReader`]
pub struct InvertedIndexBlobReader<R> {
@@ -54,58 +53,27 @@ impl<R> InvertedIndexBlobReader<R> {
#[async_trait]
impl<R: RangeReader + Sync> InvertedIndexReader for InvertedIndexBlobReader<R> {
async fn range_read<'a>(
&self,
offset: u64,
size: u32,
metrics: Option<&'a mut InvertedIndexReadMetrics>,
) -> Result<Vec<u8>> {
let start = metrics.as_ref().map(|_| Instant::now());
async fn range_read(&self, offset: u64, size: u32) -> Result<Vec<u8>> {
let buf = self
.source
.read(offset..offset + size as u64)
.await
.context(CommonIoSnafu)?;
if let Some(m) = metrics {
m.total_bytes += size as u64;
m.total_ranges += 1;
m.fetch_elapsed += start.unwrap().elapsed();
}
Ok(buf.into())
}
async fn read_vec<'a>(
&self,
ranges: &[Range<u64>],
metrics: Option<&'a mut InvertedIndexReadMetrics>,
) -> Result<Vec<Bytes>> {
let start = metrics.as_ref().map(|_| Instant::now());
let result = self.source.read_vec(ranges).await.context(CommonIoSnafu)?;
if let Some(m) = metrics {
m.total_bytes += ranges.iter().map(|r| r.end - r.start).sum::<u64>();
m.total_ranges += ranges.len();
m.fetch_elapsed += start.unwrap().elapsed();
}
Ok(result)
async fn read_vec(&self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
self.source.read_vec(ranges).await.context(CommonIoSnafu)
}
async fn metadata<'a>(
&self,
metrics: Option<&'a mut InvertedIndexReadMetrics>,
) -> Result<Arc<InvertedIndexMetas>> {
async fn metadata(&self) -> Result<Arc<InvertedIndexMetas>> {
let metadata = self.source.metadata().await.context(CommonIoSnafu)?;
let blob_size = metadata.content_length;
Self::validate_blob_size(blob_size)?;
let mut footer_reader = InvertedIndexFooterReader::new(&self.source, blob_size)
.with_prefetch_size(DEFAULT_PREFETCH_SIZE);
footer_reader.metadata(metrics).await.map(Arc::new)
footer_reader.metadata().await.map(Arc::new)
}
}
@@ -205,7 +173,7 @@ mod tests {
let blob = create_inverted_index_blob();
let blob_reader = InvertedIndexBlobReader::new(blob);
let metas = blob_reader.metadata(None).await.unwrap();
let metas = blob_reader.metadata().await.unwrap();
assert_eq!(metas.metas.len(), 2);
let meta0 = metas.metas.get("tag0").unwrap();
@@ -232,14 +200,13 @@ mod tests {
let blob = create_inverted_index_blob();
let blob_reader = InvertedIndexBlobReader::new(blob);
let metas = blob_reader.metadata(None).await.unwrap();
let metas = blob_reader.metadata().await.unwrap();
let meta = metas.metas.get("tag0").unwrap();
let fst_map = blob_reader
.fst(
meta.base_offset + meta.relative_fst_offset as u64,
meta.fst_size,
None,
)
.await
.unwrap();
@@ -252,7 +219,6 @@ mod tests {
.fst(
meta.base_offset + meta.relative_fst_offset as u64,
meta.fst_size,
None,
)
.await
.unwrap();
@@ -266,30 +232,30 @@ mod tests {
let blob = create_inverted_index_blob();
let blob_reader = InvertedIndexBlobReader::new(blob);
let metas = blob_reader.metadata(None).await.unwrap();
let metas = blob_reader.metadata().await.unwrap();
let meta = metas.metas.get("tag0").unwrap();
let bitmap = blob_reader
.bitmap(meta.base_offset, 26, BitmapType::Roaring, None)
.bitmap(meta.base_offset, 26, BitmapType::Roaring)
.await
.unwrap();
assert_eq!(bitmap, mock_bitmap());
let bitmap = blob_reader
.bitmap(meta.base_offset + 26, 26, BitmapType::Roaring, None)
.bitmap(meta.base_offset + 26, 26, BitmapType::Roaring)
.await
.unwrap();
assert_eq!(bitmap, mock_bitmap());
let metas = blob_reader.metadata(None).await.unwrap();
let metas = blob_reader.metadata().await.unwrap();
let meta = metas.metas.get("tag1").unwrap();
let bitmap = blob_reader
.bitmap(meta.base_offset, 26, BitmapType::Roaring, None)
.bitmap(meta.base_offset, 26, BitmapType::Roaring)
.await
.unwrap();
assert_eq!(bitmap, mock_bitmap());
let bitmap = blob_reader
.bitmap(meta.base_offset + 26, 26, BitmapType::Roaring, None)
.bitmap(meta.base_offset + 26, 26, BitmapType::Roaring)
.await
.unwrap();
assert_eq!(bitmap, mock_bitmap());

View File

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::time::Instant;
use common_base::range_read::RangeReader;
use greptime_proto::v1::index::{InvertedIndexMeta, InvertedIndexMetas};
use prost::Message;
@@ -25,7 +23,6 @@ use crate::inverted_index::error::{
UnexpectedZeroSegmentRowCountSnafu,
};
use crate::inverted_index::format::FOOTER_PAYLOAD_SIZE_SIZE;
use crate::inverted_index::format::reader::InvertedIndexReadMetrics;
pub const DEFAULT_PREFETCH_SIZE: u64 = 8192; // 8KiB
@@ -57,17 +54,12 @@ impl<R> InvertedIndexFooterReader<R> {
}
impl<R: RangeReader> InvertedIndexFooterReader<R> {
pub async fn metadata(
&mut self,
mut metrics: Option<&mut InvertedIndexReadMetrics>,
) -> Result<InvertedIndexMetas> {
pub async fn metadata(&mut self) -> Result<InvertedIndexMetas> {
ensure!(
self.blob_size >= FOOTER_PAYLOAD_SIZE_SIZE,
BlobSizeTooSmallSnafu
);
let start = metrics.as_ref().map(|_| Instant::now());
let footer_start = self.blob_size.saturating_sub(self.prefetch_size());
let suffix = self
.source
@@ -81,36 +73,19 @@ impl<R: RangeReader> InvertedIndexFooterReader<R> {
let footer_size = FOOTER_PAYLOAD_SIZE_SIZE;
// The initial read did not fetch the entire metadata, so a second request is needed.
let result = if length > suffix_len as u64 - footer_size {
if length > suffix_len as u64 - footer_size {
let metadata_start = self.blob_size - length - footer_size;
let meta = self
.source
.read(metadata_start..self.blob_size - footer_size)
.await
.context(CommonIoSnafu)?;
if let Some(m) = metrics.as_deref_mut() {
m.total_bytes += self.blob_size.min(self.prefetch_size()) + length;
m.total_ranges += 2;
}
self.parse_payload(&meta, length)
} else {
if let Some(m) = metrics.as_deref_mut() {
m.total_bytes += self.blob_size.min(self.prefetch_size());
m.total_ranges += 1;
}
let metadata_start = self.blob_size - length - footer_size - footer_start;
let meta = &suffix[metadata_start as usize..suffix_len - footer_size as usize];
self.parse_payload(meta, length)
};
if let Some(m) = metrics {
m.fetch_elapsed += start.unwrap().elapsed();
}
result
}
fn read_tailing_four_bytes(suffix: &[u8]) -> Result<[u8; 4]> {
@@ -211,7 +186,7 @@ mod tests {
reader = reader.with_prefetch_size(prefetch);
}
let metas = reader.metadata(None).await.unwrap();
let metas = reader.metadata().await.unwrap();
assert_eq!(metas.metas.len(), 1);
let index_meta = &metas.metas.get("test").unwrap();
assert_eq!(index_meta.name, "test");
@@ -235,7 +210,7 @@ mod tests {
reader = reader.with_prefetch_size(prefetch);
}
let result = reader.metadata(None).await;
let result = reader.metadata().await;
assert_matches!(result, Err(Error::UnexpectedFooterPayloadSize { .. }));
}
}
@@ -258,7 +233,7 @@ mod tests {
reader = reader.with_prefetch_size(prefetch);
}
let result = reader.metadata(None).await;
let result = reader.metadata().await;
assert_matches!(result, Err(Error::UnexpectedOffsetSize { .. }));
}
}

View File

@@ -122,7 +122,7 @@ mod tests {
.unwrap();
let reader = InvertedIndexBlobReader::new(blob);
let metadata = reader.metadata(None).await.unwrap();
let metadata = reader.metadata().await.unwrap();
assert_eq!(metadata.total_row_count, 8);
assert_eq!(metadata.segment_row_count, 1);
assert_eq!(metadata.metas.len(), 0);
@@ -182,7 +182,7 @@ mod tests {
.unwrap();
let reader = InvertedIndexBlobReader::new(blob);
let metadata = reader.metadata(None).await.unwrap();
let metadata = reader.metadata().await.unwrap();
assert_eq!(metadata.total_row_count, 8);
assert_eq!(metadata.segment_row_count, 1);
assert_eq!(metadata.metas.len(), 2);
@@ -198,19 +198,13 @@ mod tests {
.fst(
tag0.base_offset + tag0.relative_fst_offset as u64,
tag0.fst_size,
None,
)
.await
.unwrap();
assert_eq!(fst0.len(), 3);
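// Assumed packing behind the `value`/`unpack` helpers used in these tests:
// the high 32 bits of an FST value carry the bitmap offset and the low 32
// bits carry its size (names and layout here are illustrative only).
fn value_sketch(offset: u32, size: u32) -> u64 {
    ((offset as u64) << 32) | size as u64
}
fn unpack_sketch(v: u64) -> [u32; 2] {
    [(v >> 32) as u32, v as u32]
}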
let [offset, size] = unpack(fst0.get(b"a").unwrap());
let bitmap = reader
.bitmap(
tag0.base_offset + offset as u64,
size,
BitmapType::Roaring,
None,
)
.bitmap(tag0.base_offset + offset as u64, size, BitmapType::Roaring)
.await
.unwrap();
assert_eq!(
@@ -219,12 +213,7 @@ mod tests {
);
let [offset, size] = unpack(fst0.get(b"b").unwrap());
let bitmap = reader
.bitmap(
tag0.base_offset + offset as u64,
size,
BitmapType::Roaring,
None,
)
.bitmap(tag0.base_offset + offset as u64, size, BitmapType::Roaring)
.await
.unwrap();
assert_eq!(
@@ -233,12 +222,7 @@ mod tests {
);
let [offset, size] = unpack(fst0.get(b"c").unwrap());
let bitmap = reader
.bitmap(
tag0.base_offset + offset as u64,
size,
BitmapType::Roaring,
None,
)
.bitmap(tag0.base_offset + offset as u64, size, BitmapType::Roaring)
.await
.unwrap();
assert_eq!(
@@ -257,19 +241,13 @@ mod tests {
.fst(
tag1.base_offset + tag1.relative_fst_offset as u64,
tag1.fst_size,
None,
)
.await
.unwrap();
assert_eq!(fst1.len(), 3);
let [offset, size] = unpack(fst1.get(b"x").unwrap());
let bitmap = reader
.bitmap(
tag1.base_offset + offset as u64,
size,
BitmapType::Roaring,
None,
)
.bitmap(tag1.base_offset + offset as u64, size, BitmapType::Roaring)
.await
.unwrap();
assert_eq!(
@@ -278,12 +256,7 @@ mod tests {
);
let [offset, size] = unpack(fst1.get(b"y").unwrap());
let bitmap = reader
.bitmap(
tag1.base_offset + offset as u64,
size,
BitmapType::Roaring,
None,
)
.bitmap(tag1.base_offset + offset as u64, size, BitmapType::Roaring)
.await
.unwrap();
assert_eq!(
@@ -292,12 +265,7 @@ mod tests {
);
let [offset, size] = unpack(fst1.get(b"z").unwrap());
let bitmap = reader
.bitmap(
tag1.base_offset + offset as u64,
size,
BitmapType::Roaring,
None,
)
.bitmap(tag1.base_offset + offset as u64, size, BitmapType::Roaring)
.await
.unwrap();
assert_eq!(

View File

@@ -16,7 +16,7 @@ use greptime_proto::v1::index::{BitmapType, InvertedIndexMeta};
use crate::bitmap::Bitmap;
use crate::inverted_index::error::Result;
use crate::inverted_index::format::reader::{InvertedIndexReadMetrics, InvertedIndexReader};
use crate::inverted_index::format::reader::InvertedIndexReader;
/// `ParallelFstValuesMapper` enables parallel mapping of multiple FST value groups to their
/// corresponding bitmaps within an inverted index.
@@ -35,8 +35,7 @@ impl<'a> ParallelFstValuesMapper<'a> {
pub async fn map_values_vec(
&mut self,
value_and_meta_vec: &[(Vec<u64>, &InvertedIndexMeta)],
metrics: Option<&mut InvertedIndexReadMetrics>,
value_and_meta_vec: &[(Vec<u64>, &'a InvertedIndexMeta)],
) -> Result<Vec<Bitmap>> {
let groups = value_and_meta_vec
.iter()
@@ -65,7 +64,7 @@ impl<'a> ParallelFstValuesMapper<'a> {
}
common_telemetry::debug!("fetch ranges: {:?}", fetch_ranges);
let mut bitmaps = self.reader.bitmap_deque(&fetch_ranges, metrics).await?;
let mut bitmaps = self.reader.bitmap_deque(&fetch_ranges).await?;
let mut output = Vec::with_capacity(groups.len());
for counter in groups {
@@ -96,25 +95,23 @@ mod tests {
#[tokio::test]
async fn test_map_values_vec() {
let mut mock_reader = MockInvertedIndexReader::new();
mock_reader
.expect_bitmap_deque()
.returning(|ranges, _metrics| {
let mut output = VecDeque::new();
for (range, bitmap_type) in ranges {
let offset = range.start;
let size = range.end - range.start;
match (offset, size, bitmap_type) {
(1, 1, BitmapType::Roaring) => {
output.push_back(Bitmap::from_lsb0_bytes(&[0b10101010], *bitmap_type))
}
(2, 1, BitmapType::Roaring) => {
output.push_back(Bitmap::from_lsb0_bytes(&[0b01010101], *bitmap_type))
}
_ => unreachable!(),
mock_reader.expect_bitmap_deque().returning(|ranges| {
let mut output = VecDeque::new();
for (range, bitmap_type) in ranges {
let offset = range.start;
let size = range.end - range.start;
match (offset, size, bitmap_type) {
(1, 1, BitmapType::Roaring) => {
output.push_back(Bitmap::from_lsb0_bytes(&[0b10101010], *bitmap_type))
}
(2, 1, BitmapType::Roaring) => {
output.push_back(Bitmap::from_lsb0_bytes(&[0b01010101], *bitmap_type))
}
_ => unreachable!(),
}
Ok(output)
});
}
Ok(output)
});
let meta = InvertedIndexMeta {
bitmap_type: BitmapType::Roaring.into(),
@@ -123,13 +120,13 @@ mod tests {
let mut values_mapper = ParallelFstValuesMapper::new(&mut mock_reader);
let result = values_mapper
.map_values_vec(&[(vec![], &meta)], None)
.map_values_vec(&[(vec![], &meta)])
.await
.unwrap();
assert_eq!(result[0].count_ones(), 0);
let result = values_mapper
.map_values_vec(&[(vec![value(1, 1)], &meta)], None)
.map_values_vec(&[(vec![value(1, 1)], &meta)])
.await
.unwrap();
assert_eq!(
@@ -138,7 +135,7 @@ mod tests {
);
let result = values_mapper
.map_values_vec(&[(vec![value(2, 1)], &meta)], None)
.map_values_vec(&[(vec![value(2, 1)], &meta)])
.await
.unwrap();
assert_eq!(
@@ -147,7 +144,7 @@ mod tests {
);
let result = values_mapper
.map_values_vec(&[(vec![value(1, 1), value(2, 1)], &meta)], None)
.map_values_vec(&[(vec![value(1, 1), value(2, 1)], &meta)])
.await
.unwrap();
assert_eq!(
@@ -156,7 +153,7 @@ mod tests {
);
let result = values_mapper
.map_values_vec(&[(vec![value(2, 1), value(1, 1)], &meta)], None)
.map_values_vec(&[(vec![value(2, 1), value(1, 1)], &meta)])
.await
.unwrap();
assert_eq!(
@@ -165,10 +162,7 @@ mod tests {
);
let result = values_mapper
.map_values_vec(
&[(vec![value(2, 1)], &meta), (vec![value(1, 1)], &meta)],
None,
)
.map_values_vec(&[(vec![value(2, 1)], &meta), (vec![value(1, 1)], &meta)])
.await
.unwrap();
assert_eq!(
@@ -180,13 +174,10 @@ mod tests {
Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::Roaring)
);
let result = values_mapper
.map_values_vec(
&[
(vec![value(2, 1), value(1, 1)], &meta),
(vec![value(1, 1)], &meta),
],
None,
)
.map_values_vec(&[
(vec![value(2, 1), value(1, 1)], &meta),
(vec![value(1, 1)], &meta),
])
.await
.unwrap();
assert_eq!(

View File

@@ -19,7 +19,7 @@ pub use predicates_apply::PredicatesIndexApplier;
use crate::bitmap::Bitmap;
use crate::inverted_index::error::Result;
use crate::inverted_index::format::reader::{InvertedIndexReadMetrics, InvertedIndexReader};
use crate::inverted_index::format::reader::InvertedIndexReader;
/// The output of an apply operation.
#[derive(Clone, Debug, PartialEq)]
@@ -44,11 +44,10 @@ pub trait IndexApplier: Send + Sync {
/// Applies the predefined predicates to the data read by the given index reader, returning
/// a list of relevant indices (e.g., post IDs, group IDs, row IDs).
#[allow(unused_parens)]
async fn apply<'a, 'b>(
async fn apply<'a>(
&self,
context: SearchContext,
reader: &mut (dyn InvertedIndexReader + 'a),
metrics: Option<&'b mut InvertedIndexReadMetrics>,
) -> Result<ApplyOutput>;
/// Returns the memory usage of the applier.

View File

@@ -19,7 +19,7 @@ use greptime_proto::v1::index::InvertedIndexMetas;
use crate::bitmap::Bitmap;
use crate::inverted_index::error::{IndexNotFoundSnafu, Result};
use crate::inverted_index::format::reader::{InvertedIndexReadMetrics, InvertedIndexReader};
use crate::inverted_index::format::reader::InvertedIndexReader;
use crate::inverted_index::search::fst_apply::{
FstApplier, IntersectionFstApplier, KeysFstApplier,
};
@@ -43,14 +43,12 @@ pub struct PredicatesIndexApplier {
impl IndexApplier for PredicatesIndexApplier {
/// Applies all `FstApplier`s to the data in the inverted index reader, intersecting the individual
/// bitmaps obtained for each index to produce the final set of indices.
async fn apply<'a, 'b>(
async fn apply<'a>(
&self,
context: SearchContext,
reader: &mut (dyn InvertedIndexReader + 'a),
metrics: Option<&'b mut InvertedIndexReadMetrics>,
) -> Result<ApplyOutput> {
let mut metrics = metrics;
let metadata = reader.metadata(metrics.as_deref_mut()).await?;
let metadata = reader.metadata().await?;
let mut output = ApplyOutput {
matched_segment_ids: Bitmap::new_bitvec(),
total_row_count: metadata.total_row_count as _,
@@ -86,7 +84,7 @@ impl IndexApplier for PredicatesIndexApplier {
return Ok(output);
}
let fsts = reader.fst_vec(&fst_ranges, metrics.as_deref_mut()).await?;
let fsts = reader.fst_vec(&fst_ranges).await?;
let value_and_meta_vec = fsts
.into_iter()
.zip(appliers)
@@ -94,7 +92,7 @@ impl IndexApplier for PredicatesIndexApplier {
.collect::<Vec<_>>();
let mut mapper = ParallelFstValuesMapper::new(reader);
let mut bm_vec = mapper.map_values_vec(&value_and_meta_vec, metrics).await?;
let mut bm_vec = mapper.map_values_vec(&value_and_meta_vec).await?;
let mut bitmap = bm_vec.pop().unwrap(); // SAFETY: `fst_ranges` is not empty
for bm in bm_vec {
@@ -223,28 +221,26 @@ mod tests {
let mut mock_reader = MockInvertedIndexReader::new();
mock_reader
.expect_metadata()
.returning(|_| Ok(mock_metas([("tag-0", 0)])));
mock_reader.expect_fst_vec().returning(|_ranges, _metrics| {
.returning(|| Ok(mock_metas([("tag-0", 0)])));
mock_reader.expect_fst_vec().returning(|_ranges| {
Ok(vec![
FstMap::from_iter([(b"tag-0_value-0", fst_value(2, 1))]).unwrap(),
])
});
mock_reader
.expect_bitmap_deque()
.returning(|arg, _metrics| {
assert_eq!(arg.len(), 1);
let range = &arg[0].0;
let bitmap_type = arg[0].1;
assert_eq!(*range, 2..3);
assert_eq!(bitmap_type, BitmapType::Roaring);
Ok(VecDeque::from([Bitmap::from_lsb0_bytes(
&[0b10101010],
bitmap_type,
)]))
});
mock_reader.expect_bitmap_deque().returning(|arg| {
assert_eq!(arg.len(), 1);
let range = &arg[0].0;
let bitmap_type = arg[0].1;
assert_eq!(*range, 2..3);
assert_eq!(bitmap_type, BitmapType::Roaring);
Ok(VecDeque::from([Bitmap::from_lsb0_bytes(
&[0b10101010],
bitmap_type,
)]))
});
let output = applier
.apply(SearchContext::default(), &mut mock_reader, None)
.apply(SearchContext::default(), &mut mock_reader)
.await
.unwrap();
assert_eq!(
@@ -256,14 +252,14 @@ mod tests {
let mut mock_reader = MockInvertedIndexReader::new();
mock_reader
.expect_metadata()
.returning(|_| Ok(mock_metas([("tag-0", 0)])));
mock_reader.expect_fst_vec().returning(|_range, _metrics| {
.returning(|| Ok(mock_metas([("tag-0", 0)])));
mock_reader.expect_fst_vec().returning(|_range| {
Ok(vec![
FstMap::from_iter([(b"tag-0_value-1", fst_value(2, 1))]).unwrap(),
])
});
let output = applier
.apply(SearchContext::default(), &mut mock_reader, None)
.apply(SearchContext::default(), &mut mock_reader)
.await
.unwrap();
assert_eq!(output.matched_segment_ids.count_ones(), 0);
@@ -283,8 +279,8 @@ mod tests {
let mut mock_reader = MockInvertedIndexReader::new();
mock_reader
.expect_metadata()
.returning(|_| Ok(mock_metas([("tag-0", 0), ("tag-1", 1)])));
mock_reader.expect_fst_vec().returning(|ranges, _metrics| {
.returning(|| Ok(mock_metas([("tag-0", 0), ("tag-1", 1)])));
mock_reader.expect_fst_vec().returning(|ranges| {
let mut output = vec![];
for range in ranges {
match range.start {
@@ -297,29 +293,27 @@ mod tests {
}
Ok(output)
});
mock_reader
.expect_bitmap_deque()
.returning(|ranges, _metrics| {
let mut output = VecDeque::new();
for (range, bitmap_type) in ranges {
let offset = range.start;
let size = range.end - range.start;
match (offset, size, bitmap_type) {
(1, 1, BitmapType::Roaring) => {
output.push_back(Bitmap::from_lsb0_bytes(&[0b10101010], *bitmap_type))
}
(2, 1, BitmapType::Roaring) => {
output.push_back(Bitmap::from_lsb0_bytes(&[0b11011011], *bitmap_type))
}
_ => unreachable!(),
mock_reader.expect_bitmap_deque().returning(|ranges| {
let mut output = VecDeque::new();
for (range, bitmap_type) in ranges {
let offset = range.start;
let size = range.end - range.start;
match (offset, size, bitmap_type) {
(1, 1, BitmapType::Roaring) => {
output.push_back(Bitmap::from_lsb0_bytes(&[0b10101010], *bitmap_type))
}
(2, 1, BitmapType::Roaring) => {
output.push_back(Bitmap::from_lsb0_bytes(&[0b11011011], *bitmap_type))
}
_ => unreachable!(),
}
}
Ok(output)
});
Ok(output)
});
let output = applier
.apply(SearchContext::default(), &mut mock_reader, None)
.apply(SearchContext::default(), &mut mock_reader)
.await
.unwrap();
assert_eq!(
@@ -337,10 +331,10 @@ mod tests {
let mut mock_reader: MockInvertedIndexReader = MockInvertedIndexReader::new();
mock_reader
.expect_metadata()
.returning(|_| Ok(mock_metas([("tag-0", 0)])));
.returning(|| Ok(mock_metas([("tag-0", 0)])));
let output = applier
.apply(SearchContext::default(), &mut mock_reader, None)
.apply(SearchContext::default(), &mut mock_reader)
.await
.unwrap();
assert_eq!(output.matched_segment_ids, Bitmap::full_bitvec(8)); // full range to scan
@@ -349,7 +343,7 @@ mod tests {
#[tokio::test]
async fn test_index_applier_with_empty_index() {
let mut mock_reader = MockInvertedIndexReader::new();
mock_reader.expect_metadata().returning(move |_| {
mock_reader.expect_metadata().returning(move || {
Ok(Arc::new(InvertedIndexMetas {
total_row_count: 0, // No rows
segment_row_count: 1,
@@ -365,7 +359,7 @@ mod tests {
};
let output = applier
.apply(SearchContext::default(), &mut mock_reader, None)
.apply(SearchContext::default(), &mut mock_reader)
.await
.unwrap();
assert!(output.matched_segment_ids.is_empty());
@@ -376,7 +370,7 @@ mod tests {
let mut mock_reader = MockInvertedIndexReader::new();
mock_reader
.expect_metadata()
.returning(|_| Ok(mock_metas(vec![])));
.returning(|| Ok(mock_metas(vec![])));
let mut mock_fst_applier = MockFstApplier::new();
mock_fst_applier.expect_apply().never();
@@ -391,7 +385,6 @@ mod tests {
index_not_found_strategy: IndexNotFoundStrategy::ThrowError,
},
&mut mock_reader,
None,
)
.await;
assert!(matches!(result, Err(Error::IndexNotFound { .. })));
@@ -402,7 +395,6 @@ mod tests {
index_not_found_strategy: IndexNotFoundStrategy::ReturnEmpty,
},
&mut mock_reader,
None,
)
.await
.unwrap();
@@ -414,7 +406,6 @@ mod tests {
index_not_found_strategy: IndexNotFoundStrategy::Ignore,
},
&mut mock_reader,
None,
)
.await
.unwrap();

View File

@@ -189,9 +189,6 @@ impl MetaClientBuilder {
let mgr = client.channel_manager.clone();
if self.enable_heartbeat {
if self.heartbeat_channel_manager.is_some() {
info!("Enable heartbeat channel using the heartbeat channel manager.");
}
let mgr = self.heartbeat_channel_manager.unwrap_or(mgr.clone());
client.heartbeat = Some(HeartbeatClient::new(
self.id,

View File

@@ -24,7 +24,7 @@ use common_meta::distributed_time_constants::META_KEEP_ALIVE_INTERVAL_SECS;
use common_telemetry::tracing_context::TracingContext;
use common_telemetry::warn;
use rand::seq::SliceRandom;
use snafu::ResultExt;
use snafu::{OptionExt, ResultExt};
use tokio::time::timeout;
use tonic::transport::Channel;
@@ -101,14 +101,12 @@ impl AskLeader {
};
let (tx, mut rx) = tokio::sync::mpsc::channel(peers.len());
let channel_manager = self.channel_manager.clone();
for addr in &peers {
let mut client = self.create_asker(addr)?;
let tx_clone = tx.clone();
let req = req.clone();
let addr = addr.clone();
let channel_manager = channel_manager.clone();
tokio::spawn(async move {
match client.ask_leader(req).await {
Ok(res) => {
@@ -119,19 +117,13 @@ impl AskLeader {
};
}
Err(status) => {
// Reset cached channel even on generic errors: the VIP may keep us on a dead
// backend, so forcing a reconnect gives us a chance to hit a healthy peer.
Self::reset_channels_with_manager(
&channel_manager,
std::slice::from_ref(&addr),
);
warn!("Failed to ask leader from: {addr}, {status}");
}
}
});
}
let leader = match timeout(
let leader = timeout(
self.channel_manager
.config()
.timeout
@@ -139,16 +131,8 @@ impl AskLeader {
rx.recv(),
)
.await
{
Ok(Some(leader)) => leader,
Ok(None) => return error::NoLeaderSnafu.fail(),
Err(e) => {
// All peers timed out. Reset channels to force reconnection,
// which may help escape dead backends in VIP/LB scenarios.
Self::reset_channels_with_manager(&self.channel_manager, &peers);
return Err(e).context(error::AskLeaderTimeoutSnafu);
}
};
.context(error::AskLeaderTimeoutSnafu)?
.context(error::NoLeaderSnafu)?;
let mut leadership_group = self.leadership_group.write().unwrap();
leadership_group.leader = Some(leader.clone());
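// Condensed sketch of the fan-out pattern above: one task per peer sends its
// answer into an mpsc channel, and the caller takes whichever reply arrives
// first within the timeout (peer querying is elided; the 3s timeout is
// illustrative, not the configured value).
async fn first_reply_sketch(peers: Vec<String>) -> Option<String> {
    let (tx, mut rx) = tokio::sync::mpsc::channel(peers.len().max(1));
    for peer in peers {
        let tx = tx.clone();
        tokio::spawn(async move {
            // Query `peer` here; forward its answer on success.
            let _ = tx.send(peer).await;
        });
    }
    tokio::time::timeout(std::time::Duration::from_secs(3), rx.recv())
        .await
        .ok()
        .flatten()
}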
@@ -185,15 +169,6 @@ impl AskLeader {
.context(error::CreateChannelSnafu)?,
))
}
/// Drop cached channels for the given peers so a fresh connection is used next time.
fn reset_channels_with_manager(channel_manager: &ChannelManager, peers: &[String]) {
if peers.is_empty() {
return;
}
channel_manager.retain_channel(|addr, _| !peers.iter().any(|peer| peer == addr));
}
}
#[async_trait]

View File

@@ -18,10 +18,6 @@ use std::time::Duration;
use client::RegionFollowerClientRef;
use common_base::Plugins;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::distributed_time_constants::{
HEARTBEAT_CHANNEL_KEEP_ALIVE_INTERVAL_SECS, HEARTBEAT_CHANNEL_KEEP_ALIVE_TIMEOUT_SECS,
HEARTBEAT_TIMEOUT,
};
use common_telemetry::{debug, info};
use serde::{Deserialize, Serialize};
@@ -38,6 +34,8 @@ pub struct MetaClientOptions {
#[serde(with = "humantime_serde")]
pub timeout: Duration,
#[serde(with = "humantime_serde")]
pub heartbeat_timeout: Duration,
#[serde(with = "humantime_serde")]
pub ddl_timeout: Duration,
#[serde(with = "humantime_serde")]
pub connect_timeout: Duration,
@@ -54,6 +52,7 @@ impl Default for MetaClientOptions {
Self {
metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
timeout: Duration::from_millis(3_000u64),
heartbeat_timeout: Duration::from_millis(500u64),
ddl_timeout: Duration::from_millis(10_000u64),
connect_timeout: Duration::from_millis(1_000u64),
tcp_nodelay: true,
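// Hedged usage sketch: override the new `heartbeat_timeout` field while
// keeping the remaining defaults shown above (the 800ms value is illustrative).
fn custom_meta_client_options_sketch() -> MetaClientOptions {
    MetaClientOptions {
        heartbeat_timeout: Duration::from_millis(800),
        ..Default::default()
    }
}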
@@ -98,11 +97,7 @@ pub async fn create_meta_client(
.timeout(meta_client_options.timeout)
.connect_timeout(meta_client_options.connect_timeout)
.tcp_nodelay(meta_client_options.tcp_nodelay);
let heartbeat_config = base_config
.clone()
.timeout(HEARTBEAT_TIMEOUT)
.http2_keep_alive_interval(HEARTBEAT_CHANNEL_KEEP_ALIVE_INTERVAL_SECS)
.http2_keep_alive_timeout(HEARTBEAT_CHANNEL_KEEP_ALIVE_TIMEOUT_SECS);
let heartbeat_config = base_config.clone();
if let MetaClientType::Frontend = client_type {
let ddl_config = base_config.clone().timeout(meta_client_options.ddl_timeout);

View File

@@ -23,17 +23,14 @@ use store_api::storage::RegionId;
mod candidate;
mod ctx;
mod handler;
#[cfg(test)]
mod mock;
mod options;
mod procedure;
mod scheduler;
mod tracker;
pub use options::GcSchedulerOptions;
pub use procedure::BatchGcProcedure;
pub(crate) use options::GcSchedulerOptions;
pub(crate) use scheduler::{GcScheduler, GcTickerRef};
pub type Region2Peers = HashMap<RegionId, (Peer, Vec<Peer>)>;
pub(crate) type Region2Peers = HashMap<RegionId, (Peer, Vec<Peer>)>;
pub(crate) type Peer2Regions = HashMap<Peer, HashSet<RegionId>>;
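// Illustrative construction of the aliases above; the (Peer, Vec<Peer>) tuple
// is assumed to mean (leader peer, follower peers), and the addresses are
// hypothetical.
fn region2peers_sketch() -> Region2Peers {
    let mut map: Region2Peers = HashMap::new();
    let leader = Peer::new(1, "dn-1:4001");
    let follower = Peer::new(2, "dn-2:4001");
    map.insert(RegionId::new(1024, 1), (leader, vec![follower]));
    map
}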

View File

@@ -84,6 +84,44 @@ impl DefaultGcSchedulerCtx {
mailbox: MailboxRef,
server_addr: String,
) -> Result<Self> {
// Register a noop loader for `GcRegionProcedure` to avoid errors when deserializing the procedure on reboot
procedure_manager
.register_loader(
GcRegionProcedure::TYPE_NAME,
Box::new(move |json| {
common_procedure::error::ProcedureLoaderNotImplementedSnafu {
type_name: GcRegionProcedure::TYPE_NAME.to_string(),
reason:
"GC procedure should be retried by scheduler, not reloaded from storage"
.to_string(),
}
.fail()
}),
)
.context(error::RegisterProcedureLoaderSnafu {
type_name: GcRegionProcedure::TYPE_NAME,
})?;
// Register a noop loader for `BatchGcProcedure` to avoid errors when deserializing the procedure on reboot
procedure_manager
.register_loader(
BatchGcProcedure::TYPE_NAME,
Box::new(move |json| {
common_procedure::error::ProcedureLoaderNotImplementedSnafu {
type_name: BatchGcProcedure::TYPE_NAME.to_string(),
reason:
"Batch GC procedure should not be reloaded from storage, as it doesn't need to be retried if interrupted"
.to_string(),
}
.fail()
}),
)
.context(error::RegisterProcedureLoaderSnafu {
type_name: BatchGcProcedure::TYPE_NAME,
})?;
Ok(Self {
table_metadata_manager,
procedure_manager,

View File

@@ -1,458 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
mod basic;
mod candidate_select;
mod concurrent;
mod config;
mod err_handle;
mod full_list;
mod integration;
mod misc;
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use common_meta::datanode::{RegionManifestInfo, RegionStat};
use common_meta::key::table_route::PhysicalTableRouteValue;
use common_meta::peer::Peer;
use common_meta::rpc::router::{Region, RegionRoute};
use common_telemetry::debug;
use ordered_float::OrderedFloat;
use store_api::region_engine::RegionRole;
use store_api::storage::{FileRefsManifest, GcReport, RegionId};
use table::metadata::TableId;
use tokio::sync::mpsc::Sender;
use crate::error::{Result, UnexpectedSnafu};
use crate::gc::candidate::GcCandidate;
use crate::gc::ctx::SchedulerCtx;
use crate::gc::handler::Region2Peers;
use crate::gc::options::GcSchedulerOptions;
use crate::gc::scheduler::{Event, GcScheduler};
pub const TEST_REGION_SIZE_200MB: u64 = 200_000_000;
/// Helper function to create an empty GcReport for the given region IDs
pub fn new_empty_report_with(region_ids: impl IntoIterator<Item = RegionId>) -> GcReport {
let mut deleted_files = HashMap::new();
for region_id in region_ids {
deleted_files.insert(region_id, vec![]);
}
GcReport {
deleted_files,
need_retry_regions: HashSet::new(),
}
}
#[allow(clippy::type_complexity)]
#[derive(Debug, Default)]
pub struct MockSchedulerCtx {
pub table_to_region_stats: Arc<Mutex<Option<HashMap<TableId, Vec<RegionStat>>>>>,
pub table_routes: Arc<Mutex<HashMap<TableId, (TableId, PhysicalTableRouteValue)>>>,
pub file_refs: Arc<Mutex<Option<FileRefsManifest>>>,
pub gc_reports: Arc<Mutex<HashMap<RegionId, GcReport>>>,
pub candidates: Arc<Mutex<Option<HashMap<TableId, Vec<GcCandidate>>>>>,
pub get_table_to_region_stats_calls: Arc<Mutex<usize>>,
pub get_file_references_calls: Arc<Mutex<usize>>,
pub gc_regions_calls: Arc<Mutex<usize>>,
// Error injection fields for testing
pub get_table_to_region_stats_error: Arc<Mutex<Option<crate::error::Error>>>,
pub get_table_route_error: Arc<Mutex<Option<crate::error::Error>>>,
pub get_file_references_error: Arc<Mutex<Option<crate::error::Error>>>,
pub gc_regions_error: Arc<Mutex<Option<crate::error::Error>>>,
// Retry testing fields
pub gc_regions_retry_count: Arc<Mutex<HashMap<RegionId, usize>>>,
pub gc_regions_error_sequence: Arc<Mutex<Vec<crate::error::Error>>>,
pub gc_regions_success_after_retries: Arc<Mutex<HashMap<RegionId, usize>>>,
// Per-region error injection
pub gc_regions_per_region_errors: Arc<Mutex<HashMap<RegionId, crate::error::Error>>>,
}
impl MockSchedulerCtx {
pub fn with_table_routes(
self,
table_routes: HashMap<TableId, (TableId, Vec<(RegionId, Peer)>)>,
) -> Self {
*self.table_routes.lock().unwrap() = table_routes
.into_iter()
.map(|(k, (phy_id, region2peer))| {
let phy = PhysicalTableRouteValue::new(
region2peer
.into_iter()
.map(|(region_id, peer)| RegionRoute {
region: Region::new_test(region_id),
leader_peer: Some(peer),
..Default::default()
})
.collect(),
);
(k, (phy_id, phy))
})
.collect();
self
}
/// Set an error to be returned by `get_table_to_region_stats`
#[allow(dead_code)]
pub fn with_get_table_to_region_stats_error(self, error: crate::error::Error) -> Self {
*self.get_table_to_region_stats_error.lock().unwrap() = Some(error);
self
}
/// Set an error to be returned by `get_table_route`
pub fn set_table_route_error(&self, error: crate::error::Error) {
*self.get_table_route_error.lock().unwrap() = Some(error);
}
/// Set an error to be returned by `get_file_references`
#[allow(dead_code)]
pub fn with_get_file_references_error(self, error: crate::error::Error) -> Self {
*self.get_file_references_error.lock().unwrap() = Some(error);
self
}
/// Set an error to be returned by `gc_regions`
pub fn with_gc_regions_error(self, error: crate::error::Error) -> Self {
*self.gc_regions_error.lock().unwrap() = Some(error);
self
}
/// Set a sequence of errors to be returned by `gc_regions` for retry testing
pub fn set_gc_regions_error_sequence(&self, errors: Vec<crate::error::Error>) {
*self.gc_regions_error_sequence.lock().unwrap() = errors;
}
/// Set success after a specific number of retries for a region
pub fn set_gc_regions_success_after_retries(&self, region_id: RegionId, retries: usize) {
self.gc_regions_success_after_retries
.lock()
.unwrap()
.insert(region_id, retries);
}
/// Get the retry count for a specific region
pub fn get_retry_count(&self, region_id: RegionId) -> usize {
self.gc_regions_retry_count
.lock()
.unwrap()
.get(&region_id)
.copied()
.unwrap_or(0)
}
/// Reset all retry tracking
pub fn reset_retry_tracking(&self) {
*self.gc_regions_retry_count.lock().unwrap() = HashMap::new();
*self.gc_regions_error_sequence.lock().unwrap() = Vec::new();
*self.gc_regions_success_after_retries.lock().unwrap() = HashMap::new();
}
/// Set an error to be returned for a specific region
pub fn set_gc_regions_error_for_region(&self, region_id: RegionId, error: crate::error::Error) {
self.gc_regions_per_region_errors
.lock()
.unwrap()
.insert(region_id, error);
}
/// Clear per-region errors
#[allow(unused)]
pub fn clear_gc_regions_per_region_errors(&self) {
self.gc_regions_per_region_errors.lock().unwrap().clear();
}
}
#[async_trait::async_trait]
impl SchedulerCtx for MockSchedulerCtx {
async fn get_table_to_region_stats(&self) -> Result<HashMap<TableId, Vec<RegionStat>>> {
*self.get_table_to_region_stats_calls.lock().unwrap() += 1;
// Check if we should return an injected error
if let Some(error) = self.get_table_to_region_stats_error.lock().unwrap().take() {
return Err(error);
}
Ok(self
.table_to_region_stats
.lock()
.unwrap()
.clone()
.unwrap_or_default())
}
async fn get_table_route(
&self,
table_id: TableId,
) -> Result<(TableId, PhysicalTableRouteValue)> {
// Check if we should return an injected error
if let Some(error) = self.get_table_route_error.lock().unwrap().take() {
return Err(error);
}
Ok(self
.table_routes
.lock()
.unwrap()
.get(&table_id)
.cloned()
.unwrap_or_else(|| (table_id, PhysicalTableRouteValue::default())))
}
async fn get_file_references(
&self,
query_regions: &[RegionId],
_related_regions: HashMap<RegionId, Vec<RegionId>>,
region_to_peer: &Region2Peers,
_timeout: Duration,
) -> Result<FileRefsManifest> {
*self.get_file_references_calls.lock().unwrap() += 1;
// Check if we should return an injected error
if let Some(error) = self.get_file_references_error.lock().unwrap().take() {
return Err(error);
}
if query_regions
.iter()
.any(|region_id| !region_to_peer.contains_key(region_id))
{
UnexpectedSnafu {
violated: format!(
"region_to_peer{region_to_peer:?} does not contain all region_ids requested: {:?}",
query_regions
),
}.fail()?;
}
Ok(self.file_refs.lock().unwrap().clone().unwrap_or_default())
}
async fn gc_regions(
&self,
_peer: Peer,
region_ids: &[RegionId],
_file_refs_manifest: &FileRefsManifest,
_full_file_listing: bool,
_timeout: Duration,
) -> Result<GcReport> {
*self.gc_regions_calls.lock().unwrap() += 1;
// Check per-region error injection first (for any region)
for &region_id in region_ids {
if let Some(error) = self
.gc_regions_per_region_errors
.lock()
.unwrap()
.remove(&region_id)
{
*self
.gc_regions_retry_count
.lock()
.unwrap()
.entry(region_id)
.or_insert(0) += 1;
return Err(error);
}
}
// Check if we should return an injected error
if let Some(error) = self.gc_regions_error.lock().unwrap().take() {
for region_id in region_ids {
*self
.gc_regions_retry_count
.lock()
.unwrap()
.entry(*region_id)
.or_insert(0) += 1;
}
return Err(error);
}
// Handle error sequence for retry testing
{
let mut error_sequence = self.gc_regions_error_sequence.lock().unwrap();
if !error_sequence.is_empty() {
let error = error_sequence.remove(0);
for region_id in region_ids {
*self
.gc_regions_retry_count
.lock()
.unwrap()
.entry(*region_id)
.or_insert(0) += 1;
}
return Err(error);
}
}
// Build the final report by processing each region individually
let mut final_report = GcReport::default();
let gc_reports = self.gc_reports.lock().unwrap();
let success_after_retries = self.gc_regions_success_after_retries.lock().unwrap();
for &region_id in region_ids {
// Get current retry count for this region
let retry_count = self
.gc_regions_retry_count
.lock()
.unwrap()
.get(&region_id)
.copied()
.unwrap_or(0);
// Check if this region should succeed or need retry
if let Some(&required_retries) = success_after_retries.get(&region_id) {
if retry_count < required_retries {
debug!(
"Region {} needs retry (attempt {}/{})",
region_id,
retry_count + 1,
required_retries
);
// This region needs more retries - add to need_retry_regions
final_report.need_retry_regions.insert(region_id);
// Track the retry attempt
let mut retry_count_map = self.gc_regions_retry_count.lock().unwrap();
*retry_count_map.entry(region_id).or_insert(0) += 1;
} else {
debug!(
"Region {} has completed retries - succeeding now",
region_id
);
// This region has completed all required retries - succeed
if let Some(report) = gc_reports.get(&region_id) {
final_report.merge(report.clone());
}
// Track the success attempt
let mut retry_count_map = self.gc_regions_retry_count.lock().unwrap();
*retry_count_map.entry(region_id).or_insert(0) += 1;
}
} else {
// No retry requirement - check if we have a GC report for this region
if let Some(report) = gc_reports.get(&region_id) {
// We have a GC report - succeed immediately
final_report.merge(report.clone());
// Track the success attempt
let mut retry_count_map = self.gc_regions_retry_count.lock().unwrap();
*retry_count_map.entry(region_id).or_insert(0) += 1;
} else {
// No GC report available - this region should be marked for retry
final_report.need_retry_regions.insert(region_id);
// Track the attempt
let mut retry_count_map = self.gc_regions_retry_count.lock().unwrap();
*retry_count_map.entry(region_id).or_insert(0) += 1;
}
}
}
// Return the report with need_retry_regions populated; the caller handles the retry logic
Ok(final_report)
}
}
pub struct TestEnv {
pub scheduler: GcScheduler,
pub ctx: Arc<MockSchedulerCtx>,
#[allow(dead_code)]
tx: Sender<Event>,
}
#[allow(unused)]
impl TestEnv {
pub fn new() -> Self {
let ctx = Arc::new(MockSchedulerCtx::default());
let (tx, rx) = GcScheduler::channel();
let config = GcSchedulerOptions::default();
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: rx,
config,
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
Self { scheduler, ctx, tx }
}
pub fn with_candidates(self, candidates: HashMap<TableId, Vec<GcCandidate>>) -> Self {
*self.ctx.candidates.lock().unwrap() = Some(candidates);
self
}
#[allow(dead_code)]
pub async fn run_scheduler(mut self) {
self.scheduler.run().await;
}
#[allow(dead_code)]
pub async fn tick(&self) {
self.tx.send(Event::Tick).await.unwrap();
}
}
/// Helper function to create a mock GC candidate that will pass the GC threshold
fn new_candidate(region_id: RegionId, score: f64) -> GcCandidate {
// sized to pass the threshold for GC
let region_stat = mock_region_stat(region_id, RegionRole::Leader, 10_000, 10);
GcCandidate {
region_id,
score: OrderedFloat(score),
region_stat,
}
}
/// Helper function to create a mock GC candidate
fn mock_candidate(region_id: RegionId) -> GcCandidate {
let region_stat = mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10);
GcCandidate {
region_id,
score: ordered_float::OrderedFloat(1.0),
region_stat,
}
}
/// Helper function to create a mock RegionStat
fn mock_region_stat(
id: RegionId,
role: RegionRole,
approximate_bytes: u64,
sst_num: u64,
) -> RegionStat {
RegionStat {
id,
role,
approximate_bytes,
sst_num,
region_manifest: RegionManifestInfo::Mito {
manifest_version: 0,
flushed_entry_id: 0,
file_removed_cnt: 0,
},
rcus: 0,
wcus: 0,
engine: "mito".to_string(),
num_rows: 0,
memtable_size: 0,
manifest_size: 0,
sst_size: 0,
index_size: 0,
data_topic_latest_entry_id: 0,
metadata_topic_latest_entry_id: 0,
written_bytes: 0,
}
}

View File

@@ -1,164 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::Instant;
use common_meta::peer::Peer;
use common_telemetry::init_default_ut_logging;
use store_api::region_engine::RegionRole;
use store_api::storage::{FileId, FileRefsManifest, GcReport, RegionId};
use crate::gc::mock::{
MockSchedulerCtx, TEST_REGION_SIZE_200MB, TestEnv, mock_region_stat, new_candidate,
};
use crate::gc::{GcScheduler, GcSchedulerOptions};
#[tokio::test]
async fn test_parallel_process_datanodes_empty() {
let env = TestEnv::new();
let report = env
.scheduler
.parallel_process_datanodes(HashMap::new())
.await;
assert_eq!(report.per_datanode_reports.len(), 0);
assert_eq!(report.failed_datanodes.len(), 0);
}
#[tokio::test]
async fn test_parallel_process_datanodes_with_candidates() {
init_default_ut_logging();
let table_id = 1;
let region_id = RegionId::new(table_id, 1);
let peer = Peer::new(1, "");
let candidates = HashMap::from([(table_id, vec![new_candidate(region_id, 1.0)])]);
let mut gc_reports = HashMap::new();
let deleted_files = vec![FileId::random()];
gc_reports.insert(
region_id,
GcReport {
deleted_files: HashMap::from([(region_id, deleted_files.clone())]),
..Default::default()
},
);
let file_refs = FileRefsManifest {
manifest_version: HashMap::from([(region_id, 1)]),
..Default::default()
};
let ctx = MockSchedulerCtx {
gc_reports: Arc::new(Mutex::new(gc_reports)),
file_refs: Arc::new(Mutex::new(Some(file_refs))),
..Default::default()
}
.with_table_routes(HashMap::from([(
table_id,
(table_id, vec![(region_id, peer.clone())]),
)]));
let env = TestEnv::new();
// Replace the scheduler's ctx with one that carries the prepared gc_reports
let mut scheduler = env.scheduler;
scheduler.ctx = Arc::new(ctx);
// Convert table-based candidates to datanode-based candidates (this conversion recurs below; a helper is sketched after this test)
let datanode_to_candidates = HashMap::from([(
peer,
candidates
.into_iter()
.flat_map(|(table_id, candidates)| candidates.into_iter().map(move |c| (table_id, c)))
.collect(),
)]);
let report = scheduler
.parallel_process_datanodes(datanode_to_candidates)
.await;
assert_eq!(report.per_datanode_reports.len(), 1);
assert_eq!(report.failed_datanodes.len(), 0);
}
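The table-to-datanode conversion above repeats in several tests below; a hypothetical helper (not present in the source) could centralize it:

// Hypothetical helper: flatten table-keyed candidates into the
// (TableId, GcCandidate) pairs that parallel_process_datanodes expects.
fn to_datanode_candidates(
    candidates: HashMap<TableId, Vec<GcCandidate>>,
) -> Vec<(TableId, GcCandidate)> {
    candidates
        .into_iter()
        .flat_map(|(table_id, cs)| cs.into_iter().map(move |c| (table_id, c)))
        .collect()
}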
#[tokio::test]
async fn test_handle_tick() {
init_default_ut_logging();
let table_id = 1;
let region_id = RegionId::new(table_id, 1);
let peer = Peer::new(1, "");
let candidates = HashMap::from([(table_id, vec![new_candidate(region_id, 1.0)])]);
let mut gc_reports = HashMap::new();
gc_reports.insert(region_id, GcReport::default());
let file_refs = FileRefsManifest {
manifest_version: HashMap::from([(region_id, 1)]),
..Default::default()
};
let ctx = Arc::new(
MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(HashMap::from([(
table_id,
vec![mock_region_stat(
region_id,
RegionRole::Leader,
TEST_REGION_SIZE_200MB,
10,
)],
)])))),
gc_reports: Arc::new(Mutex::new(gc_reports)),
candidates: Arc::new(Mutex::new(Some(candidates))),
file_refs: Arc::new(Mutex::new(Some(file_refs))),
..Default::default()
}
.with_table_routes(HashMap::from([(
table_id,
(table_id, vec![(region_id, peer)]),
)])),
);
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config: GcSchedulerOptions::default(),
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
let report = scheduler.handle_tick().await.unwrap();
// Validate the returned GcJobReport
assert_eq!(
report.per_datanode_reports.len(),
1,
"Should process 1 datanode"
);
assert_eq!(
report.failed_datanodes.len(),
0,
"Should have 0 failed datanodes"
);
assert_eq!(*ctx.get_table_to_region_stats_calls.lock().unwrap(), 1);
assert_eq!(*ctx.get_file_references_calls.lock().unwrap(), 1);
assert_eq!(*ctx.gc_regions_calls.lock().unwrap(), 1);
let tracker = scheduler.region_gc_tracker.lock().await;
assert!(
tracker.contains_key(&region_id),
"Tracker should have one region: {:?}",
tracker
);
}
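The call-count assertions above trace the tick pipeline end to end. In outline (inferred from the mocks, not quoted from the implementation):

// Inferred handle_tick flow (sketch):
// 1. get_table_to_region_stats()      -> RegionStats grouped by table
// 2. select_gc_candidates(&stats)     -> scored, filtered candidates
// 3. table route lookup               -> candidates grouped by datanode peer
// 4. parallel_process_datanodes(map)  -> per-datanode GcReports
//    (get_file_references and gc_regions are called inside step 4)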

View File

@@ -1,390 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::Instant;
use common_meta::datanode::RegionManifestInfo;
use common_telemetry::init_default_ut_logging;
use store_api::region_engine::RegionRole;
use store_api::storage::RegionId;
use crate::gc::mock::{MockSchedulerCtx, TEST_REGION_SIZE_200MB, mock_region_stat};
use crate::gc::{GcScheduler, GcSchedulerOptions};
/// Candidate Selection Tests
#[tokio::test]
async fn test_gc_candidate_filtering_by_role() {
init_default_ut_logging();
let table_id = 1;
let leader_region = RegionId::new(table_id, 1);
let follower_region = RegionId::new(table_id, 2);
let mut leader_stat = mock_region_stat(
leader_region,
RegionRole::Leader,
TEST_REGION_SIZE_200MB,
10,
); // 200MB
let mut follower_stat = mock_region_stat(
follower_region,
RegionRole::Follower,
TEST_REGION_SIZE_200MB,
10,
); // 200MB
// Set up manifest info for scoring
if let RegionManifestInfo::Mito {
file_removed_cnt, ..
} = &mut leader_stat.region_manifest
{
*file_removed_cnt = 5;
}
if let RegionManifestInfo::Mito {
file_removed_cnt, ..
} = &mut follower_stat.region_manifest
{
*file_removed_cnt = 5;
}
let table_stats = HashMap::from([(table_id, vec![leader_stat.clone(), follower_stat.clone()])]);
let ctx = Arc::new(MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
..Default::default()
});
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config: GcSchedulerOptions::default(),
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
let stats = ctx
.table_to_region_stats
.lock()
.unwrap()
.clone()
.unwrap_or_default();
let candidates = scheduler.select_gc_candidates(&stats).await.unwrap();
// Should only select leader regions
assert_eq!(
candidates.len(),
1,
"Expected 1 table with candidates, got {}",
candidates.len()
);
if let Some(table_candidates) = candidates.get(&table_id) {
assert_eq!(
table_candidates.len(),
1,
"Expected 1 candidate for table {}, got {}",
table_id,
table_candidates.len()
);
assert_eq!(
table_candidates[0].region_id, leader_region,
"Expected leader region {}, got {}",
leader_region, table_candidates[0].region_id
);
} else {
panic!("Expected table {} to have candidates", table_id);
}
}
#[tokio::test]
async fn test_gc_candidate_size_threshold() {
init_default_ut_logging();
let table_id = 1;
let small_region = RegionId::new(table_id, 1);
let large_region = RegionId::new(table_id, 2);
let mut small_stat = mock_region_stat(small_region, RegionRole::Leader, 50_000_000, 5); // 50MB
if let RegionManifestInfo::Mito {
file_removed_cnt, ..
} = &mut small_stat.region_manifest
{
*file_removed_cnt = 3;
}
let mut large_stat =
mock_region_stat(large_region, RegionRole::Leader, TEST_REGION_SIZE_200MB, 20); // 200MB
if let RegionManifestInfo::Mito {
file_removed_cnt, ..
} = &mut large_stat.region_manifest
{
*file_removed_cnt = 5;
}
let table_stats = HashMap::from([(table_id, vec![small_stat, large_stat])]);
let ctx = Arc::new(MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
..Default::default()
});
let config = GcSchedulerOptions {
min_region_size_threshold: 100 * 1024 * 1024, // 100MB (default)
..Default::default()
};
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config,
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
let stats = ctx
.table_to_region_stats
.lock()
.unwrap()
.clone()
.unwrap_or_default();
let candidates = scheduler.select_gc_candidates(&stats).await.unwrap();
// Should only select large region
assert_eq!(
candidates.len(),
1,
"Expected 1 table with candidates, got {}",
candidates.len()
);
if let Some(table_candidates) = candidates.get(&table_id) {
assert_eq!(
table_candidates.len(),
1,
"Expected 1 candidate for table {}, got {}",
table_id,
table_candidates.len()
);
assert_eq!(
table_candidates[0].region_id, large_region,
"Expected large region {}, got {}",
large_region, table_candidates[0].region_id
);
} else {
panic!("Expected table {} to have candidates", table_id);
}
}
#[tokio::test]
async fn test_gc_candidate_scoring() {
init_default_ut_logging();
let table_id = 1;
let low_score_region = RegionId::new(table_id, 1);
let high_score_region = RegionId::new(table_id, 2);
let mut low_stat = mock_region_stat(
low_score_region,
RegionRole::Leader,
TEST_REGION_SIZE_200MB,
5,
); // 200MB
// Set low file removal rate for low_score_region
if let RegionManifestInfo::Mito {
file_removed_cnt, ..
} = &mut low_stat.region_manifest
{
*file_removed_cnt = 2;
}
let mut high_stat = mock_region_stat(
high_score_region,
RegionRole::Leader,
TEST_REGION_SIZE_200MB,
50,
); // 200MB
// Set high file removal rate for high_score_region
if let RegionManifestInfo::Mito {
file_removed_cnt, ..
} = &mut high_stat.region_manifest
{
*file_removed_cnt = 20;
}
let table_stats = HashMap::from([(table_id, vec![low_stat, high_stat])]);
let ctx = Arc::new(MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
..Default::default()
});
let config = GcSchedulerOptions {
sst_count_weight: 1.0,
file_removed_count_weight: 0.5,
min_region_size_threshold: 100 * 1024 * 1024, // 100MB (default)
..Default::default()
};
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config,
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
let stats = ctx
.table_to_region_stats
.lock()
.unwrap()
.clone()
.unwrap_or_default();
let candidates = scheduler.select_gc_candidates(&stats).await.unwrap();
// Should select both regions, with the high-score region first
assert_eq!(
candidates.len(),
1,
"Expected 1 table with candidates, got {}",
candidates.len()
);
if let Some(table_candidates) = candidates.get(&table_id) {
assert_eq!(
table_candidates.len(),
2,
"Expected 2 candidates for table {}, got {}",
table_id,
table_candidates.len()
);
// Higher score region should come first (sorted by score descending)
assert_eq!(
table_candidates[0].region_id, high_score_region,
"High score region should be first"
);
assert!(
table_candidates[0].score > table_candidates[1].score,
"High score region should have higher score: {} > {}",
table_candidates[0].score,
table_candidates[1].score
);
} else {
panic!("Expected table {} to have candidates", table_id);
}
}
#[tokio::test]
async fn test_gc_candidate_regions_per_table_threshold() {
init_default_ut_logging();
let table_id = 1;
// Create 10 regions for the same table
let mut region_stats = Vec::new();
for i in 0..10 {
let region_id = RegionId::new(table_id, i + 1);
let mut stat = mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 20); // 200MB
// Set different file removal rates to create different scores
// Higher region IDs get higher scores (better GC candidates)
if let RegionManifestInfo::Mito {
file_removed_cnt, ..
} = &mut stat.region_manifest
{
*file_removed_cnt = (i as u64 + 1) * 2; // Region 1: 2, Region 2: 4, ..., Region 10: 20
}
region_stats.push(stat);
}
let table_stats = HashMap::from([(table_id, region_stats)]);
let ctx = Arc::new(MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
..Default::default()
});
// Set regions_per_table_threshold to 3
let config = GcSchedulerOptions {
regions_per_table_threshold: 3,
min_region_size_threshold: 100 * 1024 * 1024, // 100MB (default)
..Default::default()
};
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config,
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
let stats = ctx
.table_to_region_stats
.lock()
.unwrap()
.clone()
.unwrap_or_default();
let candidates = scheduler.select_gc_candidates(&stats).await.unwrap();
// Should have 1 table with candidates
assert_eq!(
candidates.len(),
1,
"Expected 1 table with candidates, got {}",
candidates.len()
);
if let Some(table_candidates) = candidates.get(&table_id) {
// Should only have 3 candidates due to regions_per_table_threshold
assert_eq!(
table_candidates.len(),
3,
"Expected 3 candidates for table {} due to regions_per_table_threshold, got {}",
table_id,
table_candidates.len()
);
// Verify that the top 3 scoring regions are selected
// Regions 8, 9, 10 should have the highest scores (file_removed_cnt: 16, 18, 20)
// They should be returned in descending order by score
let expected_regions = vec![10, 9, 8];
let actual_regions: Vec<u32> = table_candidates
.iter()
.map(|c| c.region_id.region_number())
.collect();
assert_eq!(
actual_regions, expected_regions,
"Expected regions {:?} to be selected, got {:?}",
expected_regions, actual_regions
);
// Verify they are sorted by score in descending order
for i in 0..table_candidates.len() - 1 {
assert!(
table_candidates[i].score >= table_candidates[i + 1].score,
"Candidates should be sorted by score descending: {} >= {}",
table_candidates[i].score,
table_candidates[i + 1].score
);
}
} else {
panic!("Expected table {} to have candidates", table_id);
}
}
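The orderings asserted in this file are consistent with a linear weighted score over SST count and removed-file count. A sketch under that assumption; the production select_gc_candidates formula may normalize or combine terms differently:

// Assumed scoring model (illustrative only).
fn assumed_score(sst_num: u64, file_removed_cnt: u64, opts: &GcSchedulerOptions) -> f64 {
    opts.sst_count_weight * sst_num as f64
        + opts.file_removed_count_weight * file_removed_cnt as f64
}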

View File

@@ -1,516 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use common_meta::key::table_route::PhysicalTableRouteValue;
use common_meta::peer::Peer;
use common_meta::rpc::router::{Region, RegionRoute};
use common_telemetry::{info, init_default_ut_logging};
use store_api::region_engine::RegionRole;
use store_api::storage::{FileId, FileRefsManifest, GcReport, RegionId};
use crate::gc::mock::{
MockSchedulerCtx, TEST_REGION_SIZE_200MB, mock_candidate, mock_region_stat, new_candidate,
};
use crate::gc::{GcScheduler, GcSchedulerOptions};
/// Concurrent Processing Tests
#[tokio::test]
async fn test_concurrent_table_processing_limits() {
init_default_ut_logging();
let mut candidates = HashMap::new();
let mut gc_reports = HashMap::new();
// Create many tables with candidates
for table_id in 1..=10 {
let region_id = RegionId::new(table_id, 1);
candidates.insert(table_id, vec![new_candidate(region_id, 1.0)]);
gc_reports.insert(
region_id,
GcReport {
deleted_files: HashMap::from([(region_id, vec![FileId::random()])]),
..Default::default()
},
);
}
let ctx = MockSchedulerCtx {
candidates: Arc::new(Mutex::new(Some(candidates))),
file_refs: Arc::new(Mutex::new(Some(FileRefsManifest {
manifest_version: (1..=10).map(|i| (RegionId::new(i, 1), 1)).collect(),
..Default::default()
}))),
gc_reports: Arc::new(Mutex::new(gc_reports)),
..Default::default()
}
.with_table_routes(
(1..=10)
.map(|table_id| {
let region_id = RegionId::new(table_id, 1);
(table_id, (table_id, vec![(region_id, Peer::new(1, ""))]))
})
.collect(),
);
let ctx = Arc::new(ctx);
let config = GcSchedulerOptions {
max_concurrent_tables: 3, // Set a low limit
retry_backoff_duration: Duration::from_millis(50), // for faster test
..Default::default()
};
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config,
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
let candidates = ctx.candidates.lock().unwrap().clone().unwrap_or_default();
// Convert table-based candidates to datanode-based candidates
let peer = Peer::new(1, "");
let datanode_to_candidates = HashMap::from([(
peer,
candidates
.into_iter()
.flat_map(|(table_id, candidates)| candidates.into_iter().map(move |c| (table_id, c)))
.collect(),
)]);
let report = scheduler
.parallel_process_datanodes(datanode_to_candidates)
.await;
// Should process all datanodes
assert_eq!(report.per_datanode_reports.len(), 1);
assert_eq!(report.failed_datanodes.len(), 0);
}
#[tokio::test]
async fn test_datanode_processes_tables_with_partial_gc_failures() {
init_default_ut_logging();
let table1 = 1;
let region1 = RegionId::new(table1, 1);
let table2 = 2;
let region2 = RegionId::new(table2, 1);
let peer = Peer::new(1, "");
let mut candidates = HashMap::new();
candidates.insert(table1, vec![new_candidate(region1, 1.0)]);
candidates.insert(table2, vec![new_candidate(region2, 1.0)]);
// Set up GC reports for success and failure
let mut gc_reports = HashMap::new();
gc_reports.insert(
region1,
GcReport {
deleted_files: HashMap::from([(region1, vec![])]),
..Default::default()
},
);
// region2 will have no GC report, simulating failure
let file_refs = FileRefsManifest {
manifest_version: HashMap::from([(region1, 1), (region2, 1)]),
..Default::default()
};
let ctx = Arc::new(
MockSchedulerCtx {
gc_reports: Arc::new(Mutex::new(gc_reports)),
file_refs: Arc::new(Mutex::new(Some(file_refs))),
candidates: Arc::new(Mutex::new(Some(candidates))),
..Default::default()
}
.with_table_routes(HashMap::from([
(table1, (table1, vec![(region1, peer.clone())])),
(table2, (table2, vec![(region2, peer.clone())])),
])),
);
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config: GcSchedulerOptions::default(),
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
let candidates = ctx.candidates.lock().unwrap().clone().unwrap_or_default();
// Convert table-based candidates to datanode-based candidates
let datanode_to_candidates = HashMap::from([(
peer,
candidates
.into_iter()
.flat_map(|(table_id, candidates)| candidates.into_iter().map(move |c| (table_id, c)))
.collect(),
)]);
let report = scheduler
.parallel_process_datanodes(datanode_to_candidates)
.await;
// Should have one datanode with mixed results
assert_eq!(report.per_datanode_reports.len(), 1);
// Also check the one failed region (region2 has no GC report, so it should land in need_retry_regions)
let datanode_report = report.per_datanode_reports.values().next().unwrap();
assert_eq!(datanode_report.need_retry_regions.len(), 1);
assert_eq!(report.failed_datanodes.len(), 0);
}
// Region Concurrency Tests
#[tokio::test]
async fn test_region_gc_concurrency_limit() {
init_default_ut_logging();
let table_id = 1;
let peer = Peer::new(1, "");
// Create multiple regions for the same table
let mut region_stats = Vec::new();
let mut candidates = Vec::new();
let mut gc_reports = HashMap::new();
for i in 1..=10 {
let region_id = RegionId::new(table_id, i as u32);
let region_stat =
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
region_stats.push(region_stat);
candidates.push(mock_candidate(region_id));
gc_reports.insert(
region_id,
GcReport {
deleted_files: HashMap::from([(
region_id,
vec![FileId::random(), FileId::random()],
)]),
..Default::default()
},
);
}
let table_stats = HashMap::from([(table_id, region_stats)]);
let file_refs = FileRefsManifest {
manifest_version: (1..=10)
.map(|i| (RegionId::new(table_id, i as u32), 1))
.collect(),
..Default::default()
};
let ctx = Arc::new(
MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
gc_reports: Arc::new(Mutex::new(gc_reports)),
file_refs: Arc::new(Mutex::new(Some(file_refs))),
..Default::default()
}
.with_table_routes(HashMap::from([(
table_id,
(
table_id,
(1..=10)
.map(|i| (RegionId::new(table_id, i as u32), peer.clone()))
.collect(),
),
)])),
);
// Configure low concurrency limit
let config = GcSchedulerOptions {
region_gc_concurrency: 3, // Only 3 regions can be processed concurrently
retry_backoff_duration: Duration::from_millis(50), // for faster test
..Default::default()
};
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config,
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
let start_time = Instant::now();
let report = scheduler
.process_datanode_gc(
peer,
candidates.into_iter().map(|c| (table_id, c)).collect(),
)
.await
.unwrap();
let duration = start_time.elapsed();
// All regions should be processed successfully
// Check that all 10 regions have deleted files
assert_eq!(report.deleted_files.len(), 10);
for i in 1..=10 {
let region_id = RegionId::new(table_id, i as u32);
assert!(report.deleted_files.contains_key(&region_id));
assert_eq!(report.deleted_files[&region_id].len(), 2); // Each region has 2 deleted files
}
assert!(report.need_retry_regions.is_empty());
// Verify that the concurrency limit was respected. That is hard to assert
// directly here (a counter-based approach is sketched after this test), so
// we settle for the processing completing successfully.
info!(
"Processed 10 regions with concurrency limit 3 in {:?}",
duration
);
}
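If direct verification of the limit were needed, one option is to count in-flight gc_regions calls from inside the mock. A sketch, assuming the mock could be instrumented; none of these names exist in the source:

use std::sync::atomic::{AtomicUsize, Ordering};

// Peak-in-flight tracker: call enter()/exit() around the mocked gc_regions.
#[derive(Default)]
struct InFlight {
    current: AtomicUsize,
    peak: AtomicUsize,
}

impl InFlight {
    fn enter(&self) {
        let now = self.current.fetch_add(1, Ordering::SeqCst) + 1;
        self.peak.fetch_max(now, Ordering::SeqCst);
    }
    fn exit(&self) {
        self.current.fetch_sub(1, Ordering::SeqCst);
    }
}
// After the run: assert!(tracker.peak.load(Ordering::SeqCst) <= 3);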
#[tokio::test]
async fn test_region_gc_concurrency_with_partial_failures() {
init_default_ut_logging();
let table_id = 1;
let peer = Peer::new(1, "");
// Create multiple regions with mixed success/failure
let mut region_stats = Vec::new();
let mut candidates = Vec::new();
let mut gc_reports = HashMap::new();
// Create the context first so we can set errors on it
let ctx = Arc::new(MockSchedulerCtx::default());
for i in 1..=6 {
let region_id = RegionId::new(table_id, i as u32);
let region_stat =
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
region_stats.push(region_stat);
candidates.push(mock_candidate(region_id));
if i % 2 == 0 {
// Even regions will succeed
gc_reports.insert(
region_id,
GcReport {
deleted_files: HashMap::from([(
region_id,
vec![FileId::random(), FileId::random()],
)]),
..Default::default()
},
);
} else {
// Odd regions will fail - don't add them to gc_reports
// This will cause them to be marked as needing retry
}
}
let table_stats = HashMap::from([(table_id, region_stats)]);
let file_refs = FileRefsManifest {
manifest_version: (1..=6)
.map(|i| (RegionId::new(table_id, i as u32), 1))
.collect(),
..Default::default()
};
// Update the context with the data
*ctx.table_to_region_stats.lock().unwrap() = Some(table_stats);
*ctx.gc_reports.lock().unwrap() = gc_reports;
*ctx.file_refs.lock().unwrap() = Some(file_refs);
let region_routes = (1..=6)
.map(|i| RegionRoute {
region: Region::new_test(RegionId::new(table_id, i as u32)),
leader_peer: Some(peer.clone()),
..Default::default()
})
.collect();
*ctx.table_routes.lock().unwrap() = HashMap::from([(
table_id,
(table_id, PhysicalTableRouteValue::new(region_routes)),
)]);
// Configure concurrency limit
let config = GcSchedulerOptions {
region_gc_concurrency: 2, // Process 2 regions concurrently
retry_backoff_duration: Duration::from_millis(50), // for faster test
..Default::default()
};
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config,
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
let datanode_to_candidates = HashMap::from([(
peer.clone(),
candidates.into_iter().map(|c| (table_id, c)).collect(),
)]);
let report = scheduler
.parallel_process_datanodes(datanode_to_candidates)
.await;
let report = report.per_datanode_reports.get(&peer.id).unwrap();
// Should have 3 successful and 3 failed regions
// Even regions (2, 4, 6) should succeed, odd regions (1, 3, 5) should fail
let mut successful_regions = 0;
let mut failed_regions = 0;
for i in 1..=6 {
let region_id = RegionId::new(table_id, i as u32);
if i % 2 == 0 {
// Even regions should succeed
if report.deleted_files.contains_key(&region_id) {
successful_regions += 1;
}
} else {
// Odd regions should fail - they should be in need_retry_regions
if report.need_retry_regions.contains(&region_id) {
failed_regions += 1;
}
}
}
// In the new implementation, regions that cause gc_regions to return an error
// are added to need_retry_regions. Let's check if we have the expected mix.
info!(
"Successful regions: {}, Failed regions: {}",
successful_regions, failed_regions
);
info!(
"Deleted files: {:?}",
report.deleted_files.keys().collect::<Vec<_>>()
);
info!("Need retry regions: {:?}", report.need_retry_regions);
// The exact count might vary depending on how the mock handles errors,
// but we should have some successful and some failed regions
assert!(
successful_regions > 0,
"Should have at least some successful regions"
);
assert!(
failed_regions > 0,
"Should have at least some failed regions"
);
}
#[tokio::test]
async fn test_region_gc_concurrency_with_retryable_errors() {
init_default_ut_logging();
let table_id = 1;
let peer = Peer::new(1, "");
// Create multiple regions
let mut region_stats = Vec::new();
let mut candidates = Vec::new();
for i in 1..=5 {
let region_id = RegionId::new(table_id, i as u32);
let region_stat =
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
region_stats.push(region_stat);
candidates.push(mock_candidate(region_id));
}
let table_stats = HashMap::from([(table_id, region_stats)]);
let file_refs = FileRefsManifest {
manifest_version: (1..=5)
.map(|i| (RegionId::new(table_id, i as u32), 1))
.collect(),
..Default::default()
};
let gc_report = (1..=5)
.map(|i| {
let region_id = RegionId::new(table_id, i as u32);
(
region_id,
// Mock a successful GC report that records a deleted_files entry (even when no files were deleted)
GcReport::new(HashMap::from([(region_id, vec![])]), HashSet::new()),
)
})
.collect();
let ctx = Arc::new(
MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
file_refs: Arc::new(Mutex::new(Some(file_refs))),
gc_reports: Arc::new(Mutex::new(gc_report)),
..Default::default()
}
.with_table_routes(HashMap::from([(
table_id,
(
table_id,
(1..=5)
.map(|i| (RegionId::new(table_id, i as u32), peer.clone()))
.collect(),
),
)])),
);
// Configure concurrency limit
let config = GcSchedulerOptions {
region_gc_concurrency: 2, // Process 2 regions concurrently
retry_backoff_duration: Duration::from_millis(50),
..Default::default()
};
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config,
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
let datanode_to_candidates = HashMap::from([(
peer.clone(),
candidates.into_iter().map(|c| (table_id, c)).collect(),
)]);
let report = scheduler
.parallel_process_datanodes(datanode_to_candidates)
.await;
let report = report.per_datanode_reports.get(&peer.id).unwrap();
// In the new implementation without retry logic, all regions should be processed
// The exact behavior depends on how the mock handles the regions
info!(
"Deleted files: {:?}",
report.deleted_files.keys().collect::<Vec<_>>()
);
info!("Need retry regions: {:?}", report.need_retry_regions);
// We should have processed all 5 regions in some way
let total_processed = report.deleted_files.len() + report.need_retry_regions.len();
assert_eq!(total_processed, 5, "Should have processed all 5 regions");
}
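The region_gc_concurrency option suggests the scheduler bounds per-region parallelism; the usual shape for that in async Rust is buffer_unordered. A sketch of that pattern under assumptions — gc_one_region is a stand-in, not the source's API:

use futures::stream::{self, StreamExt};

// Stand-in for the real per-region GC call.
async fn gc_one_region(region_id: RegionId) -> GcReport {
    let _ = region_id;
    GcReport::default()
}

// Bounded-concurrency fan-out: at most `limit` regions in flight at once.
async fn gc_regions_bounded(regions: Vec<RegionId>, limit: usize) -> Vec<GcReport> {
    stream::iter(regions)
        .map(gc_one_region)
        .buffer_unordered(limit)
        .collect()
        .await
}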

View File

@@ -1,197 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::Instant;
use common_meta::datanode::RegionManifestInfo;
use common_telemetry::init_default_ut_logging;
use store_api::region_engine::RegionRole;
use store_api::storage::RegionId;
use crate::gc::mock::{MockSchedulerCtx, TEST_REGION_SIZE_200MB, mock_region_stat};
use crate::gc::{GcScheduler, GcSchedulerOptions};
/// Configuration Tests
#[tokio::test]
async fn test_different_gc_weights() {
init_default_ut_logging();
let table_id = 1;
let region_id = RegionId::new(table_id, 1);
let mut region_stat =
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB to pass size threshold
if let RegionManifestInfo::Mito {
file_removed_cnt, ..
} = &mut region_stat.region_manifest
{
*file_removed_cnt = 5;
}
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
let ctx = Arc::new(MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
..Default::default()
});
// Test with different weights
let config1 = GcSchedulerOptions {
sst_count_weight: 2.0,
file_removed_count_weight: 0.5,
min_region_size_threshold: 100 * 1024 * 1024, // 100MB (default)
..Default::default()
};
let scheduler1 = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config: config1,
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
let stats = ctx
.table_to_region_stats
.lock()
.unwrap()
.clone()
.unwrap_or_default();
let candidates1 = scheduler1.select_gc_candidates(&stats).await.unwrap();
let config2 = GcSchedulerOptions {
sst_count_weight: 0.5,
file_removed_count_weight: 2.0,
min_region_size_threshold: 100 * 1024 * 1024, // 100MB (default)
..Default::default()
};
let scheduler2 = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config: config2,
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
let stats = &ctx
.table_to_region_stats
.lock()
.unwrap()
.clone()
.unwrap_or_default();
let candidates2 = scheduler2.select_gc_candidates(stats).await.unwrap();
// Both should select the region but with different scores
assert_eq!(
candidates1.len(),
1,
"Expected 1 table with candidates for config1, got {}",
candidates1.len()
);
assert_eq!(
candidates2.len(),
1,
"Expected 1 table with candidates for config2, got {}",
candidates2.len()
);
// Verify the region is actually selected
assert!(
candidates1.contains_key(&table_id),
"Config1 should contain table_id {}",
table_id
);
assert!(
candidates2.contains_key(&table_id),
"Config2 should contain table_id {}",
table_id
);
}
#[tokio::test]
async fn test_regions_per_table_threshold() {
init_default_ut_logging();
let table_id = 1;
let mut region_stats = Vec::new();
// Create many regions
for i in 1..=10 {
let region_id = RegionId::new(table_id, i as u32);
let mut stat = mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
if let RegionManifestInfo::Mito {
file_removed_cnt, ..
} = &mut stat.region_manifest
{
*file_removed_cnt = 5;
}
region_stats.push(stat);
}
let table_stats = HashMap::from([(table_id, region_stats)]);
let ctx = Arc::new(MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
..Default::default()
});
let config = GcSchedulerOptions {
regions_per_table_threshold: 3, // Limit to 3 regions per table
min_region_size_threshold: 100 * 1024 * 1024, // 100MB (default)
..Default::default()
};
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config,
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
let stats = ctx
.table_to_region_stats
.lock()
.unwrap()
.clone()
.unwrap_or_default();
let candidates = scheduler.select_gc_candidates(&stats).await.unwrap();
assert_eq!(
candidates.len(),
1,
"Expected 1 table with candidates, got {}",
candidates.len()
);
if let Some(table_candidates) = candidates.get(&table_id) {
// Should be limited to 3 regions
assert_eq!(
table_candidates.len(),
3,
"Expected 3 candidates for table {}, got {}",
table_id,
table_candidates.len()
);
} else {
panic!("Expected table {} to have candidates", table_id);
}
}
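Plugging this file's two configs into the assumed linear score from earlier shows how the weights flip the emphasis (hypothetical arithmetic, matching the stat above with sst_num = 10 and file_removed_cnt = 5):

// config1: 2.0 * 10 + 0.5 * 5 = 22.5  (SST count dominates)
// config2: 0.5 * 10 + 2.0 * 5 = 15.0  (file removals weighted up)
// Both clear selection here, so each config yields one candidate with a
// different score, which is exactly what the assertions check.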

View File

@@ -1,293 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use common_meta::datanode::RegionManifestInfo;
use common_meta::peer::Peer;
use common_telemetry::init_default_ut_logging;
use store_api::region_engine::RegionRole;
use store_api::storage::{FileId, FileRefsManifest, GcReport, RegionId};
use crate::gc::mock::{
MockSchedulerCtx, TEST_REGION_SIZE_200MB, mock_region_stat, new_empty_report_with,
};
use crate::gc::{GcScheduler, GcSchedulerOptions};
/// Error Handling Tests
#[tokio::test]
async fn test_gc_regions_failure_handling() {
init_default_ut_logging();
let table_id = 1;
let region_id = RegionId::new(table_id, 1);
let peer = Peer::new(1, "");
// Create region stat with proper size and file_removed_cnt to ensure it gets selected as candidate
let mut region_stat =
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
if let RegionManifestInfo::Mito {
file_removed_cnt, ..
} = &mut region_stat.region_manifest
{
*file_removed_cnt = 5;
}
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
// Create a context that will return an error for gc_regions
let mut gc_reports = HashMap::new();
gc_reports.insert(region_id, GcReport::default());
// Inject an error for gc_regions method
let gc_error = crate::error::UnexpectedSnafu {
violated: "Simulated GC failure for testing".to_string(),
}
.build();
let file_refs = FileRefsManifest {
manifest_version: HashMap::from([(region_id, 1)]),
file_refs: HashMap::from([(region_id, HashSet::from([FileId::random()]))]),
};
let ctx = Arc::new(
MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
gc_reports: Arc::new(Mutex::new(gc_reports)),
file_refs: Arc::new(Mutex::new(Some(file_refs))),
..Default::default()
}
.with_table_routes(HashMap::from([(
table_id,
(table_id, vec![(region_id, peer)]),
)]))
.with_gc_regions_error(gc_error),
);
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config: GcSchedulerOptions::default(),
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
// This should handle the failure gracefully
let report = scheduler.handle_tick().await.unwrap();
// Validate the report shows the failure handling
assert_eq!(
report.per_datanode_reports.len(),
1,
"Should process 1 datanode despite failure"
);
assert_eq!(
report.failed_datanodes.len(),
0,
"Should have 0 failed datanodes (failure handled via need_retry_regions)"
);
// Check that the region is in need_retry_regions due to the failure
let datanode_report = report.per_datanode_reports.values().next().unwrap();
assert_eq!(
datanode_report.need_retry_regions.len(),
1,
"Should have 1 region in need_retry_regions due to failure"
);
assert!(
datanode_report.need_retry_regions.contains(&region_id),
"Region should be in need_retry_regions"
);
// Verify that calls were made despite potential failures
assert_eq!(
*ctx.get_table_to_region_stats_calls.lock().unwrap(),
1,
"Expected 1 call to get_table_to_region_stats"
);
assert!(
*ctx.get_file_references_calls.lock().unwrap() >= 1,
"Expected at least 1 call to get_file_references"
);
assert!(
*ctx.gc_regions_calls.lock().unwrap() >= 1,
"Expected at least 1 call to gc_regions"
);
}
#[tokio::test]
async fn test_get_file_references_failure() {
init_default_ut_logging();
let table_id = 1;
let region_id = RegionId::new(table_id, 1);
let peer = Peer::new(1, "");
// Create region stat with proper size and file_removed_cnt to ensure it gets selected as candidate
let mut region_stat =
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
if let RegionManifestInfo::Mito {
file_removed_cnt, ..
} = &mut region_stat.region_manifest
{
*file_removed_cnt = 5;
}
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
// Create context with empty file refs (simulating failure)
let ctx = Arc::new(
MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
file_refs: Arc::new(Mutex::new(Some(FileRefsManifest::default()))),
gc_reports: Arc::new(Mutex::new(HashMap::from([(
region_id,
new_empty_report_with([region_id]),
)]))),
..Default::default()
}
.with_table_routes(HashMap::from([(
table_id,
(table_id, vec![(region_id, peer)]),
)])),
);
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config: GcSchedulerOptions {
retry_backoff_duration: Duration::from_millis(10), // shorten for test
..Default::default()
},
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
let report = scheduler.handle_tick().await.unwrap();
// Validate the report shows the expected results
// In the new implementation, even if get_file_references fails, we still create a datanode report
assert_eq!(
report.per_datanode_reports.len(),
1,
"Should process 1 datanode"
);
assert_eq!(
report.failed_datanodes.len(),
0,
"Should have 0 failed datanodes (failure handled gracefully)"
);
// The region should be processed but may have empty results due to file refs failure
let datanode_report = report.per_datanode_reports.values().next().unwrap();
// The current implementation still processes the region even with file refs failure
// and creates an empty entry in deleted_files
assert!(
datanode_report.deleted_files.contains_key(&region_id),
"Should have region in deleted_files (even if empty)"
);
assert!(
datanode_report.deleted_files[&region_id].is_empty(),
"Should have empty deleted files due to file refs failure"
);
// Should still attempt to get file references (may be called multiple times due to retry logic)
assert!(
*ctx.get_file_references_calls.lock().unwrap() >= 1,
"Expected at least 1 call to get_file_references, got {}",
*ctx.get_file_references_calls.lock().unwrap()
);
}
#[tokio::test]
async fn test_get_table_route_failure() {
init_default_ut_logging();
let table_id = 1;
let region_id = RegionId::new(table_id, 1);
// Create region stat with proper size and file_removed_cnt to ensure it gets selected as candidate
let mut region_stat =
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
if let RegionManifestInfo::Mito {
file_removed_cnt, ..
} = &mut region_stat.region_manifest
{
*file_removed_cnt = 5;
}
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
// Inject an error for get_table_route method to simulate failure
let route_error = crate::error::UnexpectedSnafu {
violated: "Simulated table route failure for testing".to_string(),
}
.build();
// Create context with table route error injection
let ctx = Arc::new(MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
..Default::default()
});
ctx.set_table_route_error(route_error);
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config: GcSchedulerOptions::default(),
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
// Get candidates first
let stats = &ctx
.table_to_region_stats
.lock()
.unwrap()
.clone()
.unwrap_or_default();
let candidates = scheduler.select_gc_candidates(stats).await.unwrap();
// Convert table-based candidates to datanode-based candidates
let datanode_to_candidates = HashMap::from([(
Peer::new(1, ""),
candidates
.into_iter()
.flat_map(|(table_id, candidates)| candidates.into_iter().map(move |c| (table_id, c)))
.collect(),
)]);
// This should handle table route failure gracefully
let report = scheduler
.parallel_process_datanodes(datanode_to_candidates)
.await;
// The datanode should fail outright: the route error yields no per-datanode
// report, only an entry in failed_datanodes
assert_eq!(
report.per_datanode_reports.len(),
0,
"Expected 0 datanode reports"
);
assert_eq!(
report.failed_datanodes.len(),
1,
"Expected 1 failed datanode (route error handled gracefully)"
);
assert!(
report.failed_datanodes.contains_key(&1),
"Failed datanodes should contain the datanode with route error"
);
}
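The error-injection hooks used above (with_gc_regions_error, set_table_route_error) presumably stash an error inside the mock and surface it on a later call. A sketch of that shape; the field and method names here are assumptions:

// Assumed shape of the mock's error injection (not the source's layout).
struct MockErrors {
    gc_regions_error: Mutex<Option<crate::error::Error>>,
}

impl MockErrors {
    // take() would make the injected error one-shot so later retries can
    // succeed; whether the real mock does this is an assumption.
    fn take_gc_error(&self) -> Option<crate::error::Error> {
        self.gc_regions_error.lock().unwrap().take()
    }
}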

View File

@@ -1,272 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use common_meta::peer::Peer;
use common_telemetry::init_default_ut_logging;
use store_api::region_engine::RegionRole;
use store_api::storage::{FileId, FileRefsManifest, GcReport, RegionId};
use crate::gc::mock::{MockSchedulerCtx, TEST_REGION_SIZE_200MB, mock_candidate, mock_region_stat};
use crate::gc::{GcScheduler, GcSchedulerOptions};
// Full File Listing Tests
#[tokio::test]
async fn test_full_file_listing_first_time_gc() {
init_default_ut_logging();
let table_id = 1;
let region_id = RegionId::new(table_id, 1);
let peer = Peer::new(1, "");
let region_stat = mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
let gc_report = GcReport {
deleted_files: HashMap::from([(region_id, vec![FileId::random(), FileId::random()])]),
..Default::default()
};
let file_refs = FileRefsManifest {
manifest_version: HashMap::from([(region_id, 1)]),
..Default::default()
};
let ctx = Arc::new(
MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
gc_reports: Arc::new(Mutex::new(HashMap::from([(region_id, gc_report)]))),
file_refs: Arc::new(Mutex::new(Some(file_refs))),
..Default::default()
}
.with_table_routes(HashMap::from([(
table_id,
(table_id, vec![(region_id, peer.clone())]),
)])),
);
// Configure short full file listing interval for testing
let config = GcSchedulerOptions {
full_file_listing_interval: Duration::from_secs(3600), // 1 hour
..Default::default()
};
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config,
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
// First GC - should use full listing since region has never been GC'd
let reports = scheduler
.process_datanode_gc(peer.clone(), vec![(table_id, mock_candidate(region_id))])
.await
.unwrap();
assert_eq!(reports.deleted_files.len(), 1);
// Verify that full listing was used by checking the tracker
let tracker = scheduler.region_gc_tracker.lock().await;
let gc_info = tracker
.get(&region_id)
.expect("Region should be in tracker");
assert!(
gc_info.last_full_listing_time.is_some(),
"First GC should use full listing"
);
}
#[tokio::test]
async fn test_full_file_listing_interval_enforcement() {
init_default_ut_logging();
let table_id = 1;
let region_id = RegionId::new(table_id, 1);
let peer = Peer::new(1, "");
let region_stat = mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
let gc_report = GcReport {
deleted_files: HashMap::from([(region_id, vec![FileId::random(), FileId::random()])]),
..Default::default()
};
let file_refs = FileRefsManifest {
manifest_version: HashMap::from([(region_id, 1)]),
..Default::default()
};
let ctx = Arc::new(
MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
gc_reports: Arc::new(Mutex::new(HashMap::from([(region_id, gc_report)]))),
file_refs: Arc::new(Mutex::new(Some(file_refs))),
..Default::default()
}
.with_table_routes(HashMap::from([(
table_id,
(table_id, vec![(region_id, peer.clone())]),
)])),
);
// Configure very short full file listing interval for testing
let config = GcSchedulerOptions {
full_file_listing_interval: Duration::from_millis(100), // 100ms
..Default::default()
};
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config,
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
// First GC - should use full listing
let reports1 = scheduler
.process_datanode_gc(peer.clone(), vec![(table_id, mock_candidate(region_id))])
.await
.unwrap();
assert_eq!(reports1.deleted_files.len(), 1);
// Get the first full listing time
let first_full_listing_time = {
let tracker = scheduler.region_gc_tracker.lock().await;
let gc_info = tracker
.get(&region_id)
.expect("Region should be in tracker");
gc_info
.last_full_listing_time
.expect("Should have full listing time")
};
// Wait for interval to pass
tokio::time::sleep(Duration::from_millis(150)).await;
// Second GC - should use full listing again since interval has passed
let _reports2 = scheduler
.process_datanode_gc(peer.clone(), vec![(table_id, mock_candidate(region_id))])
.await
.unwrap();
// Verify that full listing was used again
let tracker = scheduler.region_gc_tracker.lock().await;
let gc_info = tracker
.get(&region_id)
.expect("Region should be in tracker");
let second_full_listing_time = gc_info
.last_full_listing_time
.expect("Should have full listing time");
assert!(
second_full_listing_time > first_full_listing_time,
"Second GC should update full listing time"
);
}
#[tokio::test]
async fn test_full_file_listing_no_interval_passed() {
init_default_ut_logging();
let table_id = 1;
let region_id = RegionId::new(table_id, 1);
let peer = Peer::new(1, "");
let region_stat = mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
let gc_report = GcReport {
deleted_files: HashMap::from([(region_id, vec![FileId::random(), FileId::random()])]),
..Default::default()
};
let file_refs = FileRefsManifest {
manifest_version: HashMap::from([(region_id, 1)]),
..Default::default()
};
let ctx = Arc::new(
MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
gc_reports: Arc::new(Mutex::new(HashMap::from([(region_id, gc_report)]))),
file_refs: Arc::new(Mutex::new(Some(file_refs))),
..Default::default()
}
.with_table_routes(HashMap::from([(
table_id,
(table_id, vec![(region_id, peer.clone())]),
)])),
);
// Configure long full file listing interval
let config = GcSchedulerOptions {
full_file_listing_interval: Duration::from_secs(3600), // 1 hour
..Default::default()
};
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config,
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
// First GC - should use full listing
let reports1 = scheduler
.process_datanode_gc(peer.clone(), vec![(table_id, mock_candidate(region_id))])
.await
.unwrap();
assert_eq!(reports1.deleted_files.len(), 1);
// Get the first full listing time
let first_full_listing_time = {
let tracker = scheduler.region_gc_tracker.lock().await;
let gc_info = tracker
.get(&region_id)
.expect("Region should be in tracker");
gc_info
.last_full_listing_time
.expect("Should have full listing time")
};
// Second GC immediately - should NOT use full listing since interval hasn't passed
let reports2 = scheduler
.process_datanode_gc(peer.clone(), vec![(table_id, mock_candidate(region_id))])
.await
.unwrap();
assert_eq!(reports2.deleted_files.len(), 1);
// Verify that full listing time was NOT updated
let tracker = scheduler.region_gc_tracker.lock().await;
let gc_info = tracker
.get(&region_id)
.expect("Region should be in tracker");
let second_full_listing_time = gc_info
.last_full_listing_time
.expect("Should have full listing time");
assert_eq!(
second_full_listing_time, first_full_listing_time,
"Second GC should not update full listing time when interval hasn't passed"
);
}
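Taken together, the three tests pin down one decision rule: list fully on a region's first GC, or once full_file_listing_interval has elapsed since the last full listing. A sketch of that rule, assuming the tracker stores the last full-listing Instant:

// Assumed full-listing decision (sketch, not the source).
fn should_use_full_listing(last_full_listing: Option<Instant>, interval: Duration) -> bool {
    match last_full_listing {
        None => true, // first GC for this region
        Some(t) => t.elapsed() >= interval,
    }
}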

View File

@@ -1,252 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use common_meta::datanode::RegionManifestInfo;
use common_meta::peer::Peer;
use common_telemetry::init_default_ut_logging;
use store_api::region_engine::RegionRole;
use store_api::storage::{FileId, FileRefsManifest, GcReport, RegionId};
use crate::gc::mock::{
MockSchedulerCtx, TEST_REGION_SIZE_200MB, mock_region_stat, new_empty_report_with,
};
use crate::gc::{GcScheduler, GcSchedulerOptions};
// Integration Flow Tests
#[tokio::test]
async fn test_full_gc_workflow() {
init_default_ut_logging();
let table_id = 1;
let region_id = RegionId::new(table_id, 1);
let peer = Peer::new(1, "");
let mut region_stat =
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
if let RegionManifestInfo::Mito {
file_removed_cnt, ..
} = &mut region_stat.region_manifest
{
*file_removed_cnt = 5;
}
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
let mut gc_reports = HashMap::new();
gc_reports.insert(
region_id,
GcReport {
deleted_files: HashMap::from([(region_id, vec![FileId::random(), FileId::random()])]),
..Default::default()
},
);
let file_refs = FileRefsManifest {
manifest_version: HashMap::from([(region_id, 1)]),
..Default::default()
};
let ctx = Arc::new(
MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
gc_reports: Arc::new(Mutex::new(gc_reports)),
file_refs: Arc::new(Mutex::new(Some(file_refs))),
..Default::default()
}
.with_table_routes(HashMap::from([(
table_id,
(table_id, vec![(region_id, peer)]),
)])),
);
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config: GcSchedulerOptions::default(),
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
// Run the full workflow
let report = scheduler.handle_tick().await.unwrap();
// Validate the returned GcJobReport - should have 1 datanode report
assert_eq!(
report.per_datanode_reports.len(),
1,
"Should process 1 datanode"
);
assert_eq!(
report.failed_datanodes.len(),
0,
"Should have no failed datanodes"
);
// Get the datanode report
let datanode_report = report.per_datanode_reports.values().next().unwrap();
// Check that the region was processed successfully
assert!(
datanode_report.deleted_files.contains_key(&region_id),
"Should have deleted files for region"
);
assert_eq!(
datanode_report.deleted_files[&region_id].len(),
2,
"Should have 2 deleted files"
);
assert!(
datanode_report.need_retry_regions.is_empty(),
"Should have no retry regions"
);
// Verify all steps were executed
assert_eq!(
*ctx.get_table_to_region_stats_calls.lock().unwrap(),
1,
"Expected 1 call to get_table_to_region_stats"
);
assert_eq!(
*ctx.get_file_references_calls.lock().unwrap(),
1,
"Expected 1 call to get_file_references"
);
assert_eq!(
*ctx.gc_regions_calls.lock().unwrap(),
1,
"Expected 1 call to gc_regions"
);
}
#[tokio::test]
async fn test_tracker_cleanup() {
init_default_ut_logging();
let table_id = 1;
let region_id = RegionId::new(table_id, 1);
let peer = Peer::new(1, "");
// Create region stat with proper file_removed_cnt to ensure it gets selected as candidate
let mut region_stat =
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
if let RegionManifestInfo::Mito {
file_removed_cnt, ..
} = &mut region_stat.region_manifest
{
*file_removed_cnt = 5;
}
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
let mut gc_reports = HashMap::new();
gc_reports.insert(region_id, new_empty_report_with([region_id]));
let file_refs = FileRefsManifest {
manifest_version: HashMap::from([(region_id, 1)]),
..Default::default()
};
let ctx = Arc::new(
MockSchedulerCtx {
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
gc_reports: Arc::new(Mutex::new(gc_reports)),
file_refs: Arc::new(Mutex::new(Some(file_refs))),
..Default::default()
}
.with_table_routes(HashMap::from([(
table_id,
(table_id, vec![(region_id, peer)]),
)])),
);
let old_region_gc_tracker = {
let mut tracker = HashMap::new();
tracker.insert(
region_id,
crate::gc::tracker::RegionGcInfo {
last_full_listing_time: Some(Instant::now() - Duration::from_secs(7200)), // 2 hours ago
last_gc_time: Instant::now() - Duration::from_secs(7200), // 2 hours ago
},
);
// also insert a different table that should also be cleaned up
tracker.insert(
RegionId::new(2, 1),
crate::gc::tracker::RegionGcInfo {
last_full_listing_time: Some(Instant::now() - Duration::from_secs(7200)), // 2 hours ago
last_gc_time: Instant::now() - Duration::from_secs(7200), // 2 hours ago
},
);
tracker
};
// Use a custom config with shorter cleanup interval to trigger cleanup
let config = GcSchedulerOptions {
// 30 minutes
tracker_cleanup_interval: Duration::from_secs(1800),
..Default::default()
};
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config,
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(old_region_gc_tracker)),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(
Instant::now() - Duration::from_secs(3600), // Old cleanup time (1 hour ago)
)),
};
let report = scheduler.handle_tick().await.unwrap();
// Validate the returned GcJobReport - should have 1 datanode report
assert_eq!(
report.per_datanode_reports.len(),
1,
"Should process 1 datanode"
);
assert_eq!(
report.failed_datanodes.len(),
0,
"Should have no failed datanodes"
);
// Get the datanode report
let datanode_report = report.per_datanode_reports.values().next().unwrap();
// Check that the region was processed successfully
assert!(
datanode_report.deleted_files.contains_key(&region_id),
"Should have deleted files for region"
);
assert!(
datanode_report.need_retry_regions.is_empty(),
"Should have no retry regions"
);
// Verify tracker was updated
let tracker = scheduler.region_gc_tracker.lock().await;
assert!(
tracker.contains_key(&region_id),
"Tracker should contain region {}",
region_id
);
// Only the freshly GC'd region should remain after cleanup
assert_eq!(tracker.len(), 1, "Tracker should only have 1 entry");
}
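The cleanup behavior exercised here amounts to dropping tracker entries whose last GC is older than the retention window. A sketch of that rule, reusing the field names from the test; the actual retention bound in the implementation is an assumption:

// Sketch: retain only regions GC'd within the window.
fn cleanup_tracker(
    tracker: &mut HashMap<RegionId, crate::gc::tracker::RegionGcInfo>,
    window: Duration,
) {
    let now = Instant::now();
    tracker.retain(|_, info| now.saturating_duration_since(info.last_gc_time) < window);
}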

View File

@@ -1,155 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::Instant;
use common_meta::peer::Peer;
use common_telemetry::init_default_ut_logging;
use store_api::storage::{FileRefsManifest, GcReport, RegionId};
use crate::gc::mock::{MockSchedulerCtx, new_candidate};
use crate::gc::{GcScheduler, GcSchedulerOptions};
/// Edge Case Tests
#[tokio::test]
async fn test_empty_file_refs_manifest() {
init_default_ut_logging();
let table_id = 1;
let region_id = RegionId::new(table_id, 1);
let peer = Peer::new(1, "");
let candidates = HashMap::from([(table_id, vec![new_candidate(region_id, 1.0)])]);
// Empty file refs manifest
let file_refs = FileRefsManifest::default();
let ctx = Arc::new(
MockSchedulerCtx {
file_refs: Arc::new(Mutex::new(Some(file_refs))),
candidates: Arc::new(Mutex::new(Some(candidates))),
..Default::default()
}
.with_table_routes(HashMap::from([(
table_id,
(table_id, vec![(region_id, peer)]),
)])),
);
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config: GcSchedulerOptions::default(),
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
let candidates = ctx.candidates.lock().unwrap().clone().unwrap_or_default();
// Convert table-based candidates to datanode-based candidates
let peer = Peer::new(1, "");
let datanode_to_candidates = HashMap::from([(
peer,
candidates
.into_iter()
.flat_map(|(table_id, candidates)| candidates.into_iter().map(move |c| (table_id, c)))
.collect(),
)]);
let report = scheduler
.parallel_process_datanodes(datanode_to_candidates)
.await;
assert_eq!(report.per_datanode_reports.len(), 1);
assert_eq!(report.failed_datanodes.len(), 0);
// Should handle empty file refs gracefully
}
#[tokio::test]
async fn test_multiple_regions_per_table() {
init_default_ut_logging();
let table_id = 1;
let region1 = RegionId::new(table_id, 1);
let region2 = RegionId::new(table_id, 2);
let region3 = RegionId::new(table_id, 3);
let peer = Peer::new(1, "");
let candidates = HashMap::from([(
table_id,
vec![
new_candidate(region1, 1.0),
new_candidate(region2, 2.0),
new_candidate(region3, 3.0),
],
)]);
let mut gc_reports = HashMap::new();
gc_reports.insert(region1, GcReport::default());
gc_reports.insert(region2, GcReport::default());
gc_reports.insert(region3, GcReport::default());
let file_refs = FileRefsManifest {
manifest_version: HashMap::from([(region1, 1), (region2, 1), (region3, 1)]),
..Default::default()
};
let ctx = Arc::new(
MockSchedulerCtx {
gc_reports: Arc::new(Mutex::new(gc_reports)),
file_refs: Arc::new(Mutex::new(Some(file_refs))),
candidates: Arc::new(Mutex::new(Some(candidates))),
..Default::default()
}
.with_table_routes(HashMap::from([(
table_id,
(
table_id,
vec![
(region1, peer.clone()),
(region2, peer.clone()),
(region3, peer.clone()),
],
),
)])),
);
let scheduler = GcScheduler {
ctx: ctx.clone(),
receiver: GcScheduler::channel().1,
config: GcSchedulerOptions::default(),
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
};
let candidates = ctx.candidates.lock().unwrap().clone().unwrap_or_default();
// Convert table-based candidates to datanode-based candidates
let datanode_to_candidates = HashMap::from([(
peer.clone(),
candidates
.into_iter()
.flat_map(|(table_id, candidates)| candidates.into_iter().map(move |c| (table_id, c)))
.collect(),
)]);
let report = scheduler
.parallel_process_datanodes(datanode_to_candidates)
.await;
assert_eq!(report.per_datanode_reports.len(), 1);
assert_eq!(report.failed_datanodes.len(), 0);
}

View File

@@ -50,7 +50,7 @@ impl GcScheduler {
let now = Instant::now();
// Check if enough time has passed since last cleanup
if now.saturating_duration_since(last_cleanup) < self.config.tracker_cleanup_interval {
if now.duration_since(last_cleanup) < self.config.tracker_cleanup_interval {
return Ok(());
}

View File

@@ -28,7 +28,7 @@ use common_meta::ddl::table_meta::{TableMetadataAllocator, TableMetadataAllocato
use common_meta::ddl::{
DdlContext, NoopRegionFailureDetectorControl, RegionFailureDetectorControllerRef,
};
use common_meta::ddl_manager::{DdlManager, DdlManagerConfiguratorRef};
use common_meta::ddl_manager::{DdlManager, DdlManagerConfiguratorRef, DdlManagerConfigureContext};
use common_meta::distributed_time_constants::{self};
use common_meta::key::TableMetadataManager;
use common_meta::key::flow::FlowMetadataManager;
@@ -405,11 +405,10 @@ impl MetasrvBuilder {
let ddl_manager = if let Some(configurator) = plugins
.as_ref()
.and_then(|p| p.get::<DdlManagerConfiguratorRef<DdlManagerConfigureContext>>())
.and_then(|p| p.get::<DdlManagerConfiguratorRef>())
{
let ctx = DdlManagerConfigureContext {
kv_backend: kv_backend.clone(),
meta_peer_client: meta_peer_client.clone(),
};
configurator
.configure(ddl_manager, ctx)
@@ -638,9 +637,3 @@ impl Default for MetasrvBuilder {
Self::new()
}
}
/// The context for [`DdlManagerConfiguratorRef`].
pub struct DdlManagerConfigureContext {
pub kv_backend: KvBackendRef,
pub meta_peer_client: MetaPeerClientRef,
}

View File

@@ -13,16 +13,11 @@
// limitations under the License.
pub(crate) mod repartition_start;
pub(crate) mod update_metadata;
use std::any::Any;
use std::fmt::Debug;
use common_error::ext::BoxedError;
use common_meta::DatanodeId;
use common_meta::cache_invalidator::CacheInvalidatorRef;
use common_meta::instruction::CacheIdent;
use common_meta::key::datanode_table::{DatanodeTableKey, DatanodeTableValue, RegionInfo};
use common_meta::key::table_route::TableRouteValue;
use common_meta::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
use common_meta::rpc::router::RegionRoute;
@@ -42,8 +37,6 @@ pub struct RepartitionGroupProcedure {}
pub struct Context {
pub persistent_ctx: PersistentContext,
pub cache_invalidator: CacheInvalidatorRef,
pub table_metadata_manager: TableMetadataManagerRef,
}
@@ -52,7 +45,6 @@ pub struct GroupPrepareResult {
pub source_routes: Vec<RegionRoute>,
pub target_routes: Vec<RegionRoute>,
pub central_region: RegionId,
pub central_region_datanode_id: DatanodeId,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
@@ -99,109 +91,6 @@ impl Context {
Ok(table_route_value)
}
/// Returns the `datanode_table_value`
///
/// Retry:
/// - Failed to retrieve the metadata of datanode table.
pub async fn get_datanode_table_value(
&self,
table_id: TableId,
datanode_id: u64,
) -> Result<DatanodeTableValue> {
let datanode_table_value = self
.table_metadata_manager
.datanode_table_manager()
.get(&DatanodeTableKey {
datanode_id,
table_id,
})
.await
.context(error::TableMetadataManagerSnafu)
.map_err(BoxedError::new)
.with_context(|_| error::RetryLaterWithSourceSnafu {
reason: format!("Failed to get DatanodeTable: {table_id}"),
})?
.context(error::DatanodeTableNotFoundSnafu {
table_id,
datanode_id,
})?;
Ok(datanode_table_value)
}
/// Broadcasts the invalidate table cache message.
pub async fn invalidate_table_cache(&self) -> Result<()> {
let table_id = self.persistent_ctx.table_id;
let group_id = self.persistent_ctx.group_id;
let subject = format!(
"Invalidate table cache for repartition table, group: {}, table: {}",
group_id, table_id,
);
let ctx = common_meta::cache_invalidator::Context {
subject: Some(subject),
};
let _ = self
.cache_invalidator
.invalidate(&ctx, &[CacheIdent::TableId(table_id)])
.await;
Ok(())
}
/// Updates the table route.
///
/// Retry:
/// - Failed to retrieve the metadata of datanode table.
///
/// Abort:
/// - Table route not found.
/// - Failed to update the table route.
pub async fn update_table_route(
&self,
current_table_route_value: &DeserializedValueWithBytes<TableRouteValue>,
new_region_routes: Vec<RegionRoute>,
) -> Result<()> {
let table_id = self.persistent_ctx.table_id;
// Safety: prepare result is set in [RepartitionStart] state.
let prepare_result = self.persistent_ctx.group_prepare_result.as_ref().unwrap();
let central_region_datanode_table_value = self
.get_datanode_table_value(table_id, prepare_result.central_region_datanode_id)
.await?;
let RegionInfo {
region_options,
region_wal_options,
..
} = &central_region_datanode_table_value.region_info;
self.table_metadata_manager
.update_table_route(
table_id,
central_region_datanode_table_value.region_info.clone(),
current_table_route_value,
new_region_routes,
region_options,
region_wal_options,
)
.await
.context(error::TableMetadataManagerSnafu)
}
}
/// Returns the region routes of the given table route value.
///
/// Abort:
/// - Table route value is not physical.
pub fn region_routes(
table_id: TableId,
table_route_value: &TableRouteValue,
) -> Result<&Vec<RegionRoute>> {
table_route_value
.region_routes()
.with_context(|_| error::UnexpectedLogicalRouteTableSnafu {
err_msg: format!(
"TableRoute({:?}) is a non-physical TableRouteValue.",
table_id
),
})
}
#[async_trait::async_trait]
@@ -262,23 +151,4 @@ mod tests {
let err = ctx.get_table_route_value().await.unwrap_err();
assert!(err.is_retryable());
}
#[tokio::test]
async fn test_get_datanode_table_value_retry_error() {
let kv = MockKvBackendBuilder::default()
.range_fn(Arc::new(|_| {
common_meta::error::UnexpectedSnafu {
err_msg: "mock err",
}
.fail()
}))
.build()
.unwrap();
let mut env = TestingEnv::new();
env.table_metadata_manager = Arc::new(TableMetadataManager::new(Arc::new(kv)));
let persistent_context = new_persistent_context(1024, vec![], vec![]);
let ctx = env.create_context(persistent_context);
let err = ctx.get_datanode_table_value(1024, 1).await.unwrap_err();
assert!(err.is_retryable());
}
}

View File

@@ -22,9 +22,7 @@ use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt, ensure};
use crate::error::{self, Result};
use crate::procedure::repartition::group::{
Context, GroupId, GroupPrepareResult, State, region_routes,
};
use crate::procedure::repartition::group::{Context, GroupId, GroupPrepareResult, State};
use crate::procedure::repartition::plan::RegionDescriptor;
#[derive(Debug, Serialize, Deserialize)]
@@ -69,6 +67,7 @@ impl RepartitionStart {
}
);
let central_region = sources[0].region_id;
let region_routes_map = region_routes
.iter()
.map(|r| (r.region.id, r))
@@ -94,26 +93,14 @@ impl RepartitionStart {
group_id,
region_id: t.region_id,
})
.map(|r| (*r).clone())
.and_then(|r| ensure_region_route_expr_match(r, t))
})
.collect::<Result<Vec<_>>>()?;
let central_region = sources[0].region_id;
let central_region_datanode_id = source_region_routes[0]
.leader_peer
.as_ref()
.context(error::UnexpectedSnafu {
violated: format!(
"Leader peer is not set for central region: {}",
central_region
),
})?
.id;
Ok(GroupPrepareResult {
source_routes: source_region_routes,
target_routes: target_region_routes,
central_region,
central_region_datanode_id,
})
}
@@ -148,7 +135,14 @@ impl State for RepartitionStart {
let table_id = ctx.persistent_ctx.table_id;
let group_id = ctx.persistent_ctx.group_id;
let table_route_value = ctx.get_table_route_value().await?.into_inner();
let region_routes = region_routes(table_id, &table_route_value)?;
let region_routes = table_route_value.region_routes().with_context(|_| {
error::UnexpectedLogicalRouteTableSnafu {
err_msg: format!(
"TableRoute({:?}) is a non-physical TableRouteValue.",
table_id
),
}
})?;
let group_prepare_result = Self::ensure_route_present(
group_id,
region_routes,
@@ -240,6 +234,43 @@ mod tests {
)
.unwrap_err();
assert_matches!(err, Error::PartitionExprMismatch { .. });
let source_region = RegionDescriptor {
region_id: RegionId::new(1024, 1),
partition_expr: range_expr("x", 0, 100),
};
let target_region = RegionDescriptor {
region_id: RegionId::new(1024, 2),
partition_expr: range_expr("x", 0, 10),
};
let region_routes = vec![
RegionRoute {
region: Region {
id: RegionId::new(1024, 1),
partition_expr: range_expr("x", 0, 100).as_json_str().unwrap(),
..Default::default()
},
leader_peer: Some(Peer::empty(1)),
..Default::default()
},
RegionRoute {
region: Region {
id: RegionId::new(1024, 2),
partition_expr: range_expr("x", 0, 5).as_json_str().unwrap(),
..Default::default()
},
leader_peer: Some(Peer::empty(1)),
..Default::default()
},
];
let err = RepartitionStart::ensure_route_present(
Uuid::new_v4(),
&region_routes,
&[source_region],
&[target_region],
)
.unwrap_err();
assert_matches!(err, Error::PartitionExprMismatch { .. });
}
#[test]

View File

@@ -1,80 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub(crate) mod apply_staging_region;
pub(crate) mod rollback_staging_region;
use std::any::Any;
use common_procedure::{Context as ProcedureContext, Status};
use common_telemetry::warn;
use serde::{Deserialize, Serialize};
use crate::error::Result;
use crate::procedure::repartition::group::repartition_start::RepartitionStart;
use crate::procedure::repartition::group::{Context, State};
#[derive(Debug, Serialize, Deserialize)]
pub enum UpdateMetadata {
/// Applies the new partition expressions for staging regions.
ApplyStaging,
/// Rolls back the new partition expressions for staging regions.
RollbackStaging,
}
impl UpdateMetadata {
#[allow(dead_code)]
fn next_state() -> (Box<dyn State>, Status) {
// TODO(weny): change it later.
(Box::new(RepartitionStart), Status::executing(true))
}
}
#[async_trait::async_trait]
#[typetag::serde]
impl State for UpdateMetadata {
async fn next(
&mut self,
ctx: &mut Context,
_procedure_ctx: &ProcedureContext,
) -> Result<(Box<dyn State>, Status)> {
match self {
UpdateMetadata::ApplyStaging => {
// TODO(weny): If all metadata have already been updated, skip applying staging regions.
self.apply_staging_regions(ctx).await?;
if let Err(err) = ctx.invalidate_table_cache().await {
warn!(
"Failed to broadcast the invalidate table cache message during the apply staging regions, error: {err:?}"
);
};
Ok(Self::next_state())
}
UpdateMetadata::RollbackStaging => {
self.rollback_staging_regions(ctx).await?;
if let Err(err) = ctx.invalidate_table_cache().await {
warn!(
"Failed to broadcast the invalidate table cache message during the rollback staging regions, error: {err:?}"
);
};
Ok(Self::next_state())
}
}
}
fn as_any(&self) -> &dyn Any {
self
}
}

View File

@@ -1,181 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use common_error::ext::BoxedError;
use common_meta::rpc::router::RegionRoute;
use common_telemetry::error;
use snafu::{OptionExt, ResultExt};
use crate::error::{self, Result};
use crate::procedure::repartition::group::update_metadata::UpdateMetadata;
use crate::procedure::repartition::group::{Context, GroupId, region_routes};
use crate::procedure::repartition::plan::RegionDescriptor;
impl UpdateMetadata {
/// Applies the new partition expressions for staging regions.
///
/// Abort:
/// - Target region not found.
/// - Source region not found.
fn apply_staging_region_routes(
group_id: GroupId,
sources: &[RegionDescriptor],
targets: &[RegionDescriptor],
current_region_routes: &[RegionRoute],
) -> Result<Vec<RegionRoute>> {
let mut region_routes = current_region_routes.to_vec();
let mut region_routes_map = region_routes
.iter_mut()
.map(|route| (route.region.id, route))
.collect::<HashMap<_, _>>();
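// Targets receive their new partition exprs and are marked leader-staging;
// sources are only marked leader-staging.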
for target in targets {
let region_route = region_routes_map.get_mut(&target.region_id).context(
error::RepartitionTargetRegionMissingSnafu {
group_id,
region_id: target.region_id,
},
)?;
region_route.region.partition_expr = target
.partition_expr
.as_json_str()
.context(error::SerializePartitionExprSnafu)?;
region_route.set_leader_staging();
}
for source in sources {
let region_route = region_routes_map.get_mut(&source.region_id).context(
error::RepartitionSourceRegionMissingSnafu {
group_id,
region_id: source.region_id,
},
)?;
region_route.set_leader_staging();
}
Ok(region_routes)
}
/// Applies the new partition expressions for staging regions.
///
/// Abort:
/// - Table route is not physical.
/// - Target region not found.
/// - Source region not found.
/// - Failed to update the table route.
/// - Central region datanode table value not found.
#[allow(dead_code)]
pub(crate) async fn apply_staging_regions(&self, ctx: &mut Context) -> Result<()> {
let table_id = ctx.persistent_ctx.table_id;
let group_id = ctx.persistent_ctx.group_id;
let current_table_route_value = ctx.get_table_route_value().await?;
let region_routes = region_routes(table_id, current_table_route_value.get_inner_ref())?;
let new_region_routes = Self::apply_staging_region_routes(
group_id,
&ctx.persistent_ctx.sources,
&ctx.persistent_ctx.targets,
region_routes,
)?;
if let Err(err) = ctx
.update_table_route(&current_table_route_value, new_region_routes)
.await
{
error!(err; "Failed to update the table route during the updating metadata for repartition: {table_id}, group_id: {group_id}");
return Err(BoxedError::new(err)).context(error::RetryLaterWithSourceSnafu {
reason: format!(
"Failed to update the table route during the updating metadata for repartition: {table_id}, group_id: {group_id}"
),
});
};
Ok(())
}
}
#[cfg(test)]
mod tests {
use common_meta::peer::Peer;
use common_meta::rpc::router::{Region, RegionRoute};
use store_api::storage::RegionId;
use uuid::Uuid;
use crate::procedure::repartition::group::update_metadata::UpdateMetadata;
use crate::procedure::repartition::plan::RegionDescriptor;
use crate::procedure::repartition::test_util::range_expr;
#[test]
fn test_generate_region_routes() {
let group_id = Uuid::new_v4();
let table_id = 1024;
let region_routes = vec![
RegionRoute {
region: Region {
id: RegionId::new(table_id, 1),
partition_expr: range_expr("x", 0, 100).as_json_str().unwrap(),
..Default::default()
},
leader_peer: Some(Peer::empty(1)),
..Default::default()
},
RegionRoute {
region: Region {
id: RegionId::new(table_id, 2),
partition_expr: String::new(),
..Default::default()
},
leader_peer: Some(Peer::empty(1)),
..Default::default()
},
RegionRoute {
region: Region {
id: RegionId::new(table_id, 3),
partition_expr: String::new(),
..Default::default()
},
leader_peer: Some(Peer::empty(1)),
..Default::default()
},
];
let source_region = RegionDescriptor {
region_id: RegionId::new(table_id, 1),
partition_expr: range_expr("x", 0, 100),
};
let target_region = RegionDescriptor {
region_id: RegionId::new(table_id, 2),
partition_expr: range_expr("x", 0, 10),
};
let new_region_routes = UpdateMetadata::apply_staging_region_routes(
group_id,
&[source_region],
&[target_region],
&region_routes,
)
.unwrap();
assert!(new_region_routes[0].is_leader_staging());
assert_eq!(
new_region_routes[0].region.partition_expr,
range_expr("x", 0, 100).as_json_str().unwrap()
);
assert_eq!(
new_region_routes[1].region.partition_expr,
range_expr("x", 0, 10).as_json_str().unwrap()
);
assert!(new_region_routes[1].is_leader_staging());
assert!(!new_region_routes[2].is_leader_staging());
}
}

View File

@@ -1,187 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use common_error::ext::BoxedError;
use common_meta::rpc::router::RegionRoute;
use common_telemetry::error;
use snafu::{OptionExt, ResultExt};
use crate::error::{self, Result};
use crate::procedure::repartition::group::update_metadata::UpdateMetadata;
use crate::procedure::repartition::group::{Context, GroupId, region_routes};
impl UpdateMetadata {
/// Rolls back the staging regions.
///
/// Abort:
/// - Source region not found.
/// - Target region not found.
#[allow(dead_code)]
fn rollback_staging_region_routes(
group_id: GroupId,
source_routes: &[RegionRoute],
target_routes: &[RegionRoute],
current_region_routes: &[RegionRoute],
) -> Result<Vec<RegionRoute>> {
let mut region_routes = current_region_routes.to_vec();
let mut region_routes_map = region_routes
.iter_mut()
.map(|route| (route.region.id, route))
.collect::<HashMap<_, _>>();
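// Restore each source region's original partition expr, then clear the
// staging state on both source and target regions.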
for source in source_routes {
let region_route = region_routes_map.get_mut(&source.region.id).context(
error::RepartitionSourceRegionMissingSnafu {
group_id,
region_id: source.region.id,
},
)?;
region_route.region.partition_expr = source.region.partition_expr.clone();
region_route.clear_leader_staging();
}
for target in target_routes {
let region_route = region_routes_map.get_mut(&target.region.id).context(
error::RepartitionTargetRegionMissingSnafu {
group_id,
region_id: target.region.id,
},
)?;
region_route.clear_leader_staging();
}
Ok(region_routes)
}
/// Rolls back the metadata for staging regions.
///
/// Abort:
/// - Table route is not physical.
/// - Source region not found.
/// - Target region not found.
/// - Failed to update the table route.
/// - Central region datanode table value not found.
#[allow(dead_code)]
pub(crate) async fn rollback_staging_regions(&self, ctx: &mut Context) -> Result<()> {
let table_id = ctx.persistent_ctx.table_id;
let group_id = ctx.persistent_ctx.group_id;
let current_table_route_value = ctx.get_table_route_value().await?;
let region_routes = region_routes(table_id, current_table_route_value.get_inner_ref())?;
// Safety: prepare result is set in [RepartitionStart] state.
let prepare_result = ctx.persistent_ctx.group_prepare_result.as_ref().unwrap();
let new_region_routes = Self::rollback_staging_region_routes(
group_id,
&prepare_result.source_routes,
&prepare_result.target_routes,
region_routes,
)?;
if let Err(err) = ctx
.update_table_route(&current_table_route_value, new_region_routes)
.await
{
error!(err; "Failed to update the table route during the updating metadata for repartition: {table_id}, group_id: {group_id}");
return Err(BoxedError::new(err)).context(error::RetryLaterWithSourceSnafu {
reason: format!(
"Failed to update the table route during the updating metadata for repartition: {table_id}, group_id: {group_id}"
),
});
};
Ok(())
}
}
#[cfg(test)]
mod tests {
use common_meta::peer::Peer;
use common_meta::rpc::router::{LeaderState, Region, RegionRoute};
use store_api::storage::RegionId;
use uuid::Uuid;
use crate::procedure::repartition::group::update_metadata::UpdateMetadata;
use crate::procedure::repartition::test_util::range_expr;
#[test]
fn test_rollback_staging_region_routes() {
let group_id = Uuid::new_v4();
let table_id = 1024;
let region_routes = vec![
RegionRoute {
region: Region {
id: RegionId::new(table_id, 1),
partition_expr: range_expr("x", 0, 100).as_json_str().unwrap(),
..Default::default()
},
leader_peer: Some(Peer::empty(1)),
leader_state: Some(LeaderState::Staging),
..Default::default()
},
RegionRoute {
region: Region {
id: RegionId::new(table_id, 2),
partition_expr: String::new(),
..Default::default()
},
leader_peer: Some(Peer::empty(1)),
leader_state: Some(LeaderState::Staging),
..Default::default()
},
RegionRoute {
region: Region {
id: RegionId::new(table_id, 3),
partition_expr: String::new(),
..Default::default()
},
leader_peer: Some(Peer::empty(1)),
leader_state: Some(LeaderState::Downgrading),
..Default::default()
},
];
let source_routes = vec![RegionRoute {
region: Region {
id: RegionId::new(table_id, 1),
partition_expr: range_expr("x", 0, 20).as_json_str().unwrap(),
..Default::default()
},
leader_peer: Some(Peer::empty(1)),
..Default::default()
}];
let target_routes = vec![RegionRoute {
region: Region {
id: RegionId::new(table_id, 2),
partition_expr: range_expr("x", 0, 20).as_json_str().unwrap(),
..Default::default()
},
leader_peer: Some(Peer::empty(1)),
..Default::default()
}];
let new_region_routes = UpdateMetadata::rollback_staging_region_routes(
group_id,
&source_routes,
&target_routes,
&region_routes,
)
.unwrap();
assert!(!new_region_routes[0].is_leader_staging());
assert_eq!(
new_region_routes[0].region.partition_expr,
range_expr("x", 0, 20).as_json_str().unwrap(),
);
assert!(!new_region_routes[1].is_leader_staging());
assert!(new_region_routes[2].is_leader_downgrading());
}
}

View File

@@ -21,6 +21,6 @@ use store_api::storage::RegionId;
pub struct RegionDescriptor {
/// The region id of the region involved in the plan.
pub region_id: RegionId,
/// The new partition expression of the region.
/// The partition expression of the region.
pub partition_expr: PartitionExpr,
}

View File

@@ -16,22 +16,17 @@ use std::sync::Arc;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::memory::MemoryKvBackend;
use common_meta::sequence::SequenceBuilder;
use datatypes::value::Value;
use partition::expr::{PartitionExpr, col};
use store_api::storage::TableId;
use uuid::Uuid;
use crate::cache_invalidator::MetasrvCacheInvalidator;
use crate::metasrv::MetasrvInfo;
use crate::procedure::repartition::group::{Context, PersistentContext};
use crate::procedure::repartition::plan::RegionDescriptor;
use crate::procedure::test_util::MailboxContext;
/// `TestingEnv` provides components during the tests.
pub struct TestingEnv {
pub table_metadata_manager: TableMetadataManagerRef,
pub mailbox_ctx: MailboxContext,
}
impl Default for TestingEnv {
@@ -44,28 +39,16 @@ impl TestingEnv {
pub fn new() -> Self {
let kv_backend = Arc::new(MemoryKvBackend::new());
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend.clone()));
let mailbox_sequence =
SequenceBuilder::new("test_heartbeat_mailbox", kv_backend.clone()).build();
let mailbox_ctx = MailboxContext::new(mailbox_sequence);
Self {
table_metadata_manager,
mailbox_ctx,
}
}
pub fn create_context(self, persistent_context: PersistentContext) -> Context {
let cache_invalidator = Arc::new(MetasrvCacheInvalidator::new(
self.mailbox_ctx.mailbox().clone(),
MetasrvInfo {
server_addr: String::new(),
},
));
Context {
persistent_ctx: persistent_context,
table_metadata_manager: self.table_metadata_manager.clone(),
cache_invalidator,
}
}
}

View File

@@ -14,7 +14,6 @@ async-stream.workspace = true
async-trait.workspace = true
base64.workspace = true
bytes.workspace = true
fxhash = "0.2"
common-base.workspace = true
common-error.workspace = true
common-macro.workspace = true
@@ -32,6 +31,7 @@ lazy_static = "1.4"
mito-codec.workspace = true
mito2.workspace = true
moka.workspace = true
mur3 = "0.1"
object-store.workspace = true
prometheus.workspace = true
serde.workspace = true
@@ -47,12 +47,6 @@ common-meta = { workspace = true, features = ["testing"] }
common-test-util.workspace = true
mito2 = { workspace = true, features = ["test"] }
common-wal = { workspace = true }
criterion = { version = "0.4", features = ["async", "async_tokio"] }
mur3 = "0.1"
[[bench]]
name = "bench_tsid_generator"
harness = false
[package.metadata.cargo-udeps.ignore]
normal = ["aquamarine"]

View File

@@ -1,273 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::hash::Hasher;
use criterion::{Criterion, black_box, criterion_group, criterion_main};
use fxhash::FxHasher;
use mur3::Hasher128;
// A random number (from original implementation)
const TSID_HASH_SEED: u32 = 846793005;
/// Original TSID generator using mur3::Hasher128
/// Hashes both label name and value for each label pair
struct OriginalTsidGenerator {
hasher: Hasher128,
}
impl OriginalTsidGenerator {
fn new() -> Self {
Self {
hasher: Hasher128::with_seed(TSID_HASH_SEED),
}
}
/// Writes a label pair (name and value) to the generator.
fn write_label(&mut self, name: &str, value: &str) {
use std::hash::Hash;
name.hash(&mut self.hasher);
value.hash(&mut self.hasher);
}
/// Generates a new TSID.
fn finish(&mut self) -> u64 {
// TSID is 64 bits; simply truncate the 128-bit hash.
let (hash, _) = self.hasher.finish128();
hash
}
}
/// Current TSID generator using fxhash::FxHasher
/// Fast path: pre-computes label name hash, only hashes values
struct CurrentTsidGenerator {
hasher: FxHasher,
}
impl CurrentTsidGenerator {
fn new() -> Self {
Self {
hasher: FxHasher::default(),
}
}
fn new_with_label_name_hash(label_name_hash: u64) -> Self {
let mut hasher = FxHasher::default();
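// Seed the hasher with the precomputed hash of all label names (fast path).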
hasher.write_u64(label_name_hash);
Self { hasher }
}
/// Writes a label value to the generator.
fn write_str(&mut self, value: &str) {
self.hasher.write(value.as_bytes());
self.hasher.write_u8(0xff);
}
/// Generates a new TSID.
fn finish(&mut self) -> u64 {
self.hasher.finish()
}
}
/// Pre-computes label name hash (used in fast path)
fn compute_label_name_hash(labels: &[(&str, &str)]) -> u64 {
let mut hasher = FxHasher::default();
for (name, _) in labels {
hasher.write(name.as_bytes());
hasher.write_u8(0xff);
}
hasher.finish()
}
fn bench_tsid_generator_small(c: &mut Criterion) {
let labels = vec![("namespace", "greptimedb"), ("host", "127.0.0.1")];
let mut group = c.benchmark_group("tsid_generator_small_2_labels");
group.bench_function("original_mur3", |b| {
b.iter(|| {
let mut tsid_gen = OriginalTsidGenerator::new();
for (name, value) in &labels {
tsid_gen.write_label(black_box(name), black_box(value));
}
black_box(tsid_gen.finish())
})
});
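// Fast path: the label-name hash is computed once up front and reused for every row.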
let label_name_hash = compute_label_name_hash(&labels);
group.bench_function("current_fxhash_fast_path", |b| {
b.iter(|| {
let mut tsid_gen =
CurrentTsidGenerator::new_with_label_name_hash(black_box(label_name_hash));
for (_, value) in &labels {
tsid_gen.write_str(black_box(value));
}
black_box(tsid_gen.finish())
})
});
group.finish();
}
fn bench_tsid_generator_medium(c: &mut Criterion) {
let labels = vec![
("namespace", "greptimedb"),
("host", "127.0.0.1"),
("region", "us-west-2"),
("env", "production"),
("service", "api"),
];
let mut group = c.benchmark_group("tsid_generator_medium_5_labels");
group.bench_function("original_mur3", |b| {
b.iter(|| {
let mut tsid_gen = OriginalTsidGenerator::new();
for (name, value) in &labels {
tsid_gen.write_label(black_box(name), black_box(value));
}
black_box(tsid_gen.finish())
})
});
let label_name_hash = compute_label_name_hash(&labels);
group.bench_function("current_fxhash_fast_path", |b| {
b.iter(|| {
let mut tsid_gen =
CurrentTsidGenerator::new_with_label_name_hash(black_box(label_name_hash));
for (_, value) in &labels {
tsid_gen.write_str(black_box(value));
}
black_box(tsid_gen.finish())
})
});
group.finish();
}
fn bench_tsid_generator_large(c: &mut Criterion) {
let labels = vec![
("namespace", "greptimedb"),
("host", "127.0.0.1"),
("region", "us-west-2"),
("env", "production"),
("service", "api"),
("version", "v1.0.0"),
("cluster", "cluster-1"),
("dc", "dc1"),
("rack", "rack-1"),
("pod", "pod-123"),
];
let mut group = c.benchmark_group("tsid_generator_large_10_labels");
group.bench_function("original_mur3", |b| {
b.iter(|| {
let mut tsid_gen = OriginalTsidGenerator::new();
for (name, value) in &labels {
tsid_gen.write_label(black_box(name), black_box(value));
}
black_box(tsid_gen.finish())
})
});
let label_name_hash = compute_label_name_hash(&labels);
group.bench_function("current_fxhash_fast_path", |b| {
b.iter(|| {
let mut tsid_gen =
CurrentTsidGenerator::new_with_label_name_hash(black_box(label_name_hash));
for (_, value) in &labels {
tsid_gen.write_str(black_box(value));
}
black_box(tsid_gen.finish())
})
});
group.finish();
}
fn bench_tsid_generator_slow_path(c: &mut Criterion) {
// Simulate slow path: some labels have null values (empty strings)
let labels_with_nulls = vec![
("namespace", "greptimedb"),
("host", "127.0.0.1"),
("region", ""), // null
("env", "production"),
];
let labels_all_non_null = vec![
("namespace", "greptimedb"),
("host", "127.0.0.1"),
("env", "production"),
];
let mut group = c.benchmark_group("tsid_generator_slow_path_with_nulls");
// Original: always hashes name and value
group.bench_function("original_mur3_with_nulls", |b| {
b.iter(|| {
let mut tsid_gen = OriginalTsidGenerator::new();
for (name, value) in &labels_with_nulls {
if !value.is_empty() {
tsid_gen.write_label(black_box(name), black_box(value));
}
}
black_box(tsid_gen.finish())
})
});
// Current slow path: recomputes label name hash
group.bench_function("current_fxhash_slow_path", |b| {
b.iter(|| {
// Step 1: Compute label name hash for non-null labels
let mut name_hasher = CurrentTsidGenerator::new();
for (name, value) in &labels_with_nulls {
if !value.is_empty() {
name_hasher.write_str(black_box(name));
}
}
let label_name_hash = name_hasher.finish();
// Step 2: Use label name hash and hash values
let mut tsid_gen = CurrentTsidGenerator::new_with_label_name_hash(label_name_hash);
for (_, value) in &labels_with_nulls {
if !value.is_empty() {
tsid_gen.write_str(black_box(value));
}
}
black_box(tsid_gen.finish())
})
});
// Current fast path: pre-computed (for comparison)
let label_name_hash = compute_label_name_hash(&labels_all_non_null);
group.bench_function("current_fxhash_fast_path_no_nulls", |b| {
b.iter(|| {
let mut tsid_gen =
CurrentTsidGenerator::new_with_label_name_hash(black_box(label_name_hash));
for (_, value) in &labels_all_non_null {
tsid_gen.write_str(black_box(value));
}
black_box(tsid_gen.finish())
})
});
group.finish();
}
criterion_group!(
benches,
bench_tsid_generator_small,
bench_tsid_generator_medium,
bench_tsid_generator_large,
bench_tsid_generator_slow_path
);
criterion_main!(benches);

View File

@@ -23,7 +23,6 @@ mod options;
mod put;
mod read;
mod region_metadata;
mod staging;
mod state;
mod sync;
@@ -212,13 +211,6 @@ impl RegionEngine for MetricEngine {
let mut extension_return_value = HashMap::new();
let result = match request {
RegionRequest::EnterStaging(_) => {
if self.inner.is_physical_region(region_id) {
self.handle_enter_staging_request(region_id, request).await
} else {
UnsupportedRegionRequestSnafu { request }.fail()
}
}
RegionRequest::Put(put) => self.inner.put_region(region_id, put).await,
RegionRequest::Create(create) => {
self.inner

View File

@@ -119,7 +119,7 @@ mod tests {
.index_file_path
.map(|path| path.replace(&e.file_id, "<file_id>"));
e.file_id = "<file_id>".to_string();
e.index_version = 0;
e.index_file_id = e.index_file_id.map(|_| "<index_file_id>".to_string());
format!("\n{:?}", e)
})
.sorted()
@@ -128,12 +128,12 @@ mod tests {
assert_eq!(
debug_format,
r#"
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/11_0000000001/data/<file_id>.parquet", file_size: 3217, index_file_path: Some("test_metric_region/11_0000000001/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(20), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/11_0000000002/data/<file_id>.parquet", file_size: 3217, index_file_path: Some("test_metric_region/11_0000000002/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417473(11, 16777217), table_id: 11, region_number: 16777217, region_group: 1, region_sequence: 1, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/11_0000000001/metadata/<file_id>.parquet", file_size: 3487, index_file_path: None, index_file_size: None, num_rows: 8, num_row_groups: 1, num_series: Some(8), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(8), origin_region_id: 47261417473(11, 16777217), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417474(11, 16777218), table_id: 11, region_number: 16777218, region_group: 1, region_sequence: 2, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/11_0000000002/metadata/<file_id>.parquet", file_size: 3471, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, num_series: Some(4), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 47261417474(11, 16777218), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/22_0000000042/data/<file_id>.parquet", file_size: 3217, index_file_path: Some("test_metric_region/22_0000000042/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94506057770(22, 16777258), table_id: 22, region_number: 16777258, region_group: 1, region_sequence: 42, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/22_0000000042/metadata/<file_id>.parquet", file_size: 3471, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, num_series: Some(4), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 94506057770(22, 16777258), node_id: None, visible: true }"#,
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_id: Some("<index_file_id>"), level: 0, file_path: "test_metric_region/11_0000000001/data/<file_id>.parquet", file_size: 3217, index_file_path: Some("test_metric_region/11_0000000001/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(20), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", index_file_id: Some("<index_file_id>"), level: 0, file_path: "test_metric_region/11_0000000002/data/<file_id>.parquet", file_size: 3217, index_file_path: Some("test_metric_region/11_0000000002/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417473(11, 16777217), table_id: 11, region_number: 16777217, region_group: 1, region_sequence: 1, file_id: "<file_id>", index_file_id: None, level: 0, file_path: "test_metric_region/11_0000000001/metadata/<file_id>.parquet", file_size: 3487, index_file_path: None, index_file_size: None, num_rows: 8, num_row_groups: 1, num_series: Some(8), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(8), origin_region_id: 47261417473(11, 16777217), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417474(11, 16777218), table_id: 11, region_number: 16777218, region_group: 1, region_sequence: 2, file_id: "<file_id>", index_file_id: None, level: 0, file_path: "test_metric_region/11_0000000002/metadata/<file_id>.parquet", file_size: 3471, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, num_series: Some(4), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 47261417474(11, 16777218), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", index_file_id: Some("<index_file_id>"), level: 0, file_path: "test_metric_region/22_0000000042/data/<file_id>.parquet", file_size: 3217, index_file_path: Some("test_metric_region/22_0000000042/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94506057770(22, 16777258), table_id: 22, region_number: 16777258, region_group: 1, region_sequence: 42, file_id: "<file_id>", index_file_id: None, level: 0, file_path: "test_metric_region/22_0000000042/metadata/<file_id>.parquet", file_size: 3471, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, num_series: Some(4), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 94506057770(22, 16777258), node_id: None, visible: true }"#
);
// list from storage
let storage_entries = mito

View File

@@ -272,15 +272,15 @@ mod tests {
.unwrap();
let batches = RecordBatches::try_collect(stream).await.unwrap();
let expected = "\
+-------------------------+----------------+------------+---------------------+-------+
| greptime_timestamp | greptime_value | __table_id | __tsid | job |
+-------------------------+----------------+------------+---------------------+-------+
| 1970-01-01T00:00:00 | 0.0 | 3 | 2955007454552897459 | tag_0 |
| 1970-01-01T00:00:00.001 | 1.0 | 3 | 2955007454552897459 | tag_0 |
| 1970-01-01T00:00:00.002 | 2.0 | 3 | 2955007454552897459 | tag_0 |
| 1970-01-01T00:00:00.003 | 3.0 | 3 | 2955007454552897459 | tag_0 |
| 1970-01-01T00:00:00.004 | 4.0 | 3 | 2955007454552897459 | tag_0 |
+-------------------------+----------------+------------+---------------------+-------+";
+-------------------------+----------------+------------+----------------------+-------+
| greptime_timestamp | greptime_value | __table_id | __tsid | job |
+-------------------------+----------------+------------+----------------------+-------+
| 1970-01-01T00:00:00 | 0.0 | 3 | 12881218023286672757 | tag_0 |
| 1970-01-01T00:00:00.001 | 1.0 | 3 | 12881218023286672757 | tag_0 |
| 1970-01-01T00:00:00.002 | 2.0 | 3 | 12881218023286672757 | tag_0 |
| 1970-01-01T00:00:00.003 | 3.0 | 3 | 12881218023286672757 | tag_0 |
| 1970-01-01T00:00:00.004 | 4.0 | 3 | 12881218023286672757 | tag_0 |
+-------------------------+----------------+------------+----------------------+-------+";
assert_eq!(expected, batches.pretty_print().unwrap(), "physical region");
// read data from logical region

View File

@@ -1,54 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_base::AffectedRows;
use snafu::ResultExt;
use store_api::region_engine::RegionEngine;
use store_api::region_request::{EnterStagingRequest, RegionRequest};
use store_api::storage::RegionId;
use crate::engine::MetricEngine;
use crate::error::{MitoEnterStagingOperationSnafu, Result};
use crate::utils;
impl MetricEngine {
/// Handles the enter staging request for the given region.
pub(crate) async fn handle_enter_staging_request(
&self,
region_id: RegionId,
request: RegionRequest,
) -> Result<AffectedRows> {
let metadata_region_id = utils::to_metadata_region_id(region_id);
let data_region_id = utils::to_data_region_id(region_id);
// The metadata region doesn't care about the partition expr, so we can just pass an empty string.
self.inner
.mito
.handle_request(
metadata_region_id,
RegionRequest::EnterStaging(EnterStagingRequest {
partition_expr: String::new(),
}),
)
.await
.context(MitoEnterStagingOperationSnafu)?;
self.inner
.mito
.handle_request(data_region_id, request)
.await
.context(MitoEnterStagingOperationSnafu)
.map(|response| response.affected_rows)
}
}

View File

@@ -156,13 +156,6 @@ pub enum Error {
location: Location,
},
#[snafu(display("Mito enter staging operation fails"))]
MitoEnterStagingOperation {
source: BoxedError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to collect record batch stream"))]
CollectRecordBatchStream {
source: common_recordbatch::error::Error,
@@ -367,7 +360,6 @@ impl ErrorExt for Error {
| MitoWriteOperation { source, .. }
| MitoFlushOperation { source, .. }
| MitoSyncOperation { source, .. }
| MitoEnterStagingOperation { source, .. }
| BatchOpenMitoRegion { source, .. }
| BatchCatchupMitoRegion { source, .. } => source.status_code(),

View File

@@ -13,12 +13,11 @@
// limitations under the License.
use std::collections::{BTreeMap, HashMap};
use std::hash::Hasher;
use std::hash::Hash;
use api::v1::value::ValueData;
use api::v1::{ColumnDataType, ColumnSchema, Row, Rows, SemanticType, Value};
use datatypes::value::ValueRef;
use fxhash::FxHasher;
use mito_codec::row_converter::SparsePrimaryKeyCodec;
use smallvec::SmallVec;
use snafu::ResultExt;
@@ -31,6 +30,9 @@ use store_api::storage::{ColumnId, TableId};
use crate::error::{EncodePrimaryKeySnafu, Result};
// A random number used as the TSID hash seed
const TSID_HASH_SEED: u32 = 846793005;
/// A row modifier modifies [`Rows`].
///
/// - For [`PrimaryKeyEncoding::Sparse`] encoding,
@@ -73,7 +75,6 @@ impl RowModifier {
let num_output_column = num_column - num_primary_key_column + 1;
let mut buffer = vec![];
for mut iter in iter.iter_mut() {
let (table_id, tsid) = Self::fill_internal_columns(table_id, &iter);
let mut values = Vec::with_capacity(num_output_column);
@@ -146,72 +147,47 @@ impl RowModifier {
/// Fills internal columns of a row with the table id and a hash of tag values.
pub fn fill_internal_columns(table_id: TableId, iter: &RowIter<'_>) -> (Value, Value) {
let ts_id = if !iter.has_null_labels() {
// No null labels in the row, so we can safely reuse the precomputed label-name hash.
let mut ts_id_gen = TsidGenerator::new(iter.index.label_name_hash);
for (_, value) in iter.primary_keys_with_name() {
// The type is checked before. So only null is ignored.
if let Some(ValueData::StringValue(string)) = &value.value_data {
ts_id_gen.write_str(string);
} else {
unreachable!(
"Should not contain null or non-string value: {:?}, table id: {}",
value, table_id
);
}
let mut hasher = TsidGenerator::default();
for (name, value) in iter.primary_keys_with_name() {
// The type is checked before. So only null is ignored.
if let Some(ValueData::StringValue(string)) = &value.value_data {
hasher.write_label(name, string);
}
ts_id_gen.finish()
} else {
// Slow path: the row contains nulls, so recompute the label-name hash.
let mut hasher = TsidGenerator::default();
// 1. Find out label names with non-null values and get the hash.
for (name, value) in iter.primary_keys_with_name() {
// The type is checked before. So only null is ignored.
if let Some(ValueData::StringValue(_)) = &value.value_data {
hasher.write_str(name);
}
}
let label_name_hash = hasher.finish();
// 2. Use label name hash as seed and continue with label values.
let mut final_hasher = TsidGenerator::new(label_name_hash);
for (_, value) in iter.primary_keys_with_name() {
if let Some(ValueData::StringValue(value)) = &value.value_data {
final_hasher.write_str(value);
}
}
final_hasher.finish()
};
}
let hash = hasher.finish();
(
ValueData::U32Value(table_id).into(),
ValueData::U64Value(ts_id).into(),
ValueData::U64Value(hash).into(),
)
}
}
/// Tsid generator.
#[derive(Default)]
pub struct TsidGenerator {
hasher: FxHasher,
hasher: mur3::Hasher128,
}
impl Default for TsidGenerator {
fn default() -> Self {
Self {
hasher: mur3::Hasher128::with_seed(TSID_HASH_SEED),
}
}
}
impl TsidGenerator {
pub fn new(label_name_hash: u64) -> Self {
let mut hasher = FxHasher::default();
hasher.write_u64(label_name_hash);
Self { hasher }
}
/// Writes a label pair to the generator.
pub fn write_str(&mut self, value: &str) {
self.hasher.write(value.as_bytes());
self.hasher.write_u8(0xff);
pub fn write_label(&mut self, name: &str, value: &str) {
name.hash(&mut self.hasher);
value.hash(&mut self.hasher);
}
/// Generates a new TSID.
pub fn finish(&mut self) -> u64 {
self.hasher.finish()
// TSID is 64 bits; simply truncate the 128-bit hash.
let (hash, _) = self.hasher.finish128();
hash
}
}
@@ -226,8 +202,6 @@ struct ValueIndex {
struct IterIndex {
indices: Vec<ValueIndex>,
num_primary_key_column: usize,
/// Precomputed hash for label names.
label_name_hash: u64,
}
impl IterIndex {
@@ -278,22 +252,15 @@ impl IterIndex {
}
}
let num_primary_key_column = primary_key_indices.len() + reserved_indices.len();
let mut indices = Vec::with_capacity(num_primary_key_column + 2);
indices.extend(reserved_indices);
let mut label_name_hasher = TsidGenerator::default();
for (pk_name, pk_index) in primary_key_indices {
// primary_key_indices already sorted.
label_name_hasher.write_str(pk_name);
indices.push(pk_index);
}
let label_name_hash = label_name_hasher.finish();
indices.extend(ts_index);
indices.extend(field_indices);
let indices = reserved_indices
.into_iter()
.chain(primary_key_indices.values().cloned())
.chain(ts_index)
.chain(field_indices)
.collect();
IterIndex {
indices,
num_primary_key_column,
label_name_hash,
}
}
}
@@ -347,13 +314,6 @@ impl RowIter<'_> {
})
}
/// Returns true if any label in the current row is null.
fn has_null_labels(&self) -> bool {
self.index.indices[..self.index.num_primary_key_column]
.iter()
.any(|idx| self.row.values[idx.index].value_data.is_none())
}
/// Returns the primary keys.
pub fn primary_keys(&self) -> impl Iterator<Item = (ColumnId, ValueRef<'_>)> {
self.index.indices[..self.index.num_primary_key_column]
@@ -439,9 +399,9 @@ mod tests {
let result = encoder.modify_rows_sparse(rows_iter, table_id).unwrap();
assert_eq!(result.rows[0].values.len(), 1);
let encoded_primary_key = vec![
128, 0, 0, 4, 1, 0, 0, 4, 1, 128, 0, 0, 3, 1, 37, 196, 242, 181, 117, 224, 7, 137, 0,
0, 0, 2, 1, 1, 49, 50, 55, 46, 48, 46, 48, 46, 9, 49, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
1, 1, 1, 103, 114, 101, 112, 116, 105, 109, 101, 9, 100, 98, 0, 0, 0, 0, 0, 0, 2,
128, 0, 0, 4, 1, 0, 0, 4, 1, 128, 0, 0, 3, 1, 131, 9, 166, 190, 173, 37, 39, 240, 0, 0,
0, 2, 1, 1, 49, 50, 55, 46, 48, 46, 48, 46, 9, 49, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
1, 1, 103, 114, 101, 112, 116, 105, 109, 101, 9, 100, 98, 0, 0, 0, 0, 0, 0, 2,
];
assert_eq!(
result.rows[0].values[0],
@@ -517,7 +477,7 @@ mod tests {
assert_eq!(result.rows[0].values[2], ValueData::U32Value(1025).into());
assert_eq!(
result.rows[0].values[3],
ValueData::U64Value(2721566936019240841).into()
ValueData::U64Value(9442261431637846000).into()
);
assert_eq!(result.schema, expected_dense_schema());
}
@@ -536,7 +496,7 @@ mod tests {
let row_iter = rows_iter.iter_mut().next().unwrap();
let (encoded_table_id, tsid) = RowModifier::fill_internal_columns(table_id, &row_iter);
assert_eq!(encoded_table_id, ValueData::U32Value(1025).into());
assert_eq!(tsid, ValueData::U64Value(2721566936019240841).into());
assert_eq!(tsid, ValueData::U64Value(9442261431637846000).into());
// Change the column order
let schema = vec![
@@ -564,264 +524,6 @@ mod tests {
let row_iter = rows_iter.iter_mut().next().unwrap();
let (encoded_table_id, tsid) = RowModifier::fill_internal_columns(table_id, &row_iter);
assert_eq!(encoded_table_id, ValueData::U32Value(1025).into());
assert_eq!(tsid, ValueData::U64Value(2721566936019240841).into());
}
/// Helper function to create a schema with multiple label columns
fn create_multi_label_schema(labels: &[&str]) -> Vec<ColumnSchema> {
labels
.iter()
.map(|name| ColumnSchema {
column_name: name.to_string(),
datatype: ColumnDataType::String as i32,
semantic_type: SemanticType::Tag as _,
datatype_extension: None,
options: None,
})
.collect()
}
/// Helper function to create a name_to_column_id map
fn create_name_to_column_id(labels: &[&str]) -> HashMap<String, ColumnId> {
labels
.iter()
.enumerate()
.map(|(idx, name)| (name.to_string(), idx as ColumnId + 1))
.collect()
}
/// Helper function to create a row with string values
fn create_row_with_values(values: &[&str]) -> Row {
Row {
values: values
.iter()
.map(|v| ValueData::StringValue(v.to_string()).into())
.collect(),
}
}
/// Helper function to create a row with some null values
fn create_row_with_nulls(values: &[Option<&str>]) -> Row {
Row {
values: values
.iter()
.map(|v| {
v.map(|s| ValueData::StringValue(s.to_string()).into())
.unwrap_or(Value { value_data: None })
})
.collect(),
}
}
/// Helper function to extract TSID from a row
fn extract_tsid(
schema: Vec<ColumnSchema>,
row: Row,
name_to_column_id: &HashMap<String, ColumnId>,
table_id: TableId,
) -> u64 {
let rows = Rows {
schema,
rows: vec![row],
};
let mut rows_iter = RowsIter::new(rows, name_to_column_id);
let row_iter = rows_iter.iter_mut().next().unwrap();
let (_, tsid_value) = RowModifier::fill_internal_columns(table_id, &row_iter);
match tsid_value.value_data {
Some(ValueData::U64Value(tsid)) => tsid,
_ => panic!("Expected U64Value for TSID"),
}
}
#[test]
fn test_tsid_same_for_different_label_orders() {
// Test that rows with the same label name-value pairs but in different orders
// produce the same TSID
let table_id = 1025;
// Schema 1: a, b, c
let schema1 = create_multi_label_schema(&["a", "b", "c"]);
let name_to_column_id1 = create_name_to_column_id(&["a", "b", "c"]);
let row1 = create_row_with_values(&["A", "B", "C"]);
let tsid1 = extract_tsid(schema1, row1, &name_to_column_id1, table_id);
// Schema 2: b, a, c (different order)
let schema2 = create_multi_label_schema(&["b", "a", "c"]);
let name_to_column_id2 = create_name_to_column_id(&["a", "b", "c"]);
let row2 = create_row_with_values(&["B", "A", "C"]);
let tsid2 = extract_tsid(schema2, row2, &name_to_column_id2, table_id);
// Schema 3: c, b, a (another different order)
let schema3 = create_multi_label_schema(&["c", "b", "a"]);
let name_to_column_id3 = create_name_to_column_id(&["a", "b", "c"]);
let row3 = create_row_with_values(&["C", "B", "A"]);
let tsid3 = extract_tsid(schema3, row3, &name_to_column_id3, table_id);
// All should have the same TSID since label names are sorted lexicographically
// and we're using the same label name-value pairs
assert_eq!(
tsid1, tsid2,
"TSID should be same for different column orders"
);
assert_eq!(
tsid2, tsid3,
"TSID should be same for different column orders"
);
}
#[test]
fn test_tsid_same_with_null_labels() {
// Test that rows that differ only by null label values produce the same TSID
let table_id = 1025;
// Row 1: a=A, b=B (no nulls, fast path)
let schema1 = create_multi_label_schema(&["a", "b"]);
let name_to_column_id1 = create_name_to_column_id(&["a", "b"]);
let row1 = create_row_with_values(&["A", "B"]);
let tsid1 = extract_tsid(schema1, row1, &name_to_column_id1, table_id);
// Row 2: a=A, b=B, c=null (has null, slow path)
let schema2 = create_multi_label_schema(&["a", "b", "c"]);
let name_to_column_id2 = create_name_to_column_id(&["a", "b", "c"]);
let row2 = create_row_with_nulls(&[Some("A"), Some("B"), None]);
let tsid2 = extract_tsid(schema2, row2, &name_to_column_id2, table_id);
// Both should have the same TSID since null labels are ignored
assert_eq!(
tsid1, tsid2,
"TSID should be same when only difference is null label values"
);
}
#[test]
fn test_tsid_same_with_multiple_null_labels() {
// Test with multiple null labels
let table_id = 1025;
// Row 1: a=A, b=B (no nulls)
let schema1 = create_multi_label_schema(&["a", "b"]);
let name_to_column_id1 = create_name_to_column_id(&["a", "b"]);
let row1 = create_row_with_values(&["A", "B"]);
let tsid1 = extract_tsid(schema1, row1, &name_to_column_id1, table_id);
// Row 2: a=A, b=B, c=null, d=null (multiple nulls)
let schema2 = create_multi_label_schema(&["a", "b", "c", "d"]);
let name_to_column_id2 = create_name_to_column_id(&["a", "b", "c", "d"]);
let row2 = create_row_with_nulls(&[Some("A"), Some("B"), None, None]);
let tsid2 = extract_tsid(schema2, row2, &name_to_column_id2, table_id);
assert_eq!(
tsid1, tsid2,
"TSID should be same when only difference is multiple null label values"
);
}
#[test]
fn test_tsid_different_with_different_non_null_values() {
// Test that rows with different non-null values produce different TSIDs
let table_id = 1025;
// Row 1: a=A, b=B
let schema1 = create_multi_label_schema(&["a", "b"]);
let name_to_column_id1 = create_name_to_column_id(&["a", "b"]);
let row1 = create_row_with_values(&["A", "B"]);
let tsid1 = extract_tsid(schema1, row1, &name_to_column_id1, table_id);
// Row 2: a=A, b=C (different value for b)
let schema2 = create_multi_label_schema(&["a", "b"]);
let name_to_column_id2 = create_name_to_column_id(&["a", "b"]);
let row2 = create_row_with_values(&["A", "C"]);
let tsid2 = extract_tsid(schema2, row2, &name_to_column_id2, table_id);
assert_ne!(
tsid1, tsid2,
"TSID should be different when label values differ"
);
}
#[test]
fn test_tsid_fast_path_vs_slow_path_consistency() {
// Test that fast path (no nulls) and slow path (with nulls) produce
// the same TSID for the same non-null label values
let table_id = 1025;
// Fast path: a=A, b=B (no nulls)
let schema_fast = create_multi_label_schema(&["a", "b"]);
let name_to_column_id_fast = create_name_to_column_id(&["a", "b"]);
let row_fast = create_row_with_values(&["A", "B"]);
let tsid_fast = extract_tsid(schema_fast, row_fast, &name_to_column_id_fast, table_id);
// Slow path: a=A, b=B, c=null (has null, triggers slow path)
let schema_slow = create_multi_label_schema(&["a", "b", "c"]);
let name_to_column_id_slow = create_name_to_column_id(&["a", "b", "c"]);
let row_slow = create_row_with_nulls(&[Some("A"), Some("B"), None]);
let tsid_slow = extract_tsid(schema_slow, row_slow, &name_to_column_id_slow, table_id);
assert_eq!(
tsid_fast, tsid_slow,
"Fast path and slow path should produce same TSID for same non-null values"
);
}
#[test]
fn test_tsid_with_null_in_middle() {
// Test with null in the middle of labels
let table_id = 1025;
// Row 1: a=A, b=B, c=C
let schema1 = create_multi_label_schema(&["a", "b", "c"]);
let name_to_column_id1 = create_name_to_column_id(&["a", "b", "c"]);
let row1 = create_row_with_values(&["A", "B", "C"]);
let tsid1 = extract_tsid(schema1, row1, &name_to_column_id1, table_id);
// Row 2: a=A, b=null, c=C (null in middle)
let schema2 = create_multi_label_schema(&["a", "b", "c"]);
let name_to_column_id2 = create_name_to_column_id(&["a", "b", "c"]);
let row2 = create_row_with_nulls(&[Some("A"), None, Some("C")]);
let tsid2 = extract_tsid(schema2, row2, &name_to_column_id2, table_id);
// b is null in row2, so it is ignored and row2 is equivalent to a=A, c=C.
// row1 is a=A, b=B, c=C, so the two rows should produce different TSIDs.
assert_ne!(
tsid1, tsid2,
"TSID should be different when a non-null value becomes null"
);
// Row 3: a=A, c=C (no b at all, equivalent to row2)
let schema3 = create_multi_label_schema(&["a", "c"]);
let name_to_column_id3 = create_name_to_column_id(&["a", "c"]);
let row3 = create_row_with_values(&["A", "C"]);
let tsid3 = extract_tsid(schema3, row3, &name_to_column_id3, table_id);
// Row2 (a=A, b=null, c=C) should be same as row3 (a=A, c=C)
assert_eq!(
tsid2, tsid3,
"TSID should be same when null label is ignored"
);
}
#[test]
fn test_tsid_all_null_labels() {
// Test with all labels being null
let table_id = 1025;
// Row with all nulls
let schema = create_multi_label_schema(&["a", "b", "c"]);
let name_to_column_id = create_name_to_column_id(&["a", "b", "c"]);
let row = create_row_with_nulls(&[None, None, None]);
let tsid = extract_tsid(schema.clone(), row, &name_to_column_id, table_id);
// Should still produce a TSID; when all values are null it is based on the
// label name hash only. This exercises the slow path's all-null case.
// Test that it's consistent - same schema with all nulls should produce same TSID
let row2 = create_row_with_nulls(&[None, None, None]);
let tsid2 = extract_tsid(schema, row2, &name_to_column_id, table_id);
assert_eq!(
tsid, tsid2,
"TSID should be consistent when all label values are null"
);
assert_eq!(tsid, 9442261431637846000);
}
}
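// A minimal sketch (hypothetical, not the actual `RowModifier::fill_internal_columns`
// implementation) of the property these tests pin down: hashing the
// lexicographically sorted, non-null (label, value) pairs makes the TSID
// independent of column order and of null labels.
use std::collections::BTreeMap;
use std::hash::{Hash, Hasher};

fn tsid_sketch(table_id: u32, labels: &[(&str, Option<&str>)]) -> u64 {
    // BTreeMap iterates keys in sorted order, so insertion order is irrelevant.
    let sorted: BTreeMap<&str, &str> = labels
        .iter()
        .filter_map(|&(name, value)| value.map(|v| (name, v))) // null labels are skipped
        .collect();
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    table_id.hash(&mut hasher);
    for (name, value) in sorted {
        name.hash(&mut hasher);
        value.hash(&mut hasher);
    }
    hasher.finish()
}
// tsid_sketch(1025, &[("b", Some("B")), ("a", Some("A")), ("c", None)])
// equals tsid_sketch(1025, &[("a", Some("A")), ("b", Some("B"))]).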

View File

@@ -55,7 +55,7 @@ lazy_static = "1.4"
log-store = { workspace = true }
mito-codec.workspace = true
moka = { workspace = true, features = ["sync", "future"] }
object-store = { workspace = true, features = ["testing"] }
object-store.workspace = true
parquet = { workspace = true, features = ["async"] }
paste.workspace = true
pin-project.workspace = true

View File

@@ -37,7 +37,7 @@ use crate::error::{CleanDirSnafu, DeleteIndexSnafu, DeleteSstSnafu, OpenDalSnafu
use crate::metrics::{COMPACTION_STAGE_ELAPSED, FLUSH_ELAPSED};
use crate::read::{FlatSource, Source};
use crate::region::options::IndexOptions;
use crate::sst::file::{FileHandle, RegionFileId, RegionIndexId};
use crate::sst::file::{FileHandle, RegionFileId};
use crate::sst::index::IndexerBuilderImpl;
use crate::sst::index::intermediate::IntermediateManager;
use crate::sst::index::puffin_manager::{PuffinManagerFactory, SstPuffinManager};
@@ -216,7 +216,7 @@ impl AccessLayer {
pub(crate) async fn delete_sst(
&self,
region_file_id: &RegionFileId,
index_file_id: &RegionIndexId,
index_file_id: &RegionFileId,
) -> Result<()> {
let path = location::sst_file_path(&self.table_dir, *region_file_id, self.path_type);
self.object_store
@@ -226,22 +226,12 @@ impl AccessLayer {
file_id: region_file_id.file_id(),
})?;
// Delete all versions of the index file.
for version in 0..=index_file_id.version {
self.delete_index(&RegionIndexId::new(index_file_id.file_id, version))
.await?;
}
Ok(())
}
pub(crate) async fn delete_index(&self, region_index_id: &RegionIndexId) -> Result<()> {
let path = location::index_file_path(&self.table_dir, *region_index_id, self.path_type);
let path = location::index_file_path(&self.table_dir, *index_file_id, self.path_type);
self.object_store
.delete(&path)
.await
.context(DeleteIndexSnafu {
file_id: region_index_id.file_id(),
file_id: region_file_id.file_id(),
})?;
Ok(())
@@ -301,7 +291,6 @@ impl AccessLayer {
puffin_manager: self
.puffin_manager_factory
.build(store, path_provider.clone()),
write_cache_enabled: false,
intermediate_manager: self.intermediate_manager.clone(),
index_options: request.index_options,
inverted_index_config: request.inverted_index_config,
@@ -479,10 +468,9 @@ impl TempFileCleaner {
}
/// Removes the SST and index file from the local atomic dir by the file id.
/// This only removes the initial index; since the index version is always 0 for a new SST, it is safe to pass 0.
pub(crate) async fn clean_by_file_id(&self, file_id: FileId) {
let sst_key = IndexKey::new(self.region_id, file_id, FileType::Parquet).to_string();
let index_key = IndexKey::new(self.region_id, file_id, FileType::Puffin(0)).to_string();
let index_key = IndexKey::new(self.region_id, file_id, FileType::Puffin).to_string();
Self::clean_atomic_dir_files(&self.object_store, &[&sst_key, &index_key]).await;
}
@@ -565,12 +553,9 @@ async fn clean_dir(dir: &str) -> Result<()> {
/// Path provider for SST file and index file.
pub trait FilePathProvider: Send + Sync {
/// Creates index file path of given file id. The version defaults to 0 and is not shown in the path.
/// Creates index file path of given file id.
fn build_index_file_path(&self, file_id: RegionFileId) -> String;
/// Creates index file path of given index id (with version support).
fn build_index_file_path_with_version(&self, index_id: RegionIndexId) -> String;
/// Creates SST file path of given file id.
fn build_sst_file_path(&self, file_id: RegionFileId) -> String;
}
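// A toy `FilePathProvider` (hypothetical, illustration only): with the version
// parameter gone, an implementor maps a `RegionFileId` to exactly one SST path
// and one index path, e.g. side by side under a flat directory.
struct FlatPathProvider {
    dir: String,
}

impl FilePathProvider for FlatPathProvider {
    fn build_index_file_path(&self, file_id: RegionFileId) -> String {
        format!("{}/{}.puffin", self.dir, file_id.file_id())
    }

    fn build_sst_file_path(&self, file_id: RegionFileId) -> String {
        format!("{}/{}.parquet", self.dir, file_id.file_id())
    }
}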
@@ -590,16 +575,7 @@ impl WriteCachePathProvider {
impl FilePathProvider for WriteCachePathProvider {
fn build_index_file_path(&self, file_id: RegionFileId) -> String {
let puffin_key = IndexKey::new(file_id.region_id(), file_id.file_id(), FileType::Puffin(0));
self.file_cache.cache_file_path(puffin_key)
}
fn build_index_file_path_with_version(&self, index_id: RegionIndexId) -> String {
let puffin_key = IndexKey::new(
index_id.region_id(),
index_id.file_id(),
FileType::Puffin(index_id.version),
);
let puffin_key = IndexKey::new(file_id.region_id(), file_id.file_id(), FileType::Puffin);
self.file_cache.cache_file_path(puffin_key)
}
@@ -629,11 +605,7 @@ impl RegionFilePathFactory {
impl FilePathProvider for RegionFilePathFactory {
fn build_index_file_path(&self, file_id: RegionFileId) -> String {
location::index_file_path_legacy(&self.table_dir, file_id, self.path_type)
}
fn build_index_file_path_with_version(&self, index_id: RegionIndexId) -> String {
location::index_file_path(&self.table_dir, index_id, self.path_type)
location::index_file_path(&self.table_dir, file_id, self.path_type)
}
fn build_sst_file_path(&self, file_id: RegionFileId) -> String {

View File

@@ -18,7 +18,6 @@ mod cache_size;
pub(crate) mod file_cache;
pub(crate) mod index;
pub(crate) mod manifest_cache;
#[cfg(test)]
pub(crate) mod test_util;
pub(crate) mod write_cache;
@@ -44,8 +43,7 @@ use crate::cache::index::inverted_index::{InvertedIndexCache, InvertedIndexCache
use crate::cache::write_cache::WriteCacheRef;
use crate::metrics::{CACHE_BYTES, CACHE_EVICTION, CACHE_HIT, CACHE_MISS};
use crate::read::Batch;
use crate::sst::file::{RegionFileId, RegionIndexId};
use crate::sst::parquet::reader::MetadataCacheMetrics;
use crate::sst::file::RegionFileId;
/// Metrics type key for sst meta.
const SST_META_TYPE: &str = "sst_meta";
@@ -76,24 +74,19 @@ pub enum CacheStrategy {
}
impl CacheStrategy {
/// Gets parquet metadata with cache metrics tracking.
/// Returns the metadata and updates the provided metrics.
pub(crate) async fn get_parquet_meta_data(
/// Calls [CacheManager::get_parquet_meta_data()].
pub async fn get_parquet_meta_data(
&self,
file_id: RegionFileId,
metrics: &mut MetadataCacheMetrics,
) -> Option<Arc<ParquetMetaData>> {
match self {
CacheStrategy::EnableAll(cache_manager) => {
cache_manager.get_parquet_meta_data(file_id, metrics).await
cache_manager.get_parquet_meta_data(file_id).await
}
CacheStrategy::Compaction(cache_manager) => {
cache_manager.get_parquet_meta_data(file_id, metrics).await
}
CacheStrategy::Disabled => {
metrics.cache_miss += 1;
None
cache_manager.get_parquet_meta_data(file_id).await
}
CacheStrategy::Disabled => None,
}
}
@@ -180,7 +173,7 @@ impl CacheStrategy {
}
/// Calls [CacheManager::evict_puffin_cache()].
pub async fn evict_puffin_cache(&self, file_id: RegionIndexId) {
pub async fn evict_puffin_cache(&self, file_id: RegionFileId) {
match self {
CacheStrategy::EnableAll(cache_manager) => {
cache_manager.evict_puffin_cache(file_id).await
@@ -298,17 +291,16 @@ impl CacheManager {
CacheManagerBuilder::default()
}
/// Gets cached [ParquetMetaData] with metrics tracking.
/// Tries in-memory cache first, then file cache, updating metrics accordingly.
pub(crate) async fn get_parquet_meta_data(
/// Gets cached [ParquetMetaData] from in-memory cache first.
/// If not found, tries to get it from write cache and fill the in-memory cache.
pub async fn get_parquet_meta_data(
&self,
file_id: RegionFileId,
metrics: &mut MetadataCacheMetrics,
) -> Option<Arc<ParquetMetaData>> {
// Try to get metadata from sst meta cache
if let Some(metadata) = self.get_parquet_meta_data_from_mem_cache(file_id) {
metrics.mem_cache_hit += 1;
return Some(metadata);
let metadata = self.get_parquet_meta_data_from_mem_cache(file_id);
if metadata.is_some() {
return metadata;
}
// Try to get metadata from write cache
@@ -316,13 +308,11 @@ impl CacheManager {
if let Some(write_cache) = &self.write_cache
&& let Some(metadata) = write_cache.file_cache().get_parquet_meta_data(key).await
{
metrics.file_cache_hit += 1;
let metadata = Arc::new(metadata);
// Put metadata into sst meta cache
self.put_parquet_meta_data(file_id, metadata.clone());
return Some(metadata);
};
metrics.cache_miss += 1;
None
}
@@ -400,7 +390,7 @@ impl CacheManager {
}
/// Evicts every puffin-related cache entry for the given file.
pub async fn evict_puffin_cache(&self, file_id: RegionIndexId) {
pub async fn evict_puffin_cache(&self, file_id: RegionFileId) {
if let Some(cache) = &self.bloom_filter_index_cache {
cache.invalidate_file(file_id.file_id());
}
@@ -422,7 +412,7 @@ impl CacheManager {
.remove(IndexKey::new(
file_id.region_id(),
file_id.file_id(),
FileType::Puffin(file_id.version),
FileType::Puffin,
))
.await;
}
@@ -835,14 +825,8 @@ mod tests {
let region_id = RegionId::new(1, 1);
let file_id = RegionFileId::new(region_id, FileId::random());
let metadata = parquet_meta();
let mut metrics = MetadataCacheMetrics::default();
cache.put_parquet_meta_data(file_id, metadata);
assert!(
cache
.get_parquet_meta_data(file_id, &mut metrics)
.await
.is_none()
);
assert!(cache.get_parquet_meta_data(file_id).await.is_none());
let value = Value::Int64(10);
let vector: VectorRef = Arc::new(Int64Vector::from_slice([10, 10, 10, 10]));
@@ -864,30 +848,14 @@ mod tests {
#[tokio::test]
async fn test_parquet_meta_cache() {
let cache = CacheManager::builder().sst_meta_cache_size(2000).build();
let mut metrics = MetadataCacheMetrics::default();
let region_id = RegionId::new(1, 1);
let file_id = RegionFileId::new(region_id, FileId::random());
assert!(
cache
.get_parquet_meta_data(file_id, &mut metrics)
.await
.is_none()
);
assert!(cache.get_parquet_meta_data(file_id).await.is_none());
let metadata = parquet_meta();
cache.put_parquet_meta_data(file_id, metadata);
assert!(
cache
.get_parquet_meta_data(file_id, &mut metrics)
.await
.is_some()
);
assert!(cache.get_parquet_meta_data(file_id).await.is_some());
cache.remove_parquet_meta_data(file_id);
assert!(
cache
.get_parquet_meta_data(file_id, &mut metrics)
.await
.is_none()
);
assert!(cache.get_parquet_meta_data(file_id).await.is_none());
}
#[test]
@@ -949,7 +917,7 @@ mod tests {
let cache = Arc::new(cache);
let region_id = RegionId::new(1, 1);
let index_id = RegionIndexId::new(RegionFileId::new(region_id, FileId::random()), 0);
let region_file_id = RegionFileId::new(region_id, FileId::random());
let column_id: ColumnId = 1;
let bloom_cache = cache.bloom_filter_index_cache().unwrap().clone();
@@ -957,21 +925,16 @@ mod tests {
let result_cache = cache.index_result_cache().unwrap();
let puffin_metadata_cache = cache.puffin_metadata_cache().unwrap().clone();
let bloom_key = (
index_id.file_id(),
index_id.version,
column_id,
Tag::Skipping,
);
let bloom_key = (region_file_id.file_id(), column_id, Tag::Skipping);
bloom_cache.put_metadata(bloom_key, Arc::new(BloomFilterMeta::default()));
inverted_cache.put_metadata(
(index_id.file_id(), index_id.version),
region_file_id.file_id(),
Arc::new(InvertedIndexMetas::default()),
);
let predicate = PredicateKey::new_bloom(Arc::new(BTreeMap::new()));
let selection = Arc::new(RowGroupSelection::default());
result_cache.put(predicate.clone(), index_id.file_id(), selection);
let file_id_str = index_id.to_string();
result_cache.put(predicate.clone(), region_file_id.file_id(), selection);
let file_id_str = region_file_id.to_string();
let metadata = Arc::new(FileMetadata {
blobs: Vec::new(),
properties: HashMap::new(),
@@ -981,32 +944,40 @@ mod tests {
assert!(bloom_cache.get_metadata(bloom_key).is_some());
assert!(
inverted_cache
.get_metadata((index_id.file_id(), index_id.version))
.get_metadata(region_file_id.file_id())
.is_some()
);
assert!(
result_cache
.get(&predicate, region_file_id.file_id())
.is_some()
);
assert!(result_cache.get(&predicate, index_id.file_id()).is_some());
assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_some());
cache.evict_puffin_cache(index_id).await;
cache.evict_puffin_cache(region_file_id).await;
assert!(bloom_cache.get_metadata(bloom_key).is_none());
assert!(
inverted_cache
.get_metadata((index_id.file_id(), index_id.version))
.get_metadata(region_file_id.file_id())
.is_none()
);
assert!(
result_cache
.get(&predicate, region_file_id.file_id())
.is_none()
);
assert!(result_cache.get(&predicate, index_id.file_id()).is_none());
assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_none());
// Refill caches and evict via CacheStrategy to ensure delegation works.
bloom_cache.put_metadata(bloom_key, Arc::new(BloomFilterMeta::default()));
inverted_cache.put_metadata(
(index_id.file_id(), index_id.version),
region_file_id.file_id(),
Arc::new(InvertedIndexMetas::default()),
);
result_cache.put(
predicate.clone(),
index_id.file_id(),
region_file_id.file_id(),
Arc::new(RowGroupSelection::default()),
);
puffin_metadata_cache.put_metadata(
@@ -1018,15 +989,19 @@ mod tests {
);
let strategy = CacheStrategy::EnableAll(cache.clone());
strategy.evict_puffin_cache(index_id).await;
strategy.evict_puffin_cache(region_file_id).await;
assert!(bloom_cache.get_metadata(bloom_key).is_none());
assert!(
inverted_cache
.get_metadata((index_id.file_id(), index_id.version))
.get_metadata(region_file_id.file_id())
.is_none()
);
assert!(
result_cache
.get(&predicate, region_file_id.file_id())
.is_none()
);
assert!(result_cache.get(&predicate, index_id.file_id()).is_none());
assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_none());
}
}

View File

@@ -55,35 +55,121 @@ pub(crate) const DEFAULT_INDEX_CACHE_PERCENT: u8 = 20;
/// Minimum capacity for each cache (512MB).
const MIN_CACHE_CAPACITY: u64 = 512 * 1024 * 1024;
/// Inner struct for FileCache that can be used in spawned tasks.
/// A file cache manages files on the local store and evicts files based
/// on size.
#[derive(Debug)]
struct FileCacheInner {
pub(crate) struct FileCache {
/// Local store to cache files.
local_store: ObjectStore,
/// Index to track cached Parquet files.
parquet_index: Cache<IndexKey, IndexValue>,
/// Index to track cached Puffin files.
puffin_index: Cache<IndexKey, IndexValue>,
/// Capacity of the puffin (index) cache in bytes.
puffin_capacity: u64,
}
impl FileCacheInner {
pub(crate) type FileCacheRef = Arc<FileCache>;
impl FileCache {
/// Creates a new file cache.
pub(crate) fn new(
local_store: ObjectStore,
capacity: ReadableSize,
ttl: Option<Duration>,
index_cache_percent: Option<u8>,
) -> FileCache {
// Validate and use the provided percent or default
let index_percent = index_cache_percent
.filter(|&percent| percent > 0 && percent < 100)
.unwrap_or(DEFAULT_INDEX_CACHE_PERCENT);
let total_capacity = capacity.as_bytes();
// Convert percent to ratio and calculate capacity for each cache
let index_ratio = index_percent as f64 / 100.0;
let puffin_capacity = (total_capacity as f64 * index_ratio) as u64;
let parquet_capacity = total_capacity - puffin_capacity;
// Ensure both capacities are at least 512MB
let puffin_capacity = puffin_capacity.max(MIN_CACHE_CAPACITY);
let parquet_capacity = parquet_capacity.max(MIN_CACHE_CAPACITY);
info!(
"Initializing file cache with index_percent: {}%, total_capacity: {}, parquet_capacity: {}, puffin_capacity: {}",
index_percent,
ReadableSize(total_capacity),
ReadableSize(parquet_capacity),
ReadableSize(puffin_capacity)
);
let parquet_index = Self::build_cache(local_store.clone(), parquet_capacity, ttl, "file");
let puffin_index = Self::build_cache(local_store.clone(), puffin_capacity, ttl, "index");
FileCache {
local_store,
parquet_index,
puffin_index,
puffin_capacity,
}
}
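// Worked example (illustrative): with capacity = 10 GiB and index_cache_percent = 20,
// puffin_capacity = 2 GiB and parquet_capacity = 8 GiB. With capacity = 600 MiB,
// both shares fall below MIN_CACHE_CAPACITY (512 MiB) and are clamped to it, so
// the combined budget can exceed the configured total.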
/// Builds a cache for a specific file type.
fn build_cache(
local_store: ObjectStore,
capacity: u64,
ttl: Option<Duration>,
label: &'static str,
) -> Cache<IndexKey, IndexValue> {
let cache_store = local_store;
let mut builder = Cache::builder()
.eviction_policy(EvictionPolicy::lru())
.weigher(|_key, value: &IndexValue| -> u32 {
// We only measure space on local store.
value.file_size
})
.max_capacity(capacity)
.async_eviction_listener(move |key, value, cause| {
let store = cache_store.clone();
// Stores files under FILE_DIR.
let file_path = cache_file_path(FILE_DIR, *key);
async move {
if let RemovalCause::Replaced = cause {
// The cache entry is replaced by another file. This is unexpected; we don't remove the
// file but update the metrics, as the file has already been replaced by users.
CACHE_BYTES.with_label_values(&[label]).sub(value.file_size.into());
warn!("Replace existing cache {} for region {} unexpectedly", file_path, key.region_id);
return;
}
match store.delete(&file_path).await {
Ok(()) => {
CACHE_BYTES.with_label_values(&[label]).sub(value.file_size.into());
}
Err(e) => {
warn!(e; "Failed to delete cached file {} for region {}", file_path, key.region_id);
}
}
}
.boxed()
});
if let Some(ttl) = ttl {
builder = builder.time_to_idle(ttl);
}
builder.build()
}
/// Returns the appropriate memory index for the given file type.
fn memory_index(&self, file_type: FileType) -> &Cache<IndexKey, IndexValue> {
match file_type {
FileType::Parquet => &self.parquet_index,
FileType::Puffin { .. } => &self.puffin_index,
FileType::Puffin => &self.puffin_index,
}
}
/// Returns the cache file path for the key.
fn cache_file_path(&self, key: IndexKey) -> String {
cache_file_path(FILE_DIR, key)
}
/// Puts a file into the cache index.
///
/// The `WriteCache` should ensure the file is in the correct path.
async fn put(&self, key: IndexKey, value: IndexValue) {
pub(crate) async fn put(&self, key: IndexKey, value: IndexValue) {
CACHE_BYTES
.with_label_values(&[key.file_type.metric_label()])
.add(value.file_size.into());
@@ -94,8 +180,100 @@ impl FileCacheInner {
index.run_pending_tasks().await;
}
/// Recovers the index from local store.
async fn recover(&self) -> Result<()> {
pub(crate) async fn get(&self, key: IndexKey) -> Option<IndexValue> {
self.memory_index(key.file_type).get(&key).await
}
/// Reads a file from the cache.
#[allow(unused)]
pub(crate) async fn reader(&self, key: IndexKey) -> Option<Reader> {
// We must use `get()` to update the estimator of the cache.
// See https://docs.rs/moka/latest/moka/future/struct.Cache.html#method.contains_key
let index = self.memory_index(key.file_type);
if index.get(&key).await.is_none() {
CACHE_MISS
.with_label_values(&[key.file_type.metric_label()])
.inc();
return None;
}
let file_path = self.cache_file_path(key);
match self.get_reader(&file_path).await {
Ok(Some(reader)) => {
CACHE_HIT
.with_label_values(&[key.file_type.metric_label()])
.inc();
return Some(reader);
}
Err(e) => {
if e.kind() != ErrorKind::NotFound {
warn!(e; "Failed to get file for key {:?}", key);
}
}
Ok(None) => {}
}
// We remove the file from the index.
index.remove(&key).await;
CACHE_MISS
.with_label_values(&[key.file_type.metric_label()])
.inc();
None
}
/// Reads ranges from the cache.
pub(crate) async fn read_ranges(
&self,
key: IndexKey,
ranges: &[Range<u64>],
) -> Option<Vec<Bytes>> {
let index = self.memory_index(key.file_type);
if index.get(&key).await.is_none() {
CACHE_MISS
.with_label_values(&[key.file_type.metric_label()])
.inc();
return None;
}
let file_path = self.cache_file_path(key);
// In most cases this uses a blocking read,
// because FileCache is normally backed by a local file system, which supports blocking reads.
let bytes_result = fetch_byte_ranges(&file_path, self.local_store.clone(), ranges).await;
match bytes_result {
Ok(bytes) => {
CACHE_HIT
.with_label_values(&[key.file_type.metric_label()])
.inc();
Some(bytes)
}
Err(e) => {
if e.kind() != ErrorKind::NotFound {
warn!(e; "Failed to get file for key {:?}", key);
}
// We remove the file from the index.
index.remove(&key).await;
CACHE_MISS
.with_label_values(&[key.file_type.metric_label()])
.inc();
None
}
}
}
/// Removes a file from the cache explicitly.
/// It always tries to remove the file from the local store because the file may not be
/// in the memory index if the upload failed.
pub(crate) async fn remove(&self, key: IndexKey) {
let file_path = self.cache_file_path(key);
self.memory_index(key.file_type).remove(&key).await;
// Always delete the file from the local store.
if let Err(e) = self.local_store.delete(&file_path).await {
warn!(e; "Failed to delete a cached file {}", file_path);
}
}
async fn recover_inner(&self) -> Result<()> {
let now = Instant::now();
let mut lister = self
.local_store
@@ -130,7 +308,7 @@ impl FileCacheInner {
// Track sizes separately for each file type
match key.file_type {
FileType::Parquet => parquet_size += size,
FileType::Puffin { .. } => puffin_size += size,
FileType::Puffin => puffin_size += size,
}
}
// The metric is a signed int gauge, so we update it once at the end.
@@ -163,7 +341,136 @@ impl FileCacheInner {
Ok(())
}
/// Downloads a file without cleaning up on error.
/// Recovers the index from local store.
///
/// If `task_receiver` is provided, spawns a background task after recovery
/// to process `RegionLoadCacheTask` messages for loading files into the cache.
pub(crate) async fn recover(
self: &Arc<Self>,
sync: bool,
task_receiver: Option<UnboundedReceiver<RegionLoadCacheTask>>,
) {
let moved_self = self.clone();
let handle = tokio::spawn(async move {
if let Err(err) = moved_self.recover_inner().await {
error!(err; "Failed to recover file cache.")
}
// Spawns background task to process region load cache tasks after recovery.
// So it won't block the recovery when `sync` is true.
if let Some(mut receiver) = task_receiver {
let cache_ref = moved_self.clone();
info!("Spawning background task for processing region load cache tasks");
tokio::spawn(async move {
while let Some(task) = receiver.recv().await {
let file_cache = cache_ref.clone();
task.fill_cache(file_cache).await;
}
info!("Background task for processing region load cache tasks stopped");
});
}
});
if sync {
let _ = handle.await;
}
}
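// Usage (illustrative): `cache.recover(true, None).await` blocks until the index
// is rebuilt (as the tests below do), while `cache.recover(false, Some(rx)).await`
// returns immediately and keeps filling the cache from `RegionLoadCacheTask`s in
// the background once recovery finishes.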
/// Returns the cache file path for the key.
pub(crate) fn cache_file_path(&self, key: IndexKey) -> String {
cache_file_path(FILE_DIR, key)
}
/// Returns the local store of the file cache.
pub(crate) fn local_store(&self) -> ObjectStore {
self.local_store.clone()
}
/// Gets the parquet metadata from the file cache.
/// Returns None if the file is not in the cache or the metadata fails to load.
pub(crate) async fn get_parquet_meta_data(&self, key: IndexKey) -> Option<ParquetMetaData> {
// Check if file cache contains the key
if let Some(index_value) = self.parquet_index.get(&key).await {
// Load metadata from file cache
let local_store = self.local_store();
let file_path = self.cache_file_path(key);
let file_size = index_value.file_size as u64;
let metadata_loader = MetadataLoader::new(local_store, &file_path, file_size);
match metadata_loader.load().await {
Ok(metadata) => {
CACHE_HIT
.with_label_values(&[key.file_type.metric_label()])
.inc();
Some(metadata)
}
Err(e) => {
if !e.is_object_not_found() {
warn!(
e; "Failed to get parquet metadata for key {:?}",
key
);
}
// We remove the file from the index.
self.parquet_index.remove(&key).await;
CACHE_MISS
.with_label_values(&[key.file_type.metric_label()])
.inc();
None
}
}
} else {
CACHE_MISS
.with_label_values(&[key.file_type.metric_label()])
.inc();
None
}
}
async fn get_reader(&self, file_path: &str) -> object_store::Result<Option<Reader>> {
if self.local_store.exists(file_path).await? {
Ok(Some(self.local_store.reader(file_path).await?))
} else {
Ok(None)
}
}
/// Checks if the key is in the file cache.
pub(crate) fn contains_key(&self, key: &IndexKey) -> bool {
self.memory_index(key.file_type).contains_key(key)
}
/// Returns the capacity of the puffin (index) cache in bytes.
pub(crate) fn puffin_cache_capacity(&self) -> u64 {
self.puffin_capacity
}
/// Returns the current weighted size (used bytes) of the puffin (index) cache.
pub(crate) fn puffin_cache_size(&self) -> u64 {
self.puffin_index.weighted_size()
}
/// Downloads a file in `remote_path` from the remote object store to the local cache
/// (specified by `index_key`).
pub(crate) async fn download(
&self,
index_key: IndexKey,
remote_path: &str,
remote_store: &ObjectStore,
file_size: u64,
) -> Result<()> {
if let Err(e) = self
.download_without_cleaning(index_key, remote_path, remote_store, file_size)
.await
{
let filename = index_key.to_string();
TempFileCleaner::clean_atomic_dir_files(&self.local_store, &[&filename]).await;
return Err(e);
}
Ok(())
}
async fn download_without_cleaning(
&self,
index_key: IndexKey,
@@ -178,7 +485,7 @@ impl FileCacheInner {
let timer = WRITE_CACHE_DOWNLOAD_ELAPSED
.with_label_values(&[match file_type {
FileType::Parquet => "download_parquet",
FileType::Puffin { .. } => "download_puffin",
FileType::Puffin => "download_puffin",
}])
.start_timer();
@@ -230,360 +537,11 @@ impl FileCacheInner {
self.put(index_key, index_value).await;
Ok(())
}
/// Downloads a file from remote store to local cache.
async fn download(
&self,
index_key: IndexKey,
remote_path: &str,
remote_store: &ObjectStore,
file_size: u64,
) -> Result<()> {
if let Err(e) = self
.download_without_cleaning(index_key, remote_path, remote_store, file_size)
.await
{
let filename = index_key.to_string();
TempFileCleaner::clean_atomic_dir_files(&self.local_store, &[&filename]).await;
return Err(e);
}
Ok(())
}
}
/// A file cache manages files on the local store and evicts files based
/// on size.
#[derive(Debug, Clone)]
pub(crate) struct FileCache {
/// Inner cache state shared with background worker.
inner: Arc<FileCacheInner>,
/// Capacity of the puffin (index) cache in bytes.
puffin_capacity: u64,
}
pub(crate) type FileCacheRef = Arc<FileCache>;
impl FileCache {
/// Creates a new file cache.
pub(crate) fn new(
local_store: ObjectStore,
capacity: ReadableSize,
ttl: Option<Duration>,
index_cache_percent: Option<u8>,
) -> FileCache {
// Validate and use the provided percent or default
let index_percent = index_cache_percent
.filter(|&percent| percent > 0 && percent < 100)
.unwrap_or(DEFAULT_INDEX_CACHE_PERCENT);
let total_capacity = capacity.as_bytes();
// Convert percent to ratio and calculate capacity for each cache
let index_ratio = index_percent as f64 / 100.0;
let puffin_capacity = (total_capacity as f64 * index_ratio) as u64;
let parquet_capacity = total_capacity - puffin_capacity;
// Ensure both capacities are at least 512MB
let puffin_capacity = puffin_capacity.max(MIN_CACHE_CAPACITY);
let parquet_capacity = parquet_capacity.max(MIN_CACHE_CAPACITY);
info!(
"Initializing file cache with index_percent: {}%, total_capacity: {}, parquet_capacity: {}, puffin_capacity: {}",
index_percent,
ReadableSize(total_capacity),
ReadableSize(parquet_capacity),
ReadableSize(puffin_capacity)
);
let parquet_index = Self::build_cache(local_store.clone(), parquet_capacity, ttl, "file");
let puffin_index = Self::build_cache(local_store.clone(), puffin_capacity, ttl, "index");
// Create inner cache shared with background worker
let inner = Arc::new(FileCacheInner {
local_store,
parquet_index,
puffin_index,
});
FileCache {
inner,
puffin_capacity,
}
}
/// Builds a cache for a specific file type.
fn build_cache(
local_store: ObjectStore,
capacity: u64,
ttl: Option<Duration>,
label: &'static str,
) -> Cache<IndexKey, IndexValue> {
let cache_store = local_store;
let mut builder = Cache::builder()
.eviction_policy(EvictionPolicy::lru())
.weigher(|_key, value: &IndexValue| -> u32 {
// We only measure space on local store.
value.file_size
})
.max_capacity(capacity)
.async_eviction_listener(move |key, value, cause| {
let store = cache_store.clone();
// Stores files under FILE_DIR.
let file_path = cache_file_path(FILE_DIR, *key);
async move {
if let RemovalCause::Replaced = cause {
// The cache entry is replaced by another file. This is unexpected; we don't remove the
// file but update the metrics, as the file has already been replaced by users.
CACHE_BYTES.with_label_values(&[label]).sub(value.file_size.into());
// TODO(yingwen): Don't log this at warn level later.
warn!("Replace existing cache {} for region {} unexpectedly", file_path, key.region_id);
return;
}
match store.delete(&file_path).await {
Ok(()) => {
CACHE_BYTES.with_label_values(&[label]).sub(value.file_size.into());
}
Err(e) => {
warn!(e; "Failed to delete cached file {} for region {}", file_path, key.region_id);
}
}
}
.boxed()
});
if let Some(ttl) = ttl {
builder = builder.time_to_idle(ttl);
}
builder.build()
}
/// Puts a file into the cache index.
///
/// The `WriteCache` should ensure the file is in the correct path.
pub(crate) async fn put(&self, key: IndexKey, value: IndexValue) {
self.inner.put(key, value).await
}
pub(crate) async fn get(&self, key: IndexKey) -> Option<IndexValue> {
self.inner.memory_index(key.file_type).get(&key).await
}
/// Reads a file from the cache.
#[allow(unused)]
pub(crate) async fn reader(&self, key: IndexKey) -> Option<Reader> {
// We must use `get()` to update the estimator of the cache.
// See https://docs.rs/moka/latest/moka/future/struct.Cache.html#method.contains_key
let index = self.inner.memory_index(key.file_type);
if index.get(&key).await.is_none() {
CACHE_MISS
.with_label_values(&[key.file_type.metric_label()])
.inc();
return None;
}
let file_path = self.inner.cache_file_path(key);
match self.get_reader(&file_path).await {
Ok(Some(reader)) => {
CACHE_HIT
.with_label_values(&[key.file_type.metric_label()])
.inc();
return Some(reader);
}
Err(e) => {
if e.kind() != ErrorKind::NotFound {
warn!(e; "Failed to get file for key {:?}", key);
}
}
Ok(None) => {}
}
// We remove the file from the index.
index.remove(&key).await;
CACHE_MISS
.with_label_values(&[key.file_type.metric_label()])
.inc();
None
}
/// Reads ranges from the cache.
pub(crate) async fn read_ranges(
&self,
key: IndexKey,
ranges: &[Range<u64>],
) -> Option<Vec<Bytes>> {
let index = self.inner.memory_index(key.file_type);
if index.get(&key).await.is_none() {
CACHE_MISS
.with_label_values(&[key.file_type.metric_label()])
.inc();
return None;
}
let file_path = self.inner.cache_file_path(key);
// In most cases this uses a blocking read,
// because FileCache is normally backed by a local file system, which supports blocking reads.
let bytes_result =
fetch_byte_ranges(&file_path, self.inner.local_store.clone(), ranges).await;
match bytes_result {
Ok(bytes) => {
CACHE_HIT
.with_label_values(&[key.file_type.metric_label()])
.inc();
Some(bytes)
}
Err(e) => {
if e.kind() != ErrorKind::NotFound {
warn!(e; "Failed to get file for key {:?}", key);
}
// We remove the file from the index.
index.remove(&key).await;
CACHE_MISS
.with_label_values(&[key.file_type.metric_label()])
.inc();
None
}
}
}
/// Removes a file from the cache explicitly.
/// It always tries to remove the file from the local store because the file may not be
/// in the memory index if the upload failed.
pub(crate) async fn remove(&self, key: IndexKey) {
let file_path = self.inner.cache_file_path(key);
self.inner.memory_index(key.file_type).remove(&key).await;
// Always delete the file from the local store.
if let Err(e) = self.inner.local_store.delete(&file_path).await {
warn!(e; "Failed to delete a cached file {}", file_path);
}
}
/// Recovers the index from local store.
///
/// If `task_receiver` is provided, spawns a background task after recovery
/// to process `RegionLoadCacheTask` messages for loading files into the cache.
pub(crate) async fn recover(
&self,
sync: bool,
task_receiver: Option<UnboundedReceiver<RegionLoadCacheTask>>,
) {
let moved_self = self.clone();
let handle = tokio::spawn(async move {
if let Err(err) = moved_self.inner.recover().await {
error!(err; "Failed to recover file cache.")
}
// Spawns background task to process region load cache tasks after recovery.
// So it won't block the recovery when `sync` is true.
if let Some(mut receiver) = task_receiver {
info!("Spawning background task for processing region load cache tasks");
tokio::spawn(async move {
while let Some(task) = receiver.recv().await {
task.fill_cache(&moved_self).await;
}
info!("Background task for processing region load cache tasks stopped");
});
}
});
if sync {
let _ = handle.await;
}
}
/// Returns the cache file path for the key.
pub(crate) fn cache_file_path(&self, key: IndexKey) -> String {
self.inner.cache_file_path(key)
}
/// Returns the local store of the file cache.
pub(crate) fn local_store(&self) -> ObjectStore {
self.inner.local_store.clone()
}
/// Gets the parquet metadata from the file cache.
/// Returns None if the file is not in the cache or the metadata fails to load.
pub(crate) async fn get_parquet_meta_data(&self, key: IndexKey) -> Option<ParquetMetaData> {
// Check if file cache contains the key
if let Some(index_value) = self.inner.parquet_index.get(&key).await {
// Load metadata from file cache
let local_store = self.local_store();
let file_path = self.inner.cache_file_path(key);
let file_size = index_value.file_size as u64;
let metadata_loader = MetadataLoader::new(local_store, &file_path, file_size);
match metadata_loader.load().await {
Ok(metadata) => {
CACHE_HIT
.with_label_values(&[key.file_type.metric_label()])
.inc();
Some(metadata)
}
Err(e) => {
if !e.is_object_not_found() {
warn!(
e; "Failed to get parquet metadata for key {:?}",
key
);
}
// We remove the file from the index.
self.inner.parquet_index.remove(&key).await;
CACHE_MISS
.with_label_values(&[key.file_type.metric_label()])
.inc();
None
}
}
} else {
CACHE_MISS
.with_label_values(&[key.file_type.metric_label()])
.inc();
None
}
}
async fn get_reader(&self, file_path: &str) -> object_store::Result<Option<Reader>> {
if self.inner.local_store.exists(file_path).await? {
Ok(Some(self.inner.local_store.reader(file_path).await?))
} else {
Ok(None)
}
}
/// Checks if the key is in the file cache.
pub(crate) fn contains_key(&self, key: &IndexKey) -> bool {
self.inner.memory_index(key.file_type).contains_key(key)
}
/// Returns the capacity of the puffin (index) cache in bytes.
pub(crate) fn puffin_cache_capacity(&self) -> u64 {
self.puffin_capacity
}
/// Returns the current weighted size (used bytes) of the puffin (index) cache.
pub(crate) fn puffin_cache_size(&self) -> u64 {
self.inner.puffin_index.weighted_size()
}
/// Downloads a file in `remote_path` from the remote object store to the local cache
/// (specified by `index_key`).
pub(crate) async fn download(
&self,
index_key: IndexKey,
remote_path: &str,
remote_store: &ObjectStore,
file_size: u64,
) -> Result<()> {
self.inner
.download(index_key, remote_path, remote_store, file_size)
.await
}
}
/// Key of file cache index.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct IndexKey {
pub(crate) struct IndexKey {
pub region_id: RegionId,
pub file_id: FileId,
pub file_type: FileType,
@@ -607,7 +565,7 @@ impl fmt::Display for IndexKey {
"{}.{}.{}",
self.region_id.as_u64(),
self.file_id,
self.file_type
self.file_type.as_str()
)
}
}
@@ -618,16 +576,7 @@ pub enum FileType {
/// Parquet file.
Parquet,
/// Puffin file.
Puffin(u64),
}
impl fmt::Display for FileType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
FileType::Parquet => write!(f, "parquet"),
FileType::Puffin(version) => write!(f, "{}.puffin", version),
}
}
Puffin,
}
impl FileType {
@@ -635,16 +584,16 @@ impl FileType {
fn parse(s: &str) -> Option<FileType> {
match s {
"parquet" => Some(FileType::Parquet),
"puffin" => Some(FileType::Puffin(0)),
_ => {
// if suffixed with `.puffin`, try to parse the version
if let Some(version_str) = s.strip_suffix(".puffin") {
let version = version_str.parse::<u64>().ok()?;
Some(FileType::Puffin(version))
} else {
None
}
}
"puffin" => Some(FileType::Puffin),
_ => None,
}
}
/// Converts the file type to string.
fn as_str(&self) -> &'static str {
match self {
FileType::Parquet => "parquet",
FileType::Puffin => "puffin",
}
}
@@ -652,7 +601,7 @@ impl FileType {
fn metric_label(&self) -> &'static str {
match self {
FileType::Parquet => FILE_TYPE,
FileType::Puffin(_) => INDEX_TYPE,
FileType::Puffin => INDEX_TYPE,
}
}
}
@@ -734,7 +683,7 @@ mod tests {
let exist = cache.reader(key).await;
assert!(exist.is_some());
tokio::time::sleep(Duration::from_millis(15)).await;
cache.inner.parquet_index.run_pending_tasks().await;
cache.parquet_index.run_pending_tasks().await;
let non = cache.reader(key).await;
assert!(non.is_none());
}
@@ -772,19 +721,19 @@ mod tests {
assert_eq!("hello", String::from_utf8(buf).unwrap());
// Get weighted size.
cache.inner.parquet_index.run_pending_tasks().await;
assert_eq!(5, cache.inner.parquet_index.weighted_size());
cache.parquet_index.run_pending_tasks().await;
assert_eq!(5, cache.parquet_index.weighted_size());
// Remove the file.
cache.remove(key).await;
assert!(cache.reader(key).await.is_none());
// Ensure all pending tasks of the moka cache is done before assertion.
cache.inner.parquet_index.run_pending_tasks().await;
cache.parquet_index.run_pending_tasks().await;
// The file also not exists.
assert!(!local_store.exists(&file_path).await.unwrap());
assert_eq!(0, cache.inner.parquet_index.weighted_size());
assert_eq!(0, cache.parquet_index.weighted_size());
}
#[tokio::test]
@@ -817,7 +766,7 @@ mod tests {
// Reader is none.
assert!(cache.reader(key).await.is_none());
// Key is removed.
assert!(!cache.inner.parquet_index.contains_key(&key));
assert!(!cache.parquet_index.contains_key(&key));
}
#[tokio::test]
@@ -850,7 +799,12 @@ mod tests {
}
// Recover the cache.
let cache = FileCache::new(local_store.clone(), ReadableSize::mb(10), None, None);
let cache = Arc::new(FileCache::new(
local_store.clone(),
ReadableSize::mb(10),
None,
None,
));
// No entry before recovery.
assert!(
cache
@@ -861,11 +815,8 @@ mod tests {
cache.recover(true, None).await;
// Check size.
cache.inner.parquet_index.run_pending_tasks().await;
assert_eq!(
total_size,
cache.inner.parquet_index.weighted_size() as usize
);
cache.parquet_index.run_pending_tasks().await;
assert_eq!(total_size, cache.parquet_index.weighted_size() as usize);
for (i, file_id) in file_ids.iter().enumerate() {
let key = IndexKey::new(region_id, *file_id, file_type);
@@ -930,15 +881,6 @@ mod tests {
IndexKey::new(region_id, file_id, FileType::Parquet),
parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.parquet").unwrap()
);
assert_eq!(
IndexKey::new(region_id, file_id, FileType::Puffin(0)),
parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.puffin").unwrap()
);
assert_eq!(
IndexKey::new(region_id, file_id, FileType::Puffin(42)),
parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.42.puffin")
.unwrap()
);
assert!(parse_index_key("").is_none());
assert!(parse_index_key(".").is_none());
assert!(parse_index_key("5299989643269").is_none());

View File

@@ -31,29 +31,6 @@ const INDEX_METADATA_TYPE: &str = "index_metadata";
/// Metrics for index content.
const INDEX_CONTENT_TYPE: &str = "index_content";
/// Metrics collected from IndexCache operations.
#[derive(Debug, Default, Clone)]
pub struct IndexCacheMetrics {
/// Number of cache hits.
pub cache_hit: usize,
/// Number of cache misses.
pub cache_miss: usize,
/// Number of pages accessed.
pub num_pages: usize,
/// Total bytes from pages.
pub page_bytes: u64,
}
impl IndexCacheMetrics {
/// Merges another set of metrics into this one.
pub fn merge(&mut self, other: &Self) {
self.cache_hit += other.cache_hit;
self.cache_miss += other.cache_miss;
self.num_pages += other.num_pages;
self.page_bytes += other.page_bytes;
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct PageKey {
page_id: u64,
@@ -183,20 +160,18 @@ where
offset: u64,
size: u32,
load: F,
) -> Result<(Vec<u8>, IndexCacheMetrics), E>
) -> Result<Vec<u8>, E>
where
F: Fn(Vec<Range<u64>>) -> Fut,
Fut: Future<Output = Result<Vec<Bytes>, E>>,
E: std::error::Error,
{
let mut metrics = IndexCacheMetrics::default();
let page_keys =
PageKey::generate_page_keys(offset, size, self.page_size).collect::<Vec<_>>();
// Size is 0, return empty data.
if page_keys.is_empty() {
return Ok((Vec::new(), metrics));
return Ok(Vec::new());
}
metrics.num_pages = page_keys.len();
let mut data = Vec::with_capacity(page_keys.len());
data.resize(page_keys.len(), Bytes::new());
let mut cache_miss_range = vec![];
@@ -207,13 +182,10 @@ where
match self.get_page(key, *page_key) {
Some(page) => {
CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
metrics.cache_hit += 1;
metrics.page_bytes += page.len() as u64;
data[i] = page;
}
None => {
CACHE_MISS.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
metrics.cache_miss += 1;
let base_offset = page_key.page_id * self.page_size;
let pruned_size = if i == last_index {
prune_size(page_keys.iter(), file_size, self.page_size)
@@ -229,18 +201,14 @@ where
let pages = load(cache_miss_range).await?;
for (i, page) in cache_miss_idx.into_iter().zip(pages.into_iter()) {
let page_key = page_keys[i];
metrics.page_bytes += page.len() as u64;
data[i] = page.clone();
self.put_page(key, page_key, page.clone());
}
}
let buffer = Buffer::from_iter(data.into_iter());
Ok((
buffer
.slice(PageKey::calculate_range(offset, size, self.page_size))
.to_vec(),
metrics,
))
Ok(buffer
.slice(PageKey::calculate_range(offset, size, self.page_size))
.to_vec())
}
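// Worked example (illustrative, assuming 4 KiB pages): a read at offset = 5000
// with size = 7000 touches bytes 5000..12000, i.e. page ids 1..=2
// (5000 / 4096 = 1, 11999 / 4096 = 2). Cached pages are served from memory,
// missing pages are fetched via `load` in page-aligned ranges and inserted, and
// finally the requested slice is cut out of the concatenated pages.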
fn get_page(&self, key: K, page_key: PageKey) -> Option<Bytes> {

View File

@@ -14,14 +14,13 @@
use std::ops::Range;
use std::sync::Arc;
use std::time::Instant;
use api::v1::index::{BloomFilterLoc, BloomFilterMeta};
use async_trait::async_trait;
use bytes::Bytes;
use index::bloom_filter::error::Result;
use index::bloom_filter::reader::{BloomFilterReadMetrics, BloomFilterReader};
use store_api::storage::{ColumnId, FileId, IndexVersion};
use index::bloom_filter::reader::BloomFilterReader;
use store_api::storage::{ColumnId, FileId};
use crate::cache::index::{INDEX_METADATA_TYPE, IndexCache, PageKey};
use crate::metrics::{CACHE_HIT, CACHE_MISS};
@@ -35,10 +34,8 @@ pub enum Tag {
Fulltext,
}
pub type BloomFilterIndexKey = (FileId, IndexVersion, ColumnId, Tag);
/// Cache for bloom filter index.
pub type BloomFilterIndexCache = IndexCache<BloomFilterIndexKey, BloomFilterMeta>;
pub type BloomFilterIndexCache = IndexCache<(FileId, ColumnId, Tag), BloomFilterMeta>;
pub type BloomFilterIndexCacheRef = Arc<BloomFilterIndexCache>;
impl BloomFilterIndexCache {
@@ -61,9 +58,11 @@ impl BloomFilterIndexCache {
}
/// Calculates weight for bloom filter index metadata.
fn bloom_filter_index_metadata_weight(k: &BloomFilterIndexKey, meta: &Arc<BloomFilterMeta>) -> u32 {
fn bloom_filter_index_metadata_weight(
k: &(FileId, ColumnId, Tag),
meta: &Arc<BloomFilterMeta>,
) -> u32 {
let base = k.0.as_bytes().len()
+ std::mem::size_of::<IndexVersion>()
+ std::mem::size_of::<ColumnId>()
+ std::mem::size_of::<Tag>()
+ std::mem::size_of::<BloomFilterMeta>();
@@ -75,14 +74,16 @@ fn bloom_filter_index_metadata_weight(k: &BloomFilterIndexKey, meta: &Arc<BloomF
}
/// Calculates weight for bloom filter index content.
fn bloom_filter_index_content_weight((k, _): &(BloomFilterIndexKey, PageKey), v: &Bytes) -> u32 {
fn bloom_filter_index_content_weight(
(k, _): &((FileId, ColumnId, Tag), PageKey),
v: &Bytes,
) -> u32 {
(k.0.as_bytes().len() + std::mem::size_of::<ColumnId>() + v.len()) as u32
}
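// Worked example (illustrative): assuming a 16-byte FileId, a u32 ColumnId and a
// 4 KiB cached page, the content weight is 16 + 4 + 4096 = 4116 bytes.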
/// Bloom filter index blob reader with cache.
pub struct CachedBloomFilterIndexBlobReader<R> {
file_id: FileId,
index_version: IndexVersion,
column_id: ColumnId,
tag: Tag,
blob_size: u64,
@@ -94,7 +95,6 @@ impl<R> CachedBloomFilterIndexBlobReader<R> {
/// Creates a new bloom filter index blob reader with cache.
pub fn new(
file_id: FileId,
index_version: IndexVersion,
column_id: ColumnId,
tag: Tag,
blob_size: u64,
@@ -103,7 +103,6 @@ impl<R> CachedBloomFilterIndexBlobReader<R> {
) -> Self {
Self {
file_id,
index_version,
column_id,
tag,
blob_size,
@@ -115,95 +114,53 @@ impl<R> CachedBloomFilterIndexBlobReader<R> {
#[async_trait]
impl<R: BloomFilterReader + Send> BloomFilterReader for CachedBloomFilterIndexBlobReader<R> {
async fn range_read(
&self,
offset: u64,
size: u32,
metrics: Option<&mut BloomFilterReadMetrics>,
) -> Result<Bytes> {
let start = metrics.as_ref().map(|_| Instant::now());
async fn range_read(&self, offset: u64, size: u32) -> Result<Bytes> {
let inner = &self.inner;
let (result, cache_metrics) = self
.cache
self.cache
.get_or_load(
(self.file_id, self.index_version, self.column_id, self.tag),
(self.file_id, self.column_id, self.tag),
self.blob_size,
offset,
size,
move |ranges| async move { inner.read_vec(&ranges, None).await },
move |ranges| async move { inner.read_vec(&ranges).await },
)
.await?;
if let Some(m) = metrics {
m.total_ranges += cache_metrics.num_pages;
m.total_bytes += cache_metrics.page_bytes;
m.cache_hit += cache_metrics.cache_hit;
m.cache_miss += cache_metrics.cache_miss;
if let Some(start) = start {
m.fetch_elapsed += start.elapsed();
}
}
Ok(result.into())
.await
.map(|b| b.into())
}
async fn read_vec(
&self,
ranges: &[Range<u64>],
metrics: Option<&mut BloomFilterReadMetrics>,
) -> Result<Vec<Bytes>> {
let start = metrics.as_ref().map(|_| Instant::now());
async fn read_vec(&self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
let mut pages = Vec::with_capacity(ranges.len());
let mut total_cache_metrics = crate::cache::index::IndexCacheMetrics::default();
for range in ranges {
let inner = &self.inner;
let (page, cache_metrics) = self
let page = self
.cache
.get_or_load(
(self.file_id, self.index_version, self.column_id, self.tag),
(self.file_id, self.column_id, self.tag),
self.blob_size,
range.start,
(range.end - range.start) as u32,
move |ranges| async move { inner.read_vec(&ranges, None).await },
move |ranges| async move { inner.read_vec(&ranges).await },
)
.await?;
total_cache_metrics.merge(&cache_metrics);
pages.push(Bytes::from(page));
}
if let Some(m) = metrics {
m.total_ranges += total_cache_metrics.num_pages;
m.total_bytes += total_cache_metrics.page_bytes;
m.cache_hit += total_cache_metrics.cache_hit;
m.cache_miss += total_cache_metrics.cache_miss;
if let Some(start) = start {
m.fetch_elapsed += start.elapsed();
}
}
Ok(pages)
}
/// Reads the meta information of the bloom filter.
async fn metadata(
&self,
metrics: Option<&mut BloomFilterReadMetrics>,
) -> Result<BloomFilterMeta> {
if let Some(cached) =
self.cache
.get_metadata((self.file_id, self.index_version, self.column_id, self.tag))
async fn metadata(&self) -> Result<BloomFilterMeta> {
if let Some(cached) = self
.cache
.get_metadata((self.file_id, self.column_id, self.tag))
{
CACHE_HIT.with_label_values(&[INDEX_METADATA_TYPE]).inc();
if let Some(m) = metrics {
m.cache_hit += 1;
}
Ok((*cached).clone())
} else {
let meta = self.inner.metadata(metrics).await?;
let meta = self.inner.metadata().await?;
self.cache.put_metadata(
(self.file_id, self.index_version, self.column_id, self.tag),
(self.file_id, self.column_id, self.tag),
Arc::new(meta.clone()),
);
CACHE_MISS.with_label_values(&[INDEX_METADATA_TYPE]).inc();
@@ -223,7 +180,6 @@ mod test {
#[test]
fn bloom_filter_metadata_weight_counts_vec_contents() {
let file_id = FileId::parse_str("00000000-0000-0000-0000-000000000001").unwrap();
let version = 0;
let column_id: ColumnId = 42;
let tag = Tag::Skipping;
@@ -247,13 +203,10 @@ mod test {
],
};
let weight = bloom_filter_index_metadata_weight(
&(file_id, version, column_id, tag),
&Arc::new(meta.clone()),
);
let weight =
bloom_filter_index_metadata_weight(&(file_id, column_id, tag), &Arc::new(meta.clone()));
let base = file_id.as_bytes().len()
+ std::mem::size_of::<IndexVersion>()
+ std::mem::size_of::<ColumnId>()
+ std::mem::size_of::<Tag>()
+ std::mem::size_of::<BloomFilterMeta>();

View File

@@ -14,15 +14,14 @@
use core::ops::Range;
use std::sync::Arc;
use std::time::Instant;
use api::v1::index::InvertedIndexMetas;
use async_trait::async_trait;
use bytes::Bytes;
use index::inverted_index::error::Result;
use index::inverted_index::format::reader::{InvertedIndexReadMetrics, InvertedIndexReader};
use index::inverted_index::format::reader::InvertedIndexReader;
use prost::Message;
use store_api::storage::{FileId, IndexVersion};
use store_api::storage::FileId;
use crate::cache::index::{INDEX_METADATA_TYPE, IndexCache, PageKey};
use crate::metrics::{CACHE_HIT, CACHE_MISS};
@@ -30,7 +29,7 @@ use crate::metrics::{CACHE_HIT, CACHE_MISS};
const INDEX_TYPE_INVERTED_INDEX: &str = "inverted_index";
/// Cache for inverted index.
pub type InvertedIndexCache = IndexCache<(FileId, IndexVersion), InvertedIndexMetas>;
pub type InvertedIndexCache = IndexCache<FileId, InvertedIndexMetas>;
pub type InvertedIndexCacheRef = Arc<InvertedIndexCache>;
impl InvertedIndexCache {
@@ -48,24 +47,23 @@ impl InvertedIndexCache {
/// Removes all cached entries for the given `file_id`.
pub fn invalidate_file(&self, file_id: FileId) {
self.invalidate_if(move |key| key.0 == file_id);
self.invalidate_if(move |key| *key == file_id);
}
}
/// Calculates weight for inverted index metadata.
fn inverted_index_metadata_weight(k: &(FileId, IndexVersion), v: &Arc<InvertedIndexMetas>) -> u32 {
(k.0.as_bytes().len() + size_of::<IndexVersion>() + v.encoded_len()) as u32
fn inverted_index_metadata_weight(k: &FileId, v: &Arc<InvertedIndexMetas>) -> u32 {
(k.as_bytes().len() + v.encoded_len()) as u32
}
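// Worked example (illustrative): assuming a 16-byte FileId and a metas message
// that encodes to 120 bytes, the cached entry weighs 16 + 120 = 136 bytes.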
/// Calculates weight for inverted index content.
fn inverted_index_content_weight((k, _): &((FileId, IndexVersion), PageKey), v: &Bytes) -> u32 {
(k.0.as_bytes().len() + size_of::<IndexVersion>() + v.len()) as u32
fn inverted_index_content_weight((k, _): &(FileId, PageKey), v: &Bytes) -> u32 {
(k.as_bytes().len() + v.len()) as u32
}
/// Inverted index blob reader with cache.
pub struct CachedInvertedIndexBlobReader<R> {
file_id: FileId,
index_version: IndexVersion,
blob_size: u64,
inner: R,
cache: InvertedIndexCacheRef,
@@ -73,16 +71,9 @@ pub struct CachedInvertedIndexBlobReader<R> {
impl<R> CachedInvertedIndexBlobReader<R> {
/// Creates a new inverted index blob reader with cache.
pub fn new(
file_id: FileId,
index_version: IndexVersion,
blob_size: u64,
inner: R,
cache: InvertedIndexCacheRef,
) -> Self {
pub fn new(file_id: FileId, blob_size: u64, inner: R, cache: InvertedIndexCacheRef) -> Self {
Self {
file_id,
index_version,
blob_size,
inner,
cache,
@@ -92,88 +83,47 @@ impl<R> CachedInvertedIndexBlobReader<R> {
#[async_trait]
impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobReader<R> {
async fn range_read<'a>(
&self,
offset: u64,
size: u32,
metrics: Option<&'a mut InvertedIndexReadMetrics>,
) -> Result<Vec<u8>> {
let start = metrics.as_ref().map(|_| Instant::now());
async fn range_read(&self, offset: u64, size: u32) -> Result<Vec<u8>> {
let inner = &self.inner;
let (result, cache_metrics) = self
.cache
self.cache
.get_or_load(
(self.file_id, self.index_version),
self.file_id,
self.blob_size,
offset,
size,
move |ranges| async move { inner.read_vec(&ranges, None).await },
move |ranges| async move { inner.read_vec(&ranges).await },
)
.await?;
if let Some(m) = metrics {
m.total_bytes += cache_metrics.page_bytes;
m.total_ranges += cache_metrics.num_pages;
m.cache_hit += cache_metrics.cache_hit;
m.cache_miss += cache_metrics.cache_miss;
m.fetch_elapsed += start.unwrap().elapsed();
}
Ok(result)
.await
}
async fn read_vec<'a>(
&self,
ranges: &[Range<u64>],
metrics: Option<&'a mut InvertedIndexReadMetrics>,
) -> Result<Vec<Bytes>> {
let start = metrics.as_ref().map(|_| Instant::now());
async fn read_vec(&self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
let mut pages = Vec::with_capacity(ranges.len());
let mut total_cache_metrics = crate::cache::index::IndexCacheMetrics::default();
for range in ranges {
let inner = &self.inner;
let (page, cache_metrics) = self
let page = self
.cache
.get_or_load(
(self.file_id, self.index_version),
self.file_id,
self.blob_size,
range.start,
(range.end - range.start) as u32,
move |ranges| async move { inner.read_vec(&ranges, None).await },
move |ranges| async move { inner.read_vec(&ranges).await },
)
.await?;
total_cache_metrics.merge(&cache_metrics);
pages.push(Bytes::from(page));
}
if let Some(m) = metrics {
m.total_bytes += total_cache_metrics.page_bytes;
m.total_ranges += total_cache_metrics.num_pages;
m.cache_hit += total_cache_metrics.cache_hit;
m.cache_miss += total_cache_metrics.cache_miss;
m.fetch_elapsed += start.unwrap().elapsed();
}
Ok(pages)
}
async fn metadata<'a>(
&self,
metrics: Option<&'a mut InvertedIndexReadMetrics>,
) -> Result<Arc<InvertedIndexMetas>> {
if let Some(cached) = self.cache.get_metadata((self.file_id, self.index_version)) {
async fn metadata(&self) -> Result<Arc<InvertedIndexMetas>> {
if let Some(cached) = self.cache.get_metadata(self.file_id) {
CACHE_HIT.with_label_values(&[INDEX_METADATA_TYPE]).inc();
if let Some(m) = metrics {
m.cache_hit += 1;
}
Ok(cached)
} else {
let meta = self.inner.metadata(metrics).await?;
self.cache
.put_metadata((self.file_id, self.index_version), meta.clone());
let meta = self.inner.metadata().await?;
self.cache.put_metadata(self.file_id, meta.clone());
CACHE_MISS.with_label_values(&[INDEX_METADATA_TYPE]).inc();
Ok(meta)
}
@@ -308,7 +258,6 @@ mod test {
// Init a test range reader in local fs.
let mut env = TestEnv::new().await;
let file_size = blob.len() as u64;
let index_version = 0;
let store = env.init_object_store_manager();
let temp_path = "data";
store.write(temp_path, blob).await.unwrap();
@@ -324,12 +273,11 @@ mod test {
let reader = InvertedIndexBlobReader::new(range_reader);
let cached_reader = CachedInvertedIndexBlobReader::new(
FileId::random(),
index_version,
file_size,
reader,
Arc::new(InvertedIndexCache::new(8192, 8192, 50)),
);
let metadata = cached_reader.metadata(None).await.unwrap();
let metadata = cached_reader.metadata().await.unwrap();
assert_eq!(metadata.total_row_count, 8);
assert_eq!(metadata.segment_row_count, 1);
assert_eq!(metadata.metas.len(), 2);
@@ -344,19 +292,13 @@ mod test {
.fst(
tag0.base_offset + tag0.relative_fst_offset as u64,
tag0.fst_size,
None,
)
.await
.unwrap();
assert_eq!(fst0.len(), 3);
let [offset, size] = unpack(fst0.get(b"a").unwrap());
let bitmap = cached_reader
.bitmap(
tag0.base_offset + offset as u64,
size,
BitmapType::Roaring,
None,
)
.bitmap(tag0.base_offset + offset as u64, size, BitmapType::Roaring)
.await
.unwrap();
assert_eq!(
@@ -365,12 +307,7 @@ mod test {
);
let [offset, size] = unpack(fst0.get(b"b").unwrap());
let bitmap = cached_reader
.bitmap(
tag0.base_offset + offset as u64,
size,
BitmapType::Roaring,
None,
)
.bitmap(tag0.base_offset + offset as u64, size, BitmapType::Roaring)
.await
.unwrap();
assert_eq!(
@@ -379,12 +316,7 @@ mod test {
);
let [offset, size] = unpack(fst0.get(b"c").unwrap());
let bitmap = cached_reader
.bitmap(
tag0.base_offset + offset as u64,
size,
BitmapType::Roaring,
None,
)
.bitmap(tag0.base_offset + offset as u64, size, BitmapType::Roaring)
.await
.unwrap();
assert_eq!(
@@ -403,19 +335,13 @@ mod test {
.fst(
tag1.base_offset + tag1.relative_fst_offset as u64,
tag1.fst_size,
None,
)
.await
.unwrap();
assert_eq!(fst1.len(), 3);
let [offset, size] = unpack(fst1.get(b"x").unwrap());
let bitmap = cached_reader
.bitmap(
tag1.base_offset + offset as u64,
size,
BitmapType::Roaring,
None,
)
.bitmap(tag1.base_offset + offset as u64, size, BitmapType::Roaring)
.await
.unwrap();
assert_eq!(
@@ -424,12 +350,7 @@ mod test {
);
let [offset, size] = unpack(fst1.get(b"y").unwrap());
let bitmap = cached_reader
.bitmap(
tag1.base_offset + offset as u64,
size,
BitmapType::Roaring,
None,
)
.bitmap(tag1.base_offset + offset as u64, size, BitmapType::Roaring)
.await
.unwrap();
assert_eq!(
@@ -438,12 +359,7 @@ mod test {
);
let [offset, size] = unpack(fst1.get(b"z").unwrap());
let bitmap = cached_reader
.bitmap(
tag1.base_offset + offset as u64,
size,
BitmapType::Roaring,
None,
)
.bitmap(tag1.base_offset + offset as u64, size, BitmapType::Roaring)
.await
.unwrap();
assert_eq!(
@@ -456,16 +372,16 @@ mod test {
for _ in 0..FUZZ_REPEAT_TIMES {
let offset = rng.random_range(0..file_size);
let size = rng.random_range(0..file_size as u32 - offset as u32);
let expected = cached_reader.range_read(offset, size, None).await.unwrap();
let expected = cached_reader.range_read(offset, size).await.unwrap();
let inner = &cached_reader.inner;
let (read, _cache_metrics) = cached_reader
let read = cached_reader
.cache
.get_or_load(
(cached_reader.file_id, cached_reader.index_version),
cached_reader.file_id,
file_size,
offset,
size,
|ranges| async move { inner.read_vec(&ranges, None).await },
|ranges| async move { inner.read_vec(&ranges).await },
)
.await
.unwrap();
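
Editorial sketch: the get_or_load calls exercised above serve byte ranges from fixed-size cached pages, fetching only the missing pages through the supplied loader closure. Below is a condensed, synchronous sketch of that idea; the page size, the plain u64 key, and the HashMap store are all assumptions for illustration — the crate's cache is async, moka-backed, and (after this change) keyed by file_id alone.

use std::collections::HashMap;
use std::ops::Range;

const PAGE_SIZE: u64 = 4096; // assumed for illustration; the real page size is configurable

struct PageCache {
    /// (file id, page number) -> page bytes
    pages: HashMap<(u64, u64), Vec<u8>>,
}

impl PageCache {
    fn get_or_load<F>(&mut self, file: u64, offset: u64, size: u32, load: F) -> Vec<u8>
    where
        F: FnOnce(&[Range<u64>]) -> Vec<Vec<u8>>,
    {
        if size == 0 {
            return Vec::new();
        }
        let (first, last) = (offset / PAGE_SIZE, (offset + size as u64 - 1) / PAGE_SIZE);
        // Fetch only the pages that are not cached yet, in a single batch.
        let missing: Vec<Range<u64>> = (first..=last)
            .filter(|p| !self.pages.contains_key(&(file, *p)))
            .map(|p| p * PAGE_SIZE..(p + 1) * PAGE_SIZE)
            .collect();
        if !missing.is_empty() {
            for (range, data) in missing.iter().zip(load(&missing)) {
                self.pages.insert((file, range.start / PAGE_SIZE), data);
            }
        }
        // Stitch the requested bytes together from the cached pages.
        let mut out = Vec::with_capacity(size as usize);
        for p in first..=last {
            let page = &self.pages[&(file, p)];
            let start = offset.saturating_sub(p * PAGE_SIZE) as usize;
            let end = ((offset + size as u64).min((p + 1) * PAGE_SIZE) - p * PAGE_SIZE) as usize;
            out.extend_from_slice(&page[start..end.min(page.len())]);
        }
        out
    }
}

Keying pages rather than request ranges lets overlapping reads share cached bytes, which is why the fuzz loop above can compare a direct range_read against a fresh get_or_load of the same span.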

View File

@@ -1,574 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! A cache for manifest files.
use std::path::PathBuf;
use std::sync::Arc;
use std::time::{Duration, Instant};
use common_base::readable_size::ReadableSize;
use common_telemetry::{error, info, warn};
use futures::{FutureExt, TryStreamExt};
use moka::future::Cache;
use moka::notification::RemovalCause;
use moka::policy::EvictionPolicy;
use object_store::ObjectStore;
use object_store::util::join_path;
use snafu::ResultExt;
use crate::error::{OpenDalSnafu, Result};
use crate::metrics::{CACHE_BYTES, CACHE_HIT, CACHE_MISS};
/// Subdirectory of cached manifest files.
///
/// This must contain three layers, corresponding to [`build_prometheus_metrics_layer`](object_store::layers::build_prometheus_metrics_layer).
const MANIFEST_DIR: &str = "cache/object/manifest/";
/// Metric label for manifest files.
const MANIFEST_TYPE: &str = "manifest";
/// A manifest cache manages manifest files on the local store and evicts files
/// based on their total size.
#[derive(Debug, Clone)]
pub struct ManifestCache {
/// Local store to cache files.
local_store: ObjectStore,
/// Index to track cached manifest files.
index: Cache<String, IndexValue>,
}
impl ManifestCache {
/// Creates a new manifest cache and recovers the index from local store.
pub async fn new(
local_store: ObjectStore,
capacity: ReadableSize,
ttl: Option<Duration>,
) -> ManifestCache {
let total_capacity = capacity.as_bytes();
info!(
"Initializing manifest cache with capacity: {}",
ReadableSize(total_capacity)
);
let index = Self::build_cache(local_store.clone(), total_capacity, ttl);
let cache = ManifestCache { local_store, index };
// Recovers the cache index from local store asynchronously
cache.recover(false).await;
cache
}
/// Builds the cache.
fn build_cache(
local_store: ObjectStore,
capacity: u64,
ttl: Option<Duration>,
) -> Cache<String, IndexValue> {
let cache_store = local_store;
let mut builder = Cache::builder()
.eviction_policy(EvictionPolicy::lru())
.weigher(|key: &String, value: &IndexValue| -> u32 {
key.len() as u32 + value.file_size
})
.max_capacity(capacity)
.async_eviction_listener(move |key: Arc<String>, value: IndexValue, cause| {
let store = cache_store.clone();
// Stores files under MANIFEST_DIR.
let file_path = join_path(MANIFEST_DIR, &key);
async move {
if let RemovalCause::Replaced = cause {
                        // The cached entry was replaced by another file. We don't remove the
                        // file, but only update the metrics, since the file has already been
                        // replaced by the caller.
CACHE_BYTES
.with_label_values(&[MANIFEST_TYPE])
.sub(value.file_size.into());
return;
}
match store.delete(&file_path).await {
Ok(()) => {
CACHE_BYTES
.with_label_values(&[MANIFEST_TYPE])
.sub(value.file_size.into());
}
Err(e) => {
warn!(e; "Failed to delete cached manifest file {}", file_path);
}
}
}
.boxed()
});
if let Some(ttl) = ttl {
builder = builder.time_to_idle(ttl);
}
builder.build()
}
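
A concrete reading of the weigher above: an entry weighs its key length plus the cached file size in bytes. For example, the 43-byte key "region_1/manifest/00000000000000000007.json" plus a 16-byte file yields the weighted size of 59 asserted in test_manifest_cache_basic below. A standalone restatement (illustrative, not part of this diff):

fn entry_weight(key: &str, file_size: u32) -> u32 {
    // weight = key bytes + cached file bytes, mirroring the weigher closure above
    key.len() as u32 + file_size
}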
/// Puts a file into the cache index.
///
/// The caller should ensure the file is in the correct path.
pub(crate) async fn put(&self, key: String, value: IndexValue) {
CACHE_BYTES
.with_label_values(&[MANIFEST_TYPE])
.add(value.file_size.into());
self.index.insert(key, value).await;
// Since files can be large items, we run the pending tasks immediately.
self.index.run_pending_tasks().await;
}
/// Gets the index value for the key.
pub(crate) async fn get(&self, key: &str) -> Option<IndexValue> {
self.index.get(key).await
}
/// Removes a file from the cache explicitly.
pub(crate) async fn remove(&self, key: &str) {
let file_path = self.cache_file_path(key);
self.index.remove(key).await;
// Always deletes the file from the local store.
if let Err(e) = self.local_store.delete(&file_path).await {
warn!(e; "Failed to delete a cached manifest file {}", file_path);
}
}
/// Removes multiple files from the cache in batch.
pub(crate) async fn remove_batch(&self, keys: &[String]) {
if keys.is_empty() {
return;
}
for key in keys {
self.index.remove(key).await;
}
let file_paths: Vec<String> = keys.iter().map(|key| self.cache_file_path(key)).collect();
if let Err(e) = self.local_store.delete_iter(file_paths).await {
warn!(e; "Failed to delete cached manifest files in batch");
}
}
async fn recover_inner(&self) -> Result<()> {
let now = Instant::now();
let mut lister = self
.local_store
.lister_with(MANIFEST_DIR)
.recursive(true)
.await
.context(OpenDalSnafu)?;
let (mut total_size, mut total_keys) = (0i64, 0);
while let Some(entry) = lister.try_next().await.context(OpenDalSnafu)? {
let meta = entry.metadata();
if !meta.is_file() {
continue;
}
let meta = self
.local_store
.stat(entry.path())
.await
.context(OpenDalSnafu)?;
let file_size = meta.content_length() as u32;
let key = entry.path().trim_start_matches(MANIFEST_DIR).to_string();
            common_telemetry::info!("Manifest cache recovered {}, size: {}", key, file_size);
self.index.insert(key, IndexValue { file_size }).await;
let size = i64::from(file_size);
total_size += size;
total_keys += 1;
}
CACHE_BYTES
.with_label_values(&[MANIFEST_TYPE])
.add(total_size);
// Runs all pending tasks of the moka cache so that the cache size is updated
// and the eviction policy is applied.
self.index.run_pending_tasks().await;
let weight = self.index.weighted_size();
let count = self.index.entry_count();
info!(
"Recovered manifest cache, num_keys: {}, num_bytes: {}, count: {}, weight: {}, cost: {:?}",
total_keys,
total_size,
count,
weight,
now.elapsed()
);
Ok(())
}
/// Recovers the index from local store.
pub(crate) async fn recover(&self, sync: bool) {
let moved_self = self.clone();
let handle = tokio::spawn(async move {
if let Err(err) = moved_self.recover_inner().await {
error!(err; "Failed to recover manifest cache.")
}
moved_self.clean_empty_dirs(true).await;
});
if sync {
let _ = handle.await;
}
}
/// Returns the cache file path for the key.
pub(crate) fn cache_file_path(&self, key: &str) -> String {
join_path(MANIFEST_DIR, key)
}
/// Gets a manifest file from cache.
/// Returns the file data if found in cache, None otherwise.
pub(crate) async fn get_file(&self, key: &str) -> Option<Vec<u8>> {
if self.get(key).await.is_none() {
CACHE_MISS.with_label_values(&[MANIFEST_TYPE]).inc();
return None;
}
let cache_file_path = self.cache_file_path(key);
match self.local_store.read(&cache_file_path).await {
Ok(data) => {
CACHE_HIT.with_label_values(&[MANIFEST_TYPE]).inc();
Some(data.to_vec())
}
Err(e) => {
warn!(e; "Failed to read cached manifest file {}", cache_file_path);
CACHE_MISS.with_label_values(&[MANIFEST_TYPE]).inc();
None
}
}
}
/// Puts a manifest file into cache.
pub(crate) async fn put_file(&self, key: String, data: Vec<u8>) {
let cache_file_path = self.cache_file_path(&key);
if let Err(e) = self.local_store.write(&cache_file_path, data.clone()).await {
warn!(e; "Failed to write manifest to cache {}", cache_file_path);
return;
}
let file_size = data.len() as u32;
self.put(key, IndexValue { file_size }).await;
}
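
A minimal usage sketch for the put_file/get_file pair above, with the crate-internal visibility and error handling trimmed; the key and payload are made up for illustration:

async fn roundtrip(cache: &ManifestCache) {
    let key = "region_1/manifest/00000000000000000001.json".to_string();
    let data = b"manifest content".to_vec();
    // put_file writes the bytes under MANIFEST_DIR on the local store and
    // then records the file size in the cache index.
    cache.put_file(key.clone(), data.clone()).await;
    // get_file consults the index first; a miss returns None without disk I/O.
    assert_eq!(cache.get_file(&key).await, Some(data));
}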
/// Removes empty directories recursively under the manifest cache directory.
///
/// If `check_mtime` is true, only removes directories that have not been modified
/// for at least 1 hour.
pub(crate) async fn clean_empty_dirs(&self, check_mtime: bool) {
info!("Clean empty dirs start");
let root = self.local_store.info().root();
let manifest_dir = PathBuf::from(root).join(MANIFEST_DIR);
let manifest_dir_clone = manifest_dir.clone();
let result = tokio::task::spawn_blocking(move || {
Self::clean_empty_dirs_sync(&manifest_dir_clone, check_mtime)
})
.await;
match result {
Ok(Ok(())) => {
info!("Clean empty dirs end");
}
Ok(Err(e)) => {
warn!(e; "Failed to clean empty directories under {}", manifest_dir.display());
}
Err(e) => {
warn!(e; "Failed to spawn blocking task for cleaning empty directories");
}
}
}
/// Removes all manifest files under the given directory from cache and cleans up empty directories.
pub(crate) async fn clean_manifests(&self, dir: &str) {
info!("Clean manifest cache for directory: {}", dir);
let cache_dir = join_path(MANIFEST_DIR, dir);
let mut lister = match self
.local_store
.lister_with(&cache_dir)
.recursive(true)
.await
{
Ok(lister) => lister,
Err(e) => {
warn!(e; "Failed to list manifest files under {}", cache_dir);
return;
}
};
let mut keys_to_remove = Vec::new();
loop {
match lister.try_next().await {
Ok(Some(entry)) => {
let meta = entry.metadata();
if meta.is_file() {
keys_to_remove
.push(entry.path().trim_start_matches(MANIFEST_DIR).to_string());
}
}
Ok(None) => break,
Err(e) => {
warn!(e; "Failed to read entry while listing {}", cache_dir);
break;
}
}
}
info!(
"Going to remove files from manifest cache, files: {:?}",
keys_to_remove
);
// Removes all files from cache in batch
self.remove_batch(&keys_to_remove).await;
// Cleans up empty directories under the given dir
let root = self.local_store.info().root();
let dir_path = PathBuf::from(root).join(&cache_dir);
let dir_path_clone = dir_path.clone();
let result = tokio::task::spawn_blocking(move || {
Self::clean_empty_dirs_sync(&dir_path_clone, false)
})
.await;
match result {
Ok(Ok(())) => {
info!("Cleaned manifest cache for directory: {}", dir);
}
Ok(Err(e)) => {
warn!(e; "Failed to clean empty directories under {}", dir_path.display());
}
Err(e) => {
warn!(e; "Failed to spawn blocking task for cleaning empty directories");
}
}
}
/// Synchronously removes empty directories recursively.
///
/// If `check_mtime` is true, only removes directories that have not been modified
/// for at least 1 hour.
fn clean_empty_dirs_sync(dir: &PathBuf, check_mtime: bool) -> std::io::Result<()> {
Self::remove_empty_dirs_recursive_sync(dir, check_mtime)?;
Ok(())
}
fn remove_empty_dirs_recursive_sync(dir: &PathBuf, check_mtime: bool) -> std::io::Result<bool> {
common_telemetry::debug!(
"Maybe remove empty dir: {:?}, check_mtime: {}",
dir,
check_mtime
);
let entries = match std::fs::read_dir(dir) {
Ok(entries) => entries,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
// Directory doesn't exist, treat as already removed (empty)
return Ok(true);
}
Err(e) => return Err(e),
};
let mut is_empty = true;
// Iterates all entries under the directory.
// We have to check all entries to clean up all empty subdirectories.
for entry in entries {
let entry = entry?;
let path = entry.path();
let metadata = std::fs::metadata(&path)?;
if metadata.is_dir() {
// Checks if we should skip this directory based on modification time
if check_mtime
&& let Ok(modified) = metadata.modified()
&& let Ok(elapsed) = modified.elapsed()
&& elapsed < Duration::from_secs(3600)
{
common_telemetry::debug!("Skip directory by mtime, elapsed: {:?}", elapsed);
// Only removes if not modified for at least 1 hour.
is_empty = false;
continue;
}
let subdir_empty = Self::remove_empty_dirs_recursive_sync(&path, check_mtime)?;
if subdir_empty {
if let Err(e) = std::fs::remove_dir(&path)
&& e.kind() != std::io::ErrorKind::NotFound
{
warn!(e; "Failed to remove empty directory {}", path.display());
is_empty = false;
} else {
info!(
"Removed empty directory {} from manifest cache",
path.display()
);
}
} else {
is_empty = false;
}
} else {
is_empty = false;
}
}
Ok(is_empty)
}
}
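
The recursion above reports back whether each directory ended up empty so that parents can be removed bottom-up, and a recently modified directory (when check_mtime is set) pins itself and its ancestors. A condensed sketch of the same invariant, simplified to ignore the NotFound special cases handled above:

use std::{fs, io, path::Path, time::Duration};

/// Returns true if `dir` ended up empty (all children removed or absent).
fn sweep(dir: &Path, check_mtime: bool) -> io::Result<bool> {
    let mut empty = true;
    for entry in fs::read_dir(dir)? {
        let path = entry?.path();
        let meta = fs::metadata(&path)?;
        if !meta.is_dir() {
            empty = false; // a file keeps the directory alive
            continue;
        }
        // A directory touched within the last hour pins itself (and its parents).
        let recent = check_mtime
            && meta
                .modified()
                .ok()
                .and_then(|m| m.elapsed().ok())
                .is_some_and(|e| e < Duration::from_secs(3600));
        if recent || !sweep(&path, check_mtime)? || fs::remove_dir(&path).is_err() {
            empty = false;
        }
    }
    Ok(empty)
}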
/// An entry that describes a file in the manifest cache.
///
/// It should only keep the minimal information needed by the cache.
#[derive(Debug, Clone)]
pub(crate) struct IndexValue {
/// Size of the file in bytes.
pub(crate) file_size: u32,
}
#[cfg(test)]
mod tests {
use common_test_util::temp_dir::create_temp_dir;
use object_store::services::Fs;
use super::*;
fn new_fs_store(path: &str) -> ObjectStore {
let builder = Fs::default().root(path);
ObjectStore::new(builder).unwrap().finish()
}
#[tokio::test]
async fn test_manifest_cache_basic() {
common_telemetry::init_default_ut_logging();
let dir = create_temp_dir("");
let local_store = new_fs_store(dir.path().to_str().unwrap());
let cache = ManifestCache::new(local_store.clone(), ReadableSize::mb(10), None).await;
let key = "region_1/manifest/00000000000000000007.json";
let file_path = cache.cache_file_path(key);
        // Getting a key that is not cached returns None.
assert!(cache.get(key).await.is_none());
// Write a file.
local_store
.write(&file_path, b"manifest content".as_slice())
.await
.unwrap();
// Add to the cache.
cache
.put(key.to_string(), IndexValue { file_size: 16 })
.await;
// Get the cached value.
let value = cache.get(key).await.unwrap();
assert_eq!(16, value.file_size);
// Get weighted size.
cache.index.run_pending_tasks().await;
assert_eq!(59, cache.index.weighted_size());
// Remove the file.
cache.remove(key).await;
cache.index.run_pending_tasks().await;
assert!(cache.get(key).await.is_none());
        // Ensure all pending tasks of the moka cache are done before the assertion.
cache.index.run_pending_tasks().await;
        // The file should no longer exist either.
assert!(!local_store.exists(&file_path).await.unwrap());
assert_eq!(0, cache.index.weighted_size());
}
#[tokio::test]
async fn test_manifest_cache_recover() {
common_telemetry::init_default_ut_logging();
let dir = create_temp_dir("");
let local_store = new_fs_store(dir.path().to_str().unwrap());
let cache = ManifestCache::new(local_store.clone(), ReadableSize::mb(10), None).await;
// Write some manifest files with different paths
let keys = [
"region_1/manifest/00000000000000000001.json",
"region_1/manifest/00000000000000000002.json",
"region_1/manifest/00000000000000000001.checkpoint",
"region_2/manifest/00000000000000000001.json",
];
let mut total_size = 0;
for (i, key) in keys.iter().enumerate() {
let file_path = cache.cache_file_path(key);
let content = format!("manifest-{}", i).into_bytes();
local_store
.write(&file_path, content.clone())
.await
.unwrap();
// Add to the cache.
cache
.put(
key.to_string(),
IndexValue {
file_size: content.len() as u32,
},
)
.await;
total_size += content.len() + key.len();
}
// Create a new cache instance which will automatically recover from local store
let cache = ManifestCache::new(local_store.clone(), ReadableSize::mb(10), None).await;
// Wait for recovery to complete synchronously
cache.recover(true).await;
// Check size.
cache.index.run_pending_tasks().await;
let total_cached = cache.index.weighted_size() as usize;
assert_eq!(total_size, total_cached);
// Verify all files
for (i, key) in keys.iter().enumerate() {
let value = cache.get(key).await.unwrap();
assert_eq!(format!("manifest-{}", i).len() as u32, value.file_size);
}
}
#[tokio::test]
async fn test_cache_file_path() {
let dir = create_temp_dir("");
let local_store = new_fs_store(dir.path().to_str().unwrap());
let cache = ManifestCache::new(local_store, ReadableSize::mb(10), None).await;
assert_eq!(
"cache/object/manifest/region_1/manifest/00000000000000000007.json",
cache.cache_file_path("region_1/manifest/00000000000000000007.json")
);
assert_eq!(
"cache/object/manifest/region_1/manifest/00000000000000000007.checkpoint",
cache.cache_file_path("region_1/manifest/00000000000000000007.checkpoint")
);
}
}

View File

@@ -30,7 +30,6 @@ use crate::access_layer::{
TempFileCleaner, WriteCachePathProvider, WriteType, new_fs_cache_store,
};
use crate::cache::file_cache::{FileCache, FileCacheRef, FileType, IndexKey, IndexValue};
use crate::cache::manifest_cache::ManifestCache;
use crate::error::{self, Result};
use crate::metrics::UPLOAD_BYTES_TOTAL;
use crate::region::opener::RegionLoadCacheTask;
@@ -54,8 +53,6 @@ pub struct WriteCache {
intermediate_manager: IntermediateManager,
/// Sender for region load cache tasks.
task_sender: UnboundedSender<RegionLoadCacheTask>,
/// Optional cache for manifest files.
manifest_cache: Option<ManifestCache>,
}
pub type WriteCacheRef = Arc<WriteCache>;
@@ -70,7 +67,6 @@ impl WriteCache {
index_cache_percent: Option<u8>,
puffin_manager_factory: PuffinManagerFactory,
intermediate_manager: IntermediateManager,
manifest_cache: Option<ManifestCache>,
) -> Result<Self> {
let (task_sender, task_receiver) = unbounded_channel();
@@ -87,7 +83,6 @@ impl WriteCache {
puffin_manager_factory,
intermediate_manager,
task_sender,
manifest_cache,
})
}
@@ -99,19 +94,10 @@ impl WriteCache {
index_cache_percent: Option<u8>,
puffin_manager_factory: PuffinManagerFactory,
intermediate_manager: IntermediateManager,
manifest_cache_capacity: ReadableSize,
) -> Result<Self> {
info!("Init write cache on {cache_dir}, capacity: {cache_capacity}");
let local_store = new_fs_cache_store(cache_dir).await?;
// Create manifest cache if capacity is non-zero
let manifest_cache = if manifest_cache_capacity.as_bytes() > 0 {
Some(ManifestCache::new(local_store.clone(), manifest_cache_capacity, ttl).await)
} else {
None
};
Self::new(
local_store,
cache_capacity,
@@ -119,7 +105,6 @@ impl WriteCache {
index_cache_percent,
puffin_manager_factory,
intermediate_manager,
manifest_cache,
)
.await
}
@@ -129,11 +114,6 @@ impl WriteCache {
self.file_cache.clone()
}
/// Returns the manifest cache if available.
pub(crate) fn manifest_cache(&self) -> Option<ManifestCache> {
self.manifest_cache.clone()
}
/// Build the puffin manager
pub(crate) fn build_puffin_manager(&self) -> SstPuffinManager {
let store = self.file_cache.local_store();
@@ -215,7 +195,6 @@ impl WriteCache {
puffin_manager: self
.puffin_manager_factory
.build(store.clone(), path_provider.clone()),
write_cache_enabled: true,
intermediate_manager: self.intermediate_manager.clone(),
index_options: write_request.index_options,
inverted_index_config: write_request.inverted_index_config,
@@ -267,7 +246,7 @@ impl WriteCache {
upload_tracker.push_uploaded_file(parquet_path);
if sst.index_metadata.file_size > 0 {
let puffin_key = IndexKey::new(region_id, sst.file_id, FileType::Puffin(0));
let puffin_key = IndexKey::new(region_id, sst.file_id, FileType::Puffin);
let puffin_path = upload_request
.dest_path_provider
.build_index_file_path(RegionFileId::new(region_id, sst.file_id));
@@ -440,11 +419,7 @@ impl UploadTracker {
file_cache.remove(parquet_key).await;
if sst.index_metadata.file_size > 0 {
let puffin_key = IndexKey::new(
self.region_id,
sst.file_id,
FileType::Puffin(sst.index_metadata.version),
);
let puffin_key = IndexKey::new(self.region_id, sst.file_id, FileType::Puffin);
file_cache.remove(puffin_key).await;
}
}
@@ -553,7 +528,7 @@ mod tests {
assert_eq!(remote_data.to_vec(), cache_data.to_vec());
// Check write cache contains the index key
let index_key = IndexKey::new(region_id, file_id, FileType::Puffin(0));
let index_key = IndexKey::new(region_id, file_id, FileType::Puffin);
assert!(write_cache.file_cache.contains_key(&index_key));
let remote_index_data = mock_store.read(&index_upload_path).await.unwrap();

View File

@@ -1110,7 +1110,6 @@ mod tests {
compress_type: CompressionType::Uncompressed,
checkpoint_distance: 10,
remove_file_options: Default::default(),
manifest_cache: None,
},
FormatType::PrimaryKey,
&Default::default(),

View File

@@ -399,7 +399,7 @@ impl DefaultCompactor {
available_indexes: sst_info.index_metadata.build_available_indexes(),
indexes: sst_info.index_metadata.build_indexes(),
index_file_size: sst_info.index_metadata.file_size,
index_version: 0,
index_file_id: None,
num_rows: sst_info.num_rows as u64,
num_row_groups: sst_info.num_row_groups,
sequence: max_sequence,
@@ -501,7 +501,7 @@ impl Compactor for DefaultCompactor {
// TODO: We might leak files if we fail to update manifest. We can add a cleanup task to remove them later.
compaction_region
.manifest_ctx
.update_manifest(RegionLeaderState::Writable, action_list, false)
.update_manifest(RegionLeaderState::Writable, action_list)
.await?;
Ok(edit)

View File

@@ -117,7 +117,7 @@ impl CompactionTaskImpl {
};
if let Err(e) = compaction_region
.manifest_ctx
.update_manifest(current_region_state, action_list, false)
.update_manifest(current_region_state, action_list)
.await
{
warn!(

View File

@@ -77,7 +77,7 @@ pub fn new_file_handle_with_size_and_sequence(
available_indexes: Default::default(),
indexes: Default::default(),
index_file_size: 0,
index_version: 0,
index_file_id: None,
num_rows: 0,
num_row_groups: 0,
num_series: 0,

View File

@@ -135,7 +135,7 @@ use crate::read::stream::ScanBatchStream;
use crate::region::MitoRegionRef;
use crate::region::opener::PartitionExprFetcherRef;
use crate::request::{RegionEditRequest, WorkerRequest};
use crate::sst::file::{FileMeta, RegionFileId, RegionIndexId};
use crate::sst::file::{FileMeta, RegionFileId};
use crate::sst::file_ref::FileReferenceManagerRef;
use crate::wal::entry_distributor::{
DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE, build_wal_entry_distributor_and_receivers,
@@ -541,23 +541,22 @@ impl MitoEngine {
return Vec::new();
};
let index_version = entry.index_version;
let file_id = match FileId::parse_str(&entry.file_id) {
let Some(index_file_id) = entry.index_file_id.as_ref() else {
return Vec::new();
};
let file_id = match FileId::parse_str(index_file_id) {
Ok(file_id) => file_id,
Err(err) => {
warn!(
err;
"Failed to parse puffin index file id, table_dir: {}, file_id: {}",
entry.table_dir,
entry.file_id
index_file_id
);
return Vec::new();
}
};
let region_index_id = RegionIndexId::new(
RegionFileId::new(entry.region_id, file_id),
index_version,
);
let region_file_id = RegionFileId::new(entry.region_id, file_id);
let context = IndexEntryContext {
table_dir: &entry.table_dir,
index_file_path: index_file_path.as_str(),
@@ -566,7 +565,7 @@ impl MitoEngine {
region_number: entry.region_number,
region_group: entry.region_group,
region_sequence: entry.region_sequence,
file_id: &entry.file_id,
file_id: index_file_id,
index_file_size: entry.index_file_size,
node_id,
};
@@ -577,7 +576,7 @@ impl MitoEngine {
collect_index_entries_from_puffin(
manager,
region_index_id,
region_file_id,
context,
bloom_filter_cache,
inverted_index_cache,

View File

@@ -901,7 +901,7 @@ async fn test_alter_region_ttl_options_with_format(flat_format: bool) {
check_ttl(&engine, &Duration::from_secs(500));
}
#[tokio::test(flavor = "multi_thread")]
#[tokio::test]
async fn test_write_stall_on_altering() {
common_telemetry::init_default_ut_logging();
@@ -952,8 +952,6 @@ async fn test_write_stall_on_altering_with_format(flat_format: bool) {
.await
.unwrap();
});
// Make sure the loop is handling the alter request.
tokio::time::sleep(Duration::from_millis(100)).await;
let column_schemas_cloned = column_schemas.clone();
let engine_cloned = engine.clone();
@@ -964,8 +962,6 @@ async fn test_write_stall_on_altering_with_format(flat_format: bool) {
};
put_rows(&engine_cloned, region_id, rows).await;
});
// Make sure the loop is handling the put request.
tokio::time::sleep(Duration::from_millis(100)).await;
listener.wake_notify();
alter_job.await.unwrap();

View File

@@ -861,10 +861,9 @@ async fn test_cache_null_primary_key_with_format(flat_format: bool) {
#[tokio::test]
async fn test_list_ssts() {
test_list_ssts_with_format(false, r#"
ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/11_0000000001/<file_id>.parquet", file_size: 2513, index_file_path: Some("test/11_0000000001/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/11_0000000002/<file_id>.parquet", file_size: 2513, index_file_path: Some("test/11_0000000002/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/22_0000000042/<file_id>.parquet", file_size: 2513, index_file_path: Some("test/22_0000000042/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"# ,
r#"
ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_id: Some("<index_file_id>"), level: 0, file_path: "test/11_0000000001/<file_id>.parquet", file_size: 2513, index_file_path: Some("test/11_0000000001/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", index_file_id: Some("<index_file_id>"), level: 0, file_path: "test/11_0000000002/<file_id>.parquet", file_size: 2513, index_file_path: Some("test/11_0000000002/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", index_file_id: Some("<index_file_id>"), level: 0, file_path: "test/22_0000000042/<file_id>.parquet", file_size: 2513, index_file_path: Some("test/22_0000000042/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"# ,r#"
StorageSstEntry { file_path: "test/11_0000000001/<file_id>.parquet", file_size: None, last_modified_ms: None, node_id: None }
StorageSstEntry { file_path: "test/11_0000000001/index/<file_id>.puffin", file_size: None, last_modified_ms: None, node_id: None }
StorageSstEntry { file_path: "test/11_0000000002/<file_id>.parquet", file_size: None, last_modified_ms: None, node_id: None }
@@ -872,10 +871,9 @@ StorageSstEntry { file_path: "test/11_0000000002/index/<file_id>.puffin", file_s
StorageSstEntry { file_path: "test/22_0000000042/<file_id>.parquet", file_size: None, last_modified_ms: None, node_id: None }
StorageSstEntry { file_path: "test/22_0000000042/index/<file_id>.puffin", file_size: None, last_modified_ms: None, node_id: None }"#).await;
test_list_ssts_with_format(true, r#"
ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/11_0000000001/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/11_0000000001/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/11_0000000002/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/11_0000000002/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/22_0000000042/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/22_0000000042/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"#,
r#"
ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_id: Some("<index_file_id>"), level: 0, file_path: "test/11_0000000001/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/11_0000000001/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", index_file_id: Some("<index_file_id>"), level: 0, file_path: "test/11_0000000002/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/11_0000000002/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", index_file_id: Some("<index_file_id>"), level: 0, file_path: "test/22_0000000042/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/22_0000000042/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"#, r#"
StorageSstEntry { file_path: "test/11_0000000001/<file_id>.parquet", file_size: None, last_modified_ms: None, node_id: None }
StorageSstEntry { file_path: "test/11_0000000001/index/<file_id>.puffin", file_size: None, last_modified_ms: None, node_id: None }
StorageSstEntry { file_path: "test/11_0000000002/<file_id>.parquet", file_size: None, last_modified_ms: None, node_id: None }
@@ -947,13 +945,13 @@ async fn test_list_ssts_with_format(
.index_file_path
.map(|p| p.replace(&e.file_id, "<file_id>"));
e.file_id = "<file_id>".to_string();
e.index_version = 0;
e.index_file_id = e.index_file_id.map(|_| "<index_file_id>".to_string());
format!("\n{:?}", e)
})
.sorted()
.collect::<Vec<_>>()
.join("");
assert_eq!(debug_format, expected_manifest_ssts, "{}", debug_format);
    assert_eq!(debug_format, expected_manifest_ssts);
// list from storage
let storage_entries = engine
@@ -971,7 +969,7 @@ async fn test_list_ssts_with_format(
.sorted()
.collect::<Vec<_>>()
.join("");
assert_eq!(debug_format, expected_storage_ssts, "{}", debug_format);
    assert_eq!(debug_format, expected_storage_ssts);
}
#[tokio::test]

View File

@@ -515,7 +515,6 @@ async fn test_flush_workers() {
}
async fn test_flush_workers_with_format(flat_format: bool) {
common_telemetry::init_default_ut_logging();
let mut env = TestEnv::new().await;
let write_buffer_manager = Arc::new(MockWriteBufferManager::default());
let listener = Arc::new(FlushListener::default());
@@ -575,7 +574,7 @@ async fn test_flush_workers_with_format(flat_format: bool) {
put_rows(&engine, region_id0, rows).await;
// Waits until flush is finished.
while listener.success_count() < 3 {
while listener.success_count() < 2 {
listener.wait().await;
}

View File

@@ -55,10 +55,10 @@ async fn num_of_index_files(engine: &MitoEngine, scanner: &Scanner, region_id: R
return 0;
}
let mut index_files_count: usize = 0;
for region_index_id in scanner.index_ids() {
for region_file_id in scanner.file_ids() {
let index_path = location::index_file_path(
access_layer.table_dir(),
region_index_id,
region_file_id,
access_layer.path_type(),
);
if access_layer

View File

@@ -74,9 +74,6 @@ pub trait EventListener: Send + Sync {
    /// Notifies the listener that the region starts to send a region change result to the worker.
    async fn on_notify_region_change_result_begin(&self, _region_id: RegionId) {}
    /// Notifies the listener that the region starts to send an enter staging result to the worker.
async fn on_enter_staging_result_begin(&self, _region_id: RegionId) {}
    /// Notifies the listener that the index build task has been executed successfully.
async fn on_index_build_finish(&self, _region_file_id: RegionFileId) {}
@@ -310,37 +307,6 @@ impl EventListener for NotifyRegionChangeResultListener {
region_id
);
self.notify.notified().await;
info!(
"Continue to sending region change result for region {}",
region_id
);
}
}
#[derive(Default)]
pub struct NotifyEnterStagingResultListener {
notify: Notify,
}
impl NotifyEnterStagingResultListener {
    /// Wakes the listener to continue sending the enter staging result.
pub fn wake_notify(&self) {
self.notify.notify_one();
}
}
#[async_trait]
impl EventListener for NotifyEnterStagingResultListener {
async fn on_enter_staging_result_begin(&self, region_id: RegionId) {
info!(
"Wait on notify to start notify enter staging result for region {}",
region_id
);
self.notify.notified().await;
info!(
"Continue to sending enter staging result for region {}",
region_id
);
}
}

View File

@@ -32,7 +32,7 @@ use crate::cache::index::bloom_filter_index::{
BloomFilterIndexCacheRef, CachedBloomFilterIndexBlobReader, Tag,
};
use crate::cache::index::inverted_index::{CachedInvertedIndexBlobReader, InvertedIndexCacheRef};
use crate::sst::file::RegionIndexId;
use crate::sst::file::RegionFileId;
use crate::sst::index::bloom_filter::INDEX_BLOB_TYPE as BLOOM_BLOB_TYPE;
use crate::sst::index::fulltext_index::{
INDEX_BLOB_TYPE_BLOOM as FULLTEXT_BLOOM_BLOB_TYPE,
@@ -66,14 +66,14 @@ pub(crate) struct IndexEntryContext<'a> {
/// Collect index metadata entries present in the SST puffin file.
pub(crate) async fn collect_index_entries_from_puffin(
manager: SstPuffinManager,
region_index_id: RegionIndexId,
region_file_id: RegionFileId,
context: IndexEntryContext<'_>,
bloom_filter_cache: Option<BloomFilterIndexCacheRef>,
inverted_index_cache: Option<InvertedIndexCacheRef>,
) -> Vec<PuffinIndexMetaEntry> {
let mut entries = Vec::new();
let reader = match manager.reader(&region_index_id).await {
let reader = match manager.reader(&region_file_id).await {
Ok(reader) => reader,
Err(err) => {
warn!(
@@ -104,7 +104,7 @@ pub(crate) async fn collect_index_entries_from_puffin(
Some(BlobIndexTypeTargetKey::BloomFilter(target_key)) => {
let bloom_meta = try_read_bloom_meta(
&reader,
region_index_id,
region_file_id,
blob.blob_type.as_str(),
target_key,
bloom_filter_cache.as_ref(),
@@ -130,7 +130,7 @@ pub(crate) async fn collect_index_entries_from_puffin(
Some(BlobIndexTypeTargetKey::FulltextBloom(target_key)) => {
let bloom_meta = try_read_bloom_meta(
&reader,
region_index_id,
region_file_id,
blob.blob_type.as_str(),
target_key,
bloom_filter_cache.as_ref(),
@@ -172,7 +172,7 @@ pub(crate) async fn collect_index_entries_from_puffin(
Some(BlobIndexTypeTargetKey::Inverted) => {
let mut inverted_entries = collect_inverted_entries(
&reader,
region_index_id,
region_file_id,
inverted_index_cache.as_ref(),
&context,
)
@@ -188,12 +188,12 @@ pub(crate) async fn collect_index_entries_from_puffin(
async fn collect_inverted_entries(
reader: &SstPuffinReader,
region_index_id: RegionIndexId,
region_file_id: RegionFileId,
cache: Option<&InvertedIndexCacheRef>,
context: &IndexEntryContext<'_>,
) -> Vec<PuffinIndexMetaEntry> {
// Read the inverted index blob and surface its per-column metadata entries.
let file_id = region_index_id.file_id();
let file_id = region_file_id.file_id();
let guard = match reader.blob(INVERTED_BLOB_TYPE).await {
Ok(guard) => guard,
@@ -229,12 +229,11 @@ async fn collect_inverted_entries(
let metas = if let (Some(cache), Some(blob_size)) = (cache, blob_size) {
let reader = CachedInvertedIndexBlobReader::new(
file_id,
region_index_id.version,
blob_size,
InvertedIndexBlobReader::new(blob_reader),
cache.clone(),
);
match reader.metadata(None).await {
match reader.metadata().await {
Ok(metas) => metas,
Err(err) => {
warn!(
@@ -248,7 +247,7 @@ async fn collect_inverted_entries(
}
} else {
let reader = InvertedIndexBlobReader::new(blob_reader);
match reader.metadata(None).await {
match reader.metadata().await {
Ok(metas) => metas,
Err(err) => {
warn!(
@@ -290,7 +289,7 @@ fn build_inverted_entries(
async fn try_read_bloom_meta(
reader: &SstPuffinReader,
region_index_id: RegionIndexId,
region_file_id: RegionFileId,
blob_type: &str,
target_key: &str,
cache: Option<&BloomFilterIndexCacheRef>,
@@ -312,18 +311,17 @@ async fn try_read_bloom_meta(
let result = match (cache, column_id, blob_size) {
(Some(cache), Some(column_id), Some(blob_size)) => {
CachedBloomFilterIndexBlobReader::new(
region_index_id.file_id(),
region_index_id.version,
region_file_id.file_id(),
column_id,
tag,
blob_size,
bloom_reader,
cache.clone(),
)
.metadata(None)
.metadata()
.await
}
_ => bloom_reader.metadata(None).await,
_ => bloom_reader.metadata().await,
};
match result {

View File

@@ -14,30 +14,17 @@
//! Integration tests for staging state functionality.
use std::assert_matches::assert_matches;
use std::fs;
use std::sync::Arc;
use std::time::Duration;
use api::v1::Rows;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_recordbatch::RecordBatches;
use object_store::Buffer;
use object_store::layers::mock::{
Entry, Error as MockError, ErrorKind, List, Lister, Metadata, MockLayerBuilder,
Result as MockResult, Write, Writer,
};
use store_api::region_engine::{RegionEngine, SettableRegionRoleState};
use store_api::region_request::{
EnterStagingRequest, RegionAlterRequest, RegionFlushRequest, RegionRequest,
RegionTruncateRequest,
RegionAlterRequest, RegionFlushRequest, RegionRequest, RegionTruncateRequest,
};
use store_api::storage::{RegionId, ScanRequest};
use crate::config::MitoConfig;
use crate::engine::listener::NotifyEnterStagingResultListener;
use crate::error::Error;
use crate::region::{RegionLeaderState, RegionRoleState};
use crate::request::WorkerRequest;
use crate::test_util::{CreateRequestBuilder, TestEnv, build_rows, put_rows, rows_schema};
@@ -227,8 +214,6 @@ async fn test_staging_state_validation_patterns() {
);
}
const PARTITION_EXPR: &str = "partition_expr";
#[tokio::test]
async fn test_staging_manifest_directory() {
test_staging_manifest_directory_with_format(false).await;
@@ -236,7 +221,6 @@ async fn test_staging_manifest_directory() {
}
async fn test_staging_manifest_directory_with_format(flat_format: bool) {
common_telemetry::init_default_ut_logging();
let mut env = TestEnv::new().await;
let engine = env
.create_engine(MitoConfig {
@@ -271,57 +255,9 @@ async fn test_staging_manifest_directory_with_format(flat_format: bool) {
// Now test staging mode manifest creation
// Set region to staging mode using the engine API
engine
.handle_request(
region_id,
RegionRequest::EnterStaging(EnterStagingRequest {
partition_expr: PARTITION_EXPR.to_string(),
}),
)
.set_region_role_state_gracefully(region_id, SettableRegionRoleState::StagingLeader)
.await
.unwrap();
let region = engine.get_region(region_id).unwrap();
let staging_partition_expr = region.staging_partition_expr.lock().unwrap().clone();
assert_eq!(staging_partition_expr.unwrap(), PARTITION_EXPR);
{
let manager = region.manifest_ctx.manifest_manager.read().await;
assert_eq!(
manager
.staging_manifest()
.unwrap()
.metadata
.partition_expr
.as_deref()
.unwrap(),
PARTITION_EXPR
);
assert!(manager.manifest().metadata.partition_expr.is_none());
}
// Should be ok to enter staging mode again with the same partition expr
engine
.handle_request(
region_id,
RegionRequest::EnterStaging(EnterStagingRequest {
partition_expr: PARTITION_EXPR.to_string(),
}),
)
.await
.unwrap();
    // Should return an error when trying to enter staging mode again with a different partition expr
let err = engine
.handle_request(
region_id,
RegionRequest::EnterStaging(EnterStagingRequest {
partition_expr: "".to_string(),
}),
)
.await
.unwrap_err();
assert_matches!(
err.into_inner().as_any().downcast_ref::<Error>().unwrap(),
Error::StagingPartitionExprMismatch { .. }
);
// Put some data and flush in staging mode
let rows_data = Rows {
@@ -376,7 +312,6 @@ async fn test_staging_exit_success_with_manifests() {
}
async fn test_staging_exit_success_with_manifests_with_format(flat_format: bool) {
common_telemetry::init_default_ut_logging();
let mut env = TestEnv::new().await;
let engine = env
.create_engine(MitoConfig {
@@ -395,28 +330,16 @@ async fn test_staging_exit_success_with_manifests_with_format(flat_format: bool)
.await
.unwrap();
// Add some data and flush in staging mode to generate staging manifests
let rows_data = Rows {
schema: column_schemas.clone(),
rows: build_rows(0, 3),
};
put_rows(&engine, region_id, rows_data).await;
// Enter staging mode
engine
.handle_request(
region_id,
RegionRequest::EnterStaging(EnterStagingRequest {
partition_expr: PARTITION_EXPR.to_string(),
}),
)
.set_region_role_state_gracefully(region_id, SettableRegionRoleState::StagingLeader)
.await
.unwrap();
// Add some data and flush in staging mode to generate staging manifests
let rows_data = Rows {
schema: column_schemas.clone(),
rows: build_rows(3, 8),
rows: build_rows(0, 5),
};
put_rows(&engine, region_id, rows_data).await;
@@ -434,7 +357,7 @@ async fn test_staging_exit_success_with_manifests_with_format(flat_format: bool)
// Add more data and flush again to generate multiple staging manifests
let rows_data2 = Rows {
schema: column_schemas.clone(),
rows: build_rows(8, 10),
rows: build_rows(5, 10),
};
put_rows(&engine, region_id, rows_data2).await;
@@ -459,11 +382,8 @@ async fn test_staging_exit_success_with_manifests_with_format(flat_format: bool)
.unwrap();
assert_eq!(
staging_files_before.len(),
// Two files for flush operation
// One file for entering staging mode
3,
"Staging manifest directory should contain 3 files before exit, got: {:?}",
staging_files_before
2,
"Staging manifest directory should contain two files before exit"
);
// Count normal manifest files before exit
@@ -474,11 +394,8 @@ async fn test_staging_exit_success_with_manifests_with_format(flat_format: bool)
.unwrap();
let normal_count_before = normal_files_before.len();
assert_eq!(
// One file for table creation
// One file for flush operation
normal_count_before,
2,
"Normal manifest directory should initially contain 2 files"
normal_count_before, 1,
"Normal manifest directory should initially contain one file"
);
    // Try to read data before exiting staging; SST files should be invisible
@@ -486,8 +403,8 @@ async fn test_staging_exit_success_with_manifests_with_format(flat_format: bool)
let scanner = engine.scanner(region_id, request).await.unwrap();
assert_eq!(
scanner.num_files(),
1,
"1 SST files should be scanned before exit"
0,
"No SST files should be scanned before exit"
);
assert_eq!(
scanner.num_memtables(),
@@ -498,20 +415,14 @@ async fn test_staging_exit_success_with_manifests_with_format(flat_format: bool)
let batches = RecordBatches::try_collect(stream).await.unwrap();
let total_rows: usize = batches.iter().map(|rb| rb.num_rows()).sum();
assert_eq!(
total_rows, 3,
"3 rows should be readable before exit staging mode"
total_rows, 0,
"No data should be readable before exit staging mode"
);
// Inspect SSTs from manifest
let sst_entries = engine.all_ssts_from_manifest().await;
assert_eq!(
sst_entries.len(),
3,
"sst entries should be 3, got: {:?}",
sst_entries
);
assert_eq!(sst_entries.iter().filter(|e| e.visible).count(), 1);
assert_eq!(sst_entries.iter().filter(|e| !e.visible).count(), 2);
assert_eq!(sst_entries.len(), 2);
assert!(sst_entries.iter().all(|e| !e.visible));
// Exit staging mode successfully
engine
@@ -559,7 +470,7 @@ async fn test_staging_exit_success_with_manifests_with_format(flat_format: bool)
let scanner = engine.scanner(region_id, request).await.unwrap();
assert_eq!(
scanner.num_files(),
3,
2,
"SST files should be scanned after exit"
);
@@ -571,209 +482,6 @@ async fn test_staging_exit_success_with_manifests_with_format(flat_format: bool)
// Inspect SSTs from manifest
let sst_entries = engine.all_ssts_from_manifest().await;
assert_eq!(sst_entries.len(), 3);
assert_eq!(sst_entries.len(), 2);
assert!(sst_entries.iter().all(|e| e.visible));
}
#[tokio::test(flavor = "multi_thread")]
async fn test_write_stall_on_enter_staging() {
test_write_stall_on_enter_staging_with_format(false).await;
test_write_stall_on_enter_staging_with_format(true).await;
}
async fn test_write_stall_on_enter_staging_with_format(flat_format: bool) {
let mut env = TestEnv::new().await;
let listener = Arc::new(NotifyEnterStagingResultListener::default());
let engine = env
.create_engine_with(
MitoConfig {
default_experimental_flat_format: flat_format,
..Default::default()
},
None,
Some(listener.clone()),
None,
)
.await;
let region_id = RegionId::new(1, 1);
let request = CreateRequestBuilder::new().build();
env.get_schema_metadata_manager()
.register_region_table_info(
region_id.table_id(),
"test_table",
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
let column_schemas = rows_schema(&request);
engine
.handle_request(region_id, RegionRequest::Create(request))
.await
.unwrap();
let engine_cloned = engine.clone();
let alter_job = tokio::spawn(async move {
engine_cloned
.handle_request(
region_id,
RegionRequest::EnterStaging(EnterStagingRequest {
partition_expr: PARTITION_EXPR.to_string(),
}),
)
.await
.unwrap();
});
// Make sure the loop is handling the alter request.
tokio::time::sleep(Duration::from_millis(100)).await;
let column_schemas_cloned = column_schemas.clone();
let engine_cloned = engine.clone();
let put_job = tokio::spawn(async move {
let rows = Rows {
schema: column_schemas_cloned,
rows: build_rows(0, 3),
};
put_rows(&engine_cloned, region_id, rows).await;
});
// Make sure the loop is handling the put request.
tokio::time::sleep(Duration::from_millis(100)).await;
listener.wake_notify();
alter_job.await.unwrap();
put_job.await.unwrap();
let expected = "\
+-------+---------+---------------------+
| tag_0 | field_0 | ts |
+-------+---------+---------------------+
| 0 | 0.0 | 1970-01-01T00:00:00 |
| 1 | 1.0 | 1970-01-01T00:00:01 |
| 2 | 2.0 | 1970-01-01T00:00:02 |
+-------+---------+---------------------+";
let request = ScanRequest::default();
let scanner = engine.scanner(region_id, request).await.unwrap();
let stream = scanner.scan().await.unwrap();
let batches = RecordBatches::try_collect(stream).await.unwrap();
assert_eq!(expected, batches.pretty_print().unwrap());
}
#[tokio::test]
async fn test_enter_staging_clean_staging_manifest_error() {
common_telemetry::init_default_ut_logging();
test_enter_staging_clean_staging_manifest_error_with_format(false).await;
test_enter_staging_clean_staging_manifest_error_with_format(true).await;
}
struct MockLister {
path: String,
inner: Lister,
}
impl List for MockLister {
async fn next(&mut self) -> MockResult<Option<Entry>> {
if self.path.contains("staging") {
return Err(MockError::new(ErrorKind::Unexpected, "mock error"));
}
self.inner.next().await
}
}
struct MockWriter {
path: String,
inner: Writer,
}
impl Write for MockWriter {
async fn write(&mut self, bs: Buffer) -> MockResult<()> {
self.inner.write(bs).await
}
async fn close(&mut self) -> MockResult<Metadata> {
if self.path.contains("staging") {
return Err(MockError::new(ErrorKind::Unexpected, "mock error"));
}
self.inner.close().await
}
async fn abort(&mut self) -> MockResult<()> {
self.inner.abort().await
}
}
async fn test_enter_staging_error(env: &mut TestEnv, flat_format: bool) {
let engine = env
.create_engine(MitoConfig {
default_experimental_flat_format: flat_format,
..Default::default()
})
.await;
let region_id = RegionId::new(1024, 0);
let request = CreateRequestBuilder::new().build();
engine
.handle_request(region_id, RegionRequest::Create(request))
.await
.unwrap();
let err = engine
.handle_request(
region_id,
RegionRequest::EnterStaging(EnterStagingRequest {
partition_expr: PARTITION_EXPR.to_string(),
}),
)
.await
.unwrap_err();
assert_eq!(err.status_code(), StatusCode::StorageUnavailable);
let region = engine.get_region(region_id).unwrap();
assert!(
region
.manifest_ctx
.manifest_manager
.read()
.await
.staging_manifest()
.is_none()
);
let state = region.state();
assert_eq!(state, RegionRoleState::Leader(RegionLeaderState::Writable));
}
async fn test_enter_staging_clean_staging_manifest_error_with_format(flat_format: bool) {
let mock_layer = MockLayerBuilder::default()
.lister_factory(Arc::new(|path, _args, lister| {
Box::new(MockLister {
path: path.to_string(),
inner: lister,
})
}))
.build()
.unwrap();
let mut env = TestEnv::new().await.with_mock_layer(mock_layer);
test_enter_staging_error(&mut env, flat_format).await;
}
#[tokio::test]
async fn test_enter_staging_save_staging_manifest_error() {
common_telemetry::init_default_ut_logging();
test_enter_staging_save_staging_manifest_error_with_format(false).await;
test_enter_staging_save_staging_manifest_error_with_format(true).await;
}
async fn test_enter_staging_save_staging_manifest_error_with_format(flat_format: bool) {
let mock_layer = MockLayerBuilder::default()
        .writer_factory(Arc::new(|path, _args, writer| {
            Box::new(MockWriter {
                path: path.to_string(),
                inner: writer,
})
}))
.build()
.unwrap();
let mut env = TestEnv::new().await.with_mock_layer(mock_layer);
test_enter_staging_error(&mut env, flat_format).await;
}

View File

@@ -1150,18 +1150,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display(
"Staging partition expr mismatch, manifest: {:?}, request: {}",
manifest_expr,
request_expr
))]
StagingPartitionExprMismatch {
manifest_expr: Option<String>,
request_expr: String,
#[snafu(implicit)]
location: Location,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@@ -1208,8 +1196,7 @@ impl ErrorExt for Error {
| InstallManifestTo { .. }
| Unexpected { .. }
| SerializeColumnMetadata { .. }
| SerializeManifest { .. }
| StagingPartitionExprMismatch { .. } => StatusCode::Unexpected,
| SerializeManifest { .. } => StatusCode::Unexpected,
RegionNotFound { .. } => StatusCode::RegionNotFound,
ObjectStoreNotFound { .. }

View File

@@ -20,7 +20,7 @@ use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::Instant;
use common_telemetry::{debug, error, info};
use common_telemetry::{debug, error, info, trace};
use datatypes::arrow::datatypes::SchemaRef;
use either::Either;
use partition::expr::PartitionExpr;
@@ -89,12 +89,6 @@ pub trait WriteBufferManager: Send + Sync + std::fmt::Debug {
/// Returns the total memory used by memtables.
fn memory_usage(&self) -> usize;
/// Returns the mutable memtable memory limit.
///
/// The write buffer manager should flush memtables when the mutable memory usage
/// exceeds this limit.
fn flush_limit(&self) -> usize;
}
pub type WriteBufferManagerRef = Arc<dyn WriteBufferManager>;
@@ -151,7 +145,7 @@ impl WriteBufferManagerImpl {
impl WriteBufferManager for WriteBufferManagerImpl {
fn should_flush_engine(&self) -> bool {
let mutable_memtable_memory_usage = self.memory_active.load(Ordering::Relaxed);
if mutable_memtable_memory_usage >= self.mutable_limit {
if mutable_memtable_memory_usage > self.mutable_limit {
debug!(
"Engine should flush (over mutable limit), mutable_usage: {}, memory_usage: {}, mutable_limit: {}, global_limit: {}",
mutable_memtable_memory_usage,
@@ -163,8 +157,23 @@ impl WriteBufferManager for WriteBufferManagerImpl {
}
let memory_usage = self.memory_used.load(Ordering::Relaxed);
        // If the total memory usage exceeds the global buffer size, we trigger a
        // more aggressive flush. But if more than half of the memory is already
        // being flushed, triggering more flushes may not help, so we hold off instead.
if memory_usage >= self.global_write_buffer_size {
return true;
if mutable_memtable_memory_usage >= self.global_write_buffer_size / 2 {
debug!(
"Engine should flush (over total limit), memory_usage: {}, global_write_buffer_size: {}, \
mutable_usage: {}.",
memory_usage, self.global_write_buffer_size, mutable_memtable_memory_usage
);
return true;
} else {
trace!(
"Engine won't flush, memory_usage: {}, global_write_buffer_size: {}, mutable_usage: {}.",
memory_usage, self.global_write_buffer_size, mutable_memtable_memory_usage
);
}
}
false
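
Restated as a standalone predicate, the two triggers above become explicit (illustrative only; the real implementation reads atomics and logs its decision). For example, with a 1 GiB global budget, 1.1 GiB total but only 300 MiB mutable does not flush, since the other 800 MiB is already immutable and presumably being flushed.

fn should_flush(mutable: usize, total: usize, mutable_limit: usize, global: usize) -> bool {
    // Trigger 1: mutable memtables alone exceed their budget.
    if mutable > mutable_limit {
        return true;
    }
    // Trigger 2: total usage is over the global budget, but only if at least
    // half of that budget is still mutable; otherwise most memory is already
    // being flushed and scheduling more flushes would not help.
    total >= global && mutable >= global / 2
}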
@@ -196,14 +205,10 @@ impl WriteBufferManager for WriteBufferManagerImpl {
fn memory_usage(&self) -> usize {
self.memory_used.load(Ordering::Relaxed)
}
fn flush_limit(&self) -> usize {
self.mutable_limit
}
}
/// Reason of a flush task.
#[derive(Debug, IntoStaticStr, Clone, Copy, PartialEq, Eq)]
#[derive(Debug, IntoStaticStr)]
pub enum FlushReason {
/// Other reasons.
Others,
@@ -217,8 +222,6 @@ pub enum FlushReason {
Periodically,
/// Flush memtable during downgrading state.
Downgrading,
/// Enter staging mode.
EnterStaging,
}
impl FlushReason {
@@ -250,8 +253,6 @@ pub(crate) struct RegionFlushTask {
pub(crate) index_options: IndexOptions,
/// Semaphore to control flush concurrency.
pub(crate) flush_semaphore: Arc<Semaphore>,
/// Whether the region is in staging mode.
pub(crate) is_staging: bool,
}
impl RegionFlushTask {
@@ -315,7 +316,6 @@ impl RegionFlushTask {
_timer: timer,
edit,
memtables_to_remove,
is_staging: self.is_staging,
};
WorkerRequest::Background {
region_id: self.region_id,
@@ -398,10 +398,7 @@ impl RegionFlushTask {
flushed_sequence: Some(version_data.committed_sequence),
committed_sequence: None,
};
info!(
"Applying {edit:?} to region {}, is_staging: {}",
self.region_id, self.is_staging
);
info!("Applying {edit:?} to region {}", self.region_id);
let action_list = RegionMetaActionList::with_action(RegionMetaAction::Edit(edit.clone()));
@@ -420,12 +417,11 @@ impl RegionFlushTask {
// add a cleanup job to remove them later.
let version = self
.manifest_ctx
.update_manifest(expected_state, action_list, self.is_staging)
.update_manifest(expected_state, action_list)
.await?;
info!(
"Successfully update manifest version to {version}, region: {}, is_staging: {}, reason: {}",
"Successfully update manifest version to {version}, region: {}, reason: {}",
self.region_id,
self.is_staging,
self.reason.as_str()
);
@@ -643,7 +639,7 @@ impl RegionFlushTask {
available_indexes: sst_info.index_metadata.build_available_indexes(),
indexes: sst_info.index_metadata.build_indexes(),
index_file_size: sst_info.index_metadata.file_size,
index_version: 0,
index_file_id: None,
num_rows: sst_info.num_rows as u64,
num_row_groups: sst_info.num_row_groups,
sequence: NonZeroU64::new(max_sequence),
@@ -883,31 +879,6 @@ impl FlushScheduler {
self.region_status.contains_key(&region_id)
}
fn schedule_flush_task(
&mut self,
version_control: &VersionControlRef,
task: RegionFlushTask,
) -> Result<()> {
let region_id = task.region_id;
// If the current region doesn't have a flush status, we can flush the region directly.
if let Err(e) = version_control.freeze_mutable() {
error!(e; "Failed to freeze the mutable memtable for region {}", region_id);
return Err(e);
}
// Submit a flush job.
let job = task.into_flush_job(version_control);
if let Err(e) = self.scheduler.schedule(job) {
// If scheduler returns error, senders in the job will be dropped and waiters
// can get recv errors.
error!(e; "Failed to schedule flush job for region {}", region_id);
return Err(e);
}
Ok(())
}
/// Schedules a flush `task` for specific `region`.
pub(crate) fn schedule_flush(
&mut self,
@@ -930,21 +901,46 @@ impl FlushScheduler {
.with_label_values(&[task.reason.as_str()])
.inc();
// If the current region has a flush status, merge the task.
if let Some(flush_status) = self.region_status.get_mut(&region_id) {
// Checks whether we can flush the region now.
debug!("Merging flush task for region {}", region_id);
// Add this region to status map.
let flush_status = self
.region_status
.entry(region_id)
.or_insert_with(|| FlushStatus::new(region_id, version_control.clone()));
// Checks whether we can flush the region now.
if flush_status.flushing {
// There is already a flush job running.
flush_status.merge_task(task);
return Ok(());
}
self.schedule_flush_task(version_control, task)?;
// TODO(yingwen): We can merge with pending and execute directly.
// If there are pending tasks, then we should push it to pending list.
if flush_status.pending_task.is_some() {
flush_status.merge_task(task);
return Ok(());
}
// Add this region to status map.
let _ = self.region_status.insert(
region_id,
FlushStatus::new(region_id, version_control.clone()),
);
// Now we can flush the region directly.
if let Err(e) = version_control.freeze_mutable() {
error!(e; "Failed to freeze the mutable memtable for region {}", region_id);
// Remove from region status if we can't freeze the mutable memtable.
self.region_status.remove(&region_id);
return Err(e);
}
// Submit a flush job.
let job = task.into_flush_job(version_control);
if let Err(e) = self.scheduler.schedule(job) {
// If scheduler returns error, senders in the job will be dropped and waiters
// can get recv errors.
error!(e; "Failed to schedule flush job for region {}", region_id);
// Remove from region status if we can't submit the task.
self.region_status.remove(&region_id);
return Err(e);
}
flush_status.flushing = true;
Ok(())
}
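A minimal sketch of the merge-or-run scheduling pattern above, using toy stand-ins for FlushStatus and RegionFlushTask (shapes assumed for illustration):
use std::collections::HashMap;
#[derive(Default)]
struct Status {
    flushing: bool,
    pending: Option<Task>,
}
#[derive(Default)]
struct Task;
impl Task {
    // Fold another request into this task (senders, reason, ...).
    fn merge(&mut self, _other: Task) {}
}
fn schedule(statuses: &mut HashMap<u64, Status>, region: u64, task: Task) {
    let status = statuses.entry(region).or_default();
    // A flush is already running or queued for this region: merge and return.
    if status.flushing || status.pending.is_some() {
        match status.pending.as_mut() {
            Some(pending) => pending.merge(task),
            None => status.pending = Some(task),
        }
        return;
    }
    // Otherwise mark the region as flushing and submit the job; on a
    // submission failure the real code removes the map entry again.
    status.flushing = true;
    // ... freeze mutable memtables and hand the task to the job scheduler ...
}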
@@ -961,56 +957,48 @@ impl FlushScheduler {
Vec<SenderBulkRequest>,
)> {
let flush_status = self.region_status.get_mut(&region_id)?;
// If region doesn't have any pending flush task, we need to remove it from the status.
if flush_status.pending_task.is_none() {
// This region doesn't have running flush job.
flush_status.flushing = false;
let pending_requests = if flush_status.pending_task.is_none() {
// The region doesn't have any pending flush task.
// Safety: The flush status must exist.
debug!(
"Region {} doesn't have any pending flush task, removing it from the status",
region_id
);
let flush_status = self.region_status.remove(&region_id).unwrap();
return Some((
Some((
flush_status.pending_ddls,
flush_status.pending_writes,
flush_status.pending_bulk_writes,
));
))
} else {
let version_data = flush_status.version_control.current();
if version_data.version.memtables.is_empty() {
// The region has nothing to flush, we also need to remove it from the status.
// Safety: The pending task is not None.
let task = flush_status.pending_task.take().unwrap();
// The region has nothing to flush. We can notify pending task.
task.on_success();
// `schedule_next_flush()` may pick up the same region to flush, so we must remove
// it from the status to avoid leaking pending requests.
// Safety: The flush status must exist.
let flush_status = self.region_status.remove(&region_id).unwrap();
Some((
flush_status.pending_ddls,
flush_status.pending_writes,
flush_status.pending_bulk_writes,
))
} else {
// We can flush the region again, keep it in the region status.
None
}
};
// Schedule next flush job.
if let Err(e) = self.schedule_next_flush() {
error!(e; "Flush of region {} is successful, but failed to schedule next flush", region_id);
}
// If region has pending task, but has nothing to flush, we need to remove it from the status.
let version_data = flush_status.version_control.current();
if version_data.version.memtables.is_empty() {
// The region has nothing to flush, we also need to remove it from the status.
// Safety: The pending task is not None.
let task = flush_status.pending_task.take().unwrap();
// The region has nothing to flush. We can notify pending task.
task.on_success();
debug!(
"Region {} has nothing to flush, removing it from the status",
region_id
);
// Safety: The flush status must exist.
let flush_status = self.region_status.remove(&region_id).unwrap();
return Some((
flush_status.pending_ddls,
flush_status.pending_writes,
flush_status.pending_bulk_writes,
));
}
// If region has pending task and has something to flush, we need to schedule it.
debug!("Scheduling pending flush task for region {}", region_id);
// Safety: The flush status must exist.
let task = flush_status.pending_task.take().unwrap();
let version_control = flush_status.version_control.clone();
if let Err(err) = self.schedule_flush_task(&version_control, task) {
error!(
err;
"Flush succeeded for region {region_id}, but failed to schedule next flush for it."
);
}
// We can flush the region again, keep it in the region status.
None
pending_requests
}
/// Notifies the scheduler that the flush job is failed.
@@ -1026,6 +1014,11 @@ impl FlushScheduler {
// Fast fail: cancels all pending tasks and sends error to their waiters.
flush_status.on_failure(err);
// Still tries to schedule a new flush.
if let Err(e) = self.schedule_next_flush() {
error!(e; "Failed to schedule next flush after region {} flush is failed", region_id);
}
}
/// Notifies the scheduler that the region is dropped.
@@ -1096,6 +1089,30 @@ impl FlushScheduler {
.map(|status| !status.pending_ddls.is_empty())
.unwrap_or(false)
}
/// Schedules a new flush task when the scheduler can submit the next task.
pub(crate) fn schedule_next_flush(&mut self) -> Result<()> {
debug_assert!(
self.region_status
.values()
.all(|status| status.flushing || status.pending_task.is_some())
);
// Get the first region from status map.
let Some(flush_status) = self
.region_status
.values_mut()
.find(|status| status.pending_task.is_some())
else {
return Ok(());
};
debug_assert!(!flush_status.flushing);
let task = flush_status.pending_task.take().unwrap();
let region_id = flush_status.region_id;
let version_control = flush_status.version_control.clone();
self.schedule_flush(region_id, &version_control, task)
}
}
impl Drop for FlushScheduler {
@@ -1115,6 +1132,11 @@ struct FlushStatus {
region_id: RegionId,
/// Version control of the region.
version_control: VersionControlRef,
/// There is a flush task running.
///
/// It is possible that a region is not flushing but has a pending task if the scheduler
/// hasn't scheduled this region yet.
flushing: bool,
/// Task waiting for next flush.
pending_task: Option<RegionFlushTask>,
/// Pending ddl requests.
@@ -1130,6 +1152,7 @@ impl FlushStatus {
FlushStatus {
region_id,
version_control,
flushing: false,
pending_task: None,
pending_ddls: Vec::new(),
pending_writes: Vec::new(),
@@ -1221,12 +1244,10 @@ mod tests {
// Global usage is still 1100.
manager.schedule_free_mem(200);
assert!(manager.should_flush_engine());
assert!(manager.should_stall());
// More than global limit, mutable (1100-200-450=450) is less than mutable limit (< 500).
// More than global limit, but mutable (1100-200-450=450) is not enough (< 500).
manager.schedule_free_mem(450);
assert!(manager.should_flush_engine());
assert!(manager.should_stall());
assert!(!manager.should_flush_engine());
// Now mutable is enough.
manager.reserve_mem(50);
@@ -1271,7 +1292,6 @@ mod tests {
.await,
index_options: IndexOptions::default(),
flush_semaphore: Arc::new(Semaphore::new(2)),
is_staging: false,
};
task.push_sender(OptionOutputTx::from(output_tx));
scheduler
@@ -1314,7 +1334,6 @@ mod tests {
manifest_ctx: manifest_ctx.clone(),
index_options: IndexOptions::default(),
flush_semaphore: Arc::new(Semaphore::new(2)),
is_staging: false,
})
.collect();
// Schedule first task.
@@ -1473,92 +1492,4 @@ mod tests {
assert_eq!(2, total_rows, "append_mode should preserve duplicates");
}
}
#[tokio::test]
async fn test_schedule_pending_request_on_flush_success() {
common_telemetry::init_default_ut_logging();
let job_scheduler = Arc::new(VecScheduler::default());
let env = SchedulerEnv::new().await.scheduler(job_scheduler.clone());
let (tx, _rx) = mpsc::channel(4);
let mut scheduler = env.mock_flush_scheduler();
let mut builder = VersionControlBuilder::new();
// Overwrites the empty memtable builder.
builder.set_memtable_builder(Arc::new(TimeSeriesMemtableBuilder::default()));
let version_control = Arc::new(builder.build());
// Writes data to the memtable so it is not empty.
let version_data = version_control.current();
write_rows_to_version(&version_data.version, "host0", 0, 10);
let manifest_ctx = env
.mock_manifest_context(version_data.version.metadata.clone())
.await;
// Creates 2 tasks.
let mut tasks: Vec<_> = (0..2)
.map(|_| RegionFlushTask {
region_id: builder.region_id(),
reason: FlushReason::Others,
senders: Vec::new(),
request_sender: tx.clone(),
access_layer: env.access_layer.clone(),
listener: WorkerListener::default(),
engine_config: Arc::new(MitoConfig::default()),
row_group_size: None,
cache_manager: Arc::new(CacheManager::default()),
manifest_ctx: manifest_ctx.clone(),
index_options: IndexOptions::default(),
flush_semaphore: Arc::new(Semaphore::new(2)),
is_staging: false,
})
.collect();
// Schedule first task.
let task = tasks.pop().unwrap();
scheduler
.schedule_flush(builder.region_id(), &version_control, task)
.unwrap();
// Should schedule 1 flush.
assert_eq!(1, scheduler.region_status.len());
assert_eq!(1, job_scheduler.num_jobs());
// Schedule second task.
let task = tasks.pop().unwrap();
scheduler
.schedule_flush(builder.region_id(), &version_control, task)
.unwrap();
assert!(
scheduler
.region_status
.get(&builder.region_id())
.unwrap()
.pending_task
.is_some()
);
// Check the new version.
let version_data = version_control.current();
assert_eq!(0, version_data.version.memtables.immutables()[0].id());
// Assumes the flush job is finished.
version_control.apply_edit(
Some(RegionEdit {
files_to_add: Vec::new(),
files_to_remove: Vec::new(),
timestamp_ms: None,
compaction_time_window: None,
flushed_entry_id: None,
flushed_sequence: None,
committed_sequence: None,
}),
&[0],
builder.file_purger(),
);
write_rows_to_version(&version_data.version, "host1", 0, 10);
scheduler.on_flush_success(builder.region_id());
assert_eq!(2, job_scheduler.num_jobs());
// The pending task is cleared.
assert!(
scheduler
.region_status
.get(&builder.region_id())
.unwrap()
.pending_task
.is_none()
);
}
}

View File

@@ -330,9 +330,10 @@ impl LocalGcWorker {
// TODO(discord9): for now, ignore async index files as their design is not stable; improve this once
// the index file design is stable
let file_pairs: Vec<(FileId, u64)> =
unused_files.iter().map(|file_id| (*file_id, 0)).collect();
// TODO(discord9): gc worker needs another major refactor to support versioned index files
let file_pairs: Vec<(FileId, FileId)> = unused_files
.iter()
.map(|file_id| (*file_id, *file_id))
.collect();
debug!(
"Found {} unused index files to delete for region {}",
@@ -353,7 +354,7 @@ impl LocalGcWorker {
Ok(unused_files)
}
async fn delete_files(&self, region_id: RegionId, file_ids: &[(FileId, u64)]) -> Result<()> {
async fn delete_files(&self, region_id: RegionId, file_ids: &[(FileId, FileId)]) -> Result<()> {
delete_files(
region_id,
file_ids,

View File

@@ -25,6 +25,7 @@ use crate::manifest::action::{RegionCheckpoint, RegionManifest};
use crate::manifest::manager::RegionManifestOptions;
use crate::manifest::storage::ManifestObjectStore;
use crate::metrics::MANIFEST_OP_ELAPSED;
use crate::region::{RegionLeaderState, RegionRoleState};
/// [`Checkpointer`] is responsible for doing checkpoint for a region, in an asynchronous way.
#[derive(Debug)]
@@ -136,7 +137,20 @@ impl Checkpointer {
/// Checks whether a checkpoint is needed for the region based on the checkpoint distance.
/// If so, and there is no checkpoint task currently running, it starts a new checkpoint
/// task in the background.
pub(crate) fn maybe_do_checkpoint(&self, manifest: &RegionManifest) {
pub(crate) fn maybe_do_checkpoint(
&self,
manifest: &RegionManifest,
region_state: RegionRoleState,
) {
// Skip checkpoint if region is in staging state
if region_state == RegionRoleState::Leader(RegionLeaderState::Staging) {
info!(
"Skipping checkpoint for region {} in staging mode, manifest version: {}",
manifest.metadata.region_id, manifest.manifest_version
);
return;
}
if self.manifest_options.checkpoint_distance == 0 {
return;
}
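A standalone sketch of the staging guard above, with simplified stand-ins for the crate's state enums; the trigger condition at the end is hypothetical, standing in for the real checkpoint-distance bookkeeping:
// Simplified stand-ins for RegionRoleState / RegionLeaderState.
#[derive(PartialEq)]
enum LeaderState { Writable, Staging }
#[derive(PartialEq)]
enum RoleState { Leader(LeaderState), Follower }
fn should_checkpoint(state: RoleState, checkpoint_distance: u64, versions_since_last: u64) -> bool {
    // Staging regions skip checkpointing entirely, as in the hunk above.
    if state == RoleState::Leader(LeaderState::Staging) {
        return false;
    }
    // A distance of 0 disables checkpointing.
    if checkpoint_distance == 0 {
        return false;
    }
    // Hypothetical trigger: enough new manifest versions have accumulated.
    versions_since_last >= checkpoint_distance
}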

View File

@@ -24,7 +24,6 @@ use store_api::metadata::RegionMetadataRef;
use store_api::storage::FileId;
use store_api::{MAX_VERSION, MIN_VERSION, ManifestVersion};
use crate::cache::manifest_cache::ManifestCache;
use crate::config::MitoConfig;
use crate::error::{
self, InstallManifestToSnafu, NoCheckpointSnafu, NoManifestsSnafu, RegionStoppedSnafu, Result,
@@ -53,8 +52,6 @@ pub struct RegionManifestOptions {
/// Set to 0 to disable checkpoint.
pub checkpoint_distance: u64,
pub remove_file_options: RemoveFileOptions,
/// Optional cache for manifest files.
pub manifest_cache: Option<ManifestCache>,
}
impl RegionManifestOptions {
@@ -70,7 +67,6 @@ impl RegionManifestOptions {
remove_file_options: RemoveFileOptions {
enable_gc: config.gc.enable,
},
manifest_cache: None,
}
}
}
@@ -155,10 +151,6 @@ pub struct RegionManifestManager {
last_version: Arc<AtomicU64>,
checkpointer: Checkpointer,
manifest: Arc<RegionManifest>,
// Staging manifest is used to store the manifest of the staging region before it becomes available.
// It is initially inherited from the previous manifest (i.e., `self.manifest`).
// When the staging manifest becomes available, it will be used to construct the new manifest.
staging_manifest: Option<Arc<RegionManifest>>,
stats: ManifestStats,
stopped: bool,
}
@@ -178,7 +170,6 @@ impl RegionManifestManager {
options.object_store.clone(),
options.compress_type,
stats.total_manifest_size.clone(),
options.manifest_cache.clone(),
);
let manifest_version = stats.manifest_version.clone();
@@ -238,7 +229,6 @@ impl RegionManifestManager {
last_version: manifest_version,
checkpointer,
manifest: Arc::new(manifest),
staging_manifest: None,
stats: stats.clone(),
stopped: false,
})
@@ -261,7 +251,6 @@ impl RegionManifestManager {
options.object_store.clone(),
options.compress_type,
stats.total_manifest_size.clone(),
options.manifest_cache.clone(),
);
let manifest_version = stats.manifest_version.clone();
@@ -345,8 +334,6 @@ impl RegionManifestManager {
last_version: manifest_version,
checkpointer,
manifest: Arc::new(manifest),
// TODO(weny): open the staging manifest if exists.
staging_manifest: None,
stats: stats.clone(),
stopped: false,
}))
@@ -517,7 +504,7 @@ impl RegionManifestManager {
pub async fn update(
&mut self,
action_list: RegionMetaActionList,
is_staging: bool,
region_state: RegionRoleState,
) -> Result<ManifestVersion> {
let _t = MANIFEST_OP_ELAPSED
.with_label_values(&["update"])
@@ -531,19 +518,13 @@ impl RegionManifestManager {
);
let version = self.increase_version();
let is_staging = region_state == RegionRoleState::Leader(RegionLeaderState::Staging);
self.store
.save(version, &action_list.encode()?, is_staging)
.await?;
// For a staging region, the manifest is initially inherited from the previous manifest (i.e., `self.manifest`).
// When the staging manifest becomes available, it will be used to construct the new manifest.
let mut manifest_builder =
if is_staging && let Some(staging_manifest) = self.staging_manifest.as_ref() {
RegionManifestBuilder::with_checkpoint(Some(staging_manifest.as_ref().clone()))
} else {
RegionManifestBuilder::with_checkpoint(Some(self.manifest.as_ref().clone()))
};
RegionManifestBuilder::with_checkpoint(Some(self.manifest.as_ref().clone()));
for action in action_list.actions {
match action {
RegionMetaAction::Change(action) => {
@@ -563,27 +544,17 @@ impl RegionManifestManager {
}
}
}
let new_manifest = manifest_builder.try_build()?;
new_manifest
.removed_files
.update_file_removed_cnt_to_stats(&self.stats);
let updated_manifest = self
.checkpointer
.update_manifest_removed_files(new_manifest)?;
self.manifest = Arc::new(updated_manifest);
if is_staging {
let new_manifest = manifest_builder.try_build()?;
self.staging_manifest = Some(Arc::new(new_manifest));
info!(
"Skipping checkpoint for region {} in staging mode, manifest version: {}",
self.manifest.metadata.region_id, self.manifest.manifest_version
);
} else {
let new_manifest = manifest_builder.try_build()?;
new_manifest
.removed_files
.update_file_removed_cnt_to_stats(&self.stats);
let updated_manifest = self
.checkpointer
.update_manifest_removed_files(new_manifest)?;
self.manifest = Arc::new(updated_manifest);
self.checkpointer
.maybe_do_checkpoint(self.manifest.as_ref());
}
self.checkpointer
.maybe_do_checkpoint(self.manifest.as_ref(), region_state);
Ok(version)
}
@@ -604,11 +575,6 @@ impl RegionManifestManager {
self.manifest.clone()
}
/// Retrieves the staging [RegionManifest], if any.
pub fn staging_manifest(&self) -> Option<Arc<RegionManifest>> {
self.staging_manifest.clone()
}
/// Returns total manifest size.
pub fn manifest_usage(&self) -> u64 {
self.store.total_manifest_size()
@@ -745,22 +711,6 @@ impl RegionManifestManager {
Ok(Some(RegionMetaActionList::new(merged_actions)))
}
/// Unsets the staging manifest.
pub(crate) fn unset_staging_manifest(&mut self) {
self.staging_manifest = None;
}
/// Clear all staging manifests.
pub(crate) async fn clear_staging_manifest_and_dir(&mut self) -> Result<()> {
self.staging_manifest = None;
self.store.clear_staging_manifests().await?;
info!(
"Cleared all staging manifests for region {}",
self.manifest.metadata.region_id
);
Ok(())
}
}
#[cfg(test)]
@@ -887,7 +837,13 @@ mod test {
sst_format: FormatType::PrimaryKey,
}));
let current_version = manager.update(action_list, false).await.unwrap();
let current_version = manager
.update(
action_list,
RegionRoleState::Leader(RegionLeaderState::Writable),
)
.await
.unwrap();
assert_eq!(current_version, 1);
manager.validate_manifest(&new_metadata, 1);
@@ -950,7 +906,13 @@ mod test {
sst_format: FormatType::PrimaryKey,
}));
let current_version = manager.update(action_list, false).await.unwrap();
let current_version = manager
.update(
action_list,
RegionRoleState::Leader(RegionLeaderState::Writable),
)
.await
.unwrap();
assert_eq!(current_version, 1);
manager.validate_manifest(&new_metadata, 1);
@@ -971,7 +933,7 @@ mod test {
flushed_sequence: None,
committed_sequence: None,
})]),
false,
RegionRoleState::Leader(RegionLeaderState::Writable),
)
.await
.unwrap();

View File

@@ -33,7 +33,6 @@ use store_api::ManifestVersion;
use store_api::storage::RegionId;
use tokio::sync::Semaphore;
use crate::cache::manifest_cache::ManifestCache;
use crate::error::{
ChecksumMismatchSnafu, CompressObjectSnafu, DecompressObjectSnafu, InvalidScanIndexSnafu,
OpenDalSnafu, Result, SerdeJsonSnafu, Utf8Snafu,
@@ -145,8 +144,6 @@ pub struct ManifestObjectStore {
/// Stores the size of each manifest file.
manifest_size_map: Arc<RwLock<HashMap<FileKey, u64>>>,
total_manifest_size: Arc<AtomicU64>,
/// Optional manifest cache for local caching.
manifest_cache: Option<ManifestCache>,
}
impl ManifestObjectStore {
@@ -155,7 +152,6 @@ impl ManifestObjectStore {
object_store: ObjectStore,
compress_type: CompressionType,
total_manifest_size: Arc<AtomicU64>,
manifest_cache: Option<ManifestCache>,
) -> Self {
let path = util::normalize_dir(path);
let staging_path = {
@@ -170,7 +166,6 @@ impl ManifestObjectStore {
staging_path,
manifest_size_map: Arc::new(RwLock::new(HashMap::new())),
total_manifest_size,
manifest_cache,
}
}
@@ -296,11 +291,9 @@ impl ManifestObjectStore {
}
/// Common implementation for fetching manifests from entries in parallel.
/// If `is_staging` is true, the cache is skipped.
async fn fetch_manifests_from_entries(
&self,
entries: Vec<(ManifestVersion, Entry)>,
is_staging: bool,
) -> Result<Vec<(ManifestVersion, Vec<u8>)>> {
if entries.is_empty() {
return Ok(vec![]);
@@ -313,13 +306,6 @@ impl ManifestObjectStore {
// Safety: semaphore must exist.
let _permit = semaphore.acquire().await.unwrap();
let cache_key = entry.path();
// Try to get from cache first
if let Some(data) = self.get_from_cache(cache_key, is_staging).await {
return Ok((*v, data));
}
// Fetch from remote object store
let compress_type = file_compress_type(entry.name());
let bytes = self
.object_store
@@ -333,11 +319,6 @@ impl ManifestObjectStore {
compress_type,
path: entry.path(),
})?;
// Add to cache
self.put_to_cache(cache_key.to_string(), &data, is_staging)
.await;
Ok((*v, data))
});
@@ -354,7 +335,7 @@ impl ManifestObjectStore {
end_version: ManifestVersion,
) -> Result<Vec<(ManifestVersion, Vec<u8>)>> {
let manifests = self.scan(start_version, end_version).await?;
self.fetch_manifests_from_entries(manifests, false).await
self.fetch_manifests_from_entries(manifests).await
}
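The fetch path above reads manifest files in parallel, bounded by a semaphore. A generic sketch of that bounded-fetch pattern with tokio; fetch_one is a hypothetical stand-in for the read-and-decompress step:
use std::sync::Arc;
use tokio::sync::Semaphore;
async fn fetch_all(keys: Vec<String>, limit: usize) -> Vec<(String, Vec<u8>)> {
    let semaphore = Arc::new(Semaphore::new(limit));
    let tasks: Vec<_> = keys
        .into_iter()
        .map(|key| {
            let semaphore = semaphore.clone();
            tokio::spawn(async move {
                // Safety: the semaphore is never closed, so acquire cannot fail.
                let _permit = semaphore.acquire_owned().await.unwrap();
                let data = fetch_one(&key).await;
                (key, data)
            })
        })
        .collect();
    let mut results = Vec::new();
    for task in tasks {
        results.push(task.await.unwrap());
    }
    results
}
async fn fetch_one(_key: &str) -> Vec<u8> {
    Vec::new() // stand-in for object_store.read + decompress
}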
/// Deletes manifest files whose version < end.
@@ -424,11 +405,6 @@ impl ManifestObjectStore {
ret, self.path, end, checkpoint_version, paths,
);
// Remove from cache first
for (entry, _, _) in &del_entries {
self.remove_from_cache(entry.path()).await;
}
self.object_store
.delete_iter(paths)
.await
@@ -464,10 +440,11 @@ impl ManifestObjectStore {
path: &path,
})?;
let delta_size = data.len();
self.write_and_put_cache(&path, data, is_staging).await?;
self.object_store
.write(&path, data)
.await
.context(OpenDalSnafu)?;
self.set_delta_file_size(version, delta_size as u64);
Ok(())
}
@@ -488,8 +465,10 @@ impl ManifestObjectStore {
})?;
let checkpoint_size = data.len();
let checksum = checkpoint_checksum(bytes);
self.write_and_put_cache(&path, data, false).await?;
self.object_store
.write(&path, data)
.await
.context(OpenDalSnafu)?;
self.set_checkpoint_file_size(version, checkpoint_size as u64);
// Because the last checkpoint file only contains the size and version, it is tiny, so we don't compress it.
@@ -522,80 +501,60 @@ impl ManifestObjectStore {
) -> Result<Option<(ManifestVersion, Vec<u8>)>> {
let version = metadata.version;
let path = self.checkpoint_file_path(version);
// Try to get from cache first
if let Some(data) = self.get_from_cache(&path, false).await {
verify_checksum(&data, metadata.checksum)?;
return Ok(Some((version, data)));
}
// For backward compatibility, the user's checkpoint may not be compressed,
// so if we can't find the file by its compressed type, fall back to looking for an uncompressed checkpoint.
let checkpoint_data = match self.object_store.read(&path).await {
Ok(checkpoint) => {
let checkpoint_size = checkpoint.len();
let decompress_data =
self.compress_type
.decode(checkpoint)
.await
.with_context(|_| DecompressObjectSnafu {
let checkpoint_data =
match self.object_store.read(&path).await {
Ok(checkpoint) => {
let checkpoint_size = checkpoint.len();
let decompress_data = self.compress_type.decode(checkpoint).await.context(
DecompressObjectSnafu {
compress_type: self.compress_type,
path: path.clone(),
})?;
verify_checksum(&decompress_data, metadata.checksum)?;
// set the checkpoint size
self.set_checkpoint_file_size(version, checkpoint_size as u64);
// Add to cache
self.put_to_cache(path, &decompress_data, false).await;
Ok(Some(decompress_data))
}
Err(e) => {
if e.kind() == ErrorKind::NotFound {
if self.compress_type != FALL_BACK_COMPRESS_TYPE {
let fall_back_path = gen_path(
&self.path,
&checkpoint_file(version),
FALL_BACK_COMPRESS_TYPE,
);
debug!(
"Failed to load checkpoint from path: {}, fall back to path: {}",
path, fall_back_path
);
// Try to get fallback from cache first
if let Some(data) = self.get_from_cache(&fall_back_path, false).await {
verify_checksum(&data, metadata.checksum)?;
return Ok(Some((version, data)));
}
match self.object_store.read(&fall_back_path).await {
Ok(checkpoint) => {
let checkpoint_size = checkpoint.len();
let decompress_data = FALL_BACK_COMPRESS_TYPE
.decode(checkpoint)
.await
.with_context(|_| DecompressObjectSnafu {
compress_type: FALL_BACK_COMPRESS_TYPE,
path: fall_back_path.clone(),
})?;
verify_checksum(&decompress_data, metadata.checksum)?;
self.set_checkpoint_file_size(version, checkpoint_size as u64);
// Add fallback to cache
self.put_to_cache(fall_back_path, &decompress_data, false)
.await;
Ok(Some(decompress_data))
path,
},
)?;
verify_checksum(&decompress_data, metadata.checksum)?;
// set the checkpoint size
self.set_checkpoint_file_size(version, checkpoint_size as u64);
Ok(Some(decompress_data))
}
Err(e) => {
if e.kind() == ErrorKind::NotFound {
if self.compress_type != FALL_BACK_COMPRESS_TYPE {
let fall_back_path = gen_path(
&self.path,
&checkpoint_file(version),
FALL_BACK_COMPRESS_TYPE,
);
debug!(
"Failed to load checkpoint from path: {}, fall back to path: {}",
path, fall_back_path
);
match self.object_store.read(&fall_back_path).await {
Ok(checkpoint) => {
let checkpoint_size = checkpoint.len();
let decompress_data = FALL_BACK_COMPRESS_TYPE
.decode(checkpoint)
.await
.context(DecompressObjectSnafu {
compress_type: FALL_BACK_COMPRESS_TYPE,
path,
})?;
verify_checksum(&decompress_data, metadata.checksum)?;
self.set_checkpoint_file_size(version, checkpoint_size as u64);
Ok(Some(decompress_data))
}
Err(e) if e.kind() == ErrorKind::NotFound => Ok(None),
Err(e) => Err(e).context(OpenDalSnafu),
}
Err(e) if e.kind() == ErrorKind::NotFound => Ok(None),
Err(e) => Err(e).context(OpenDalSnafu),
} else {
Ok(None)
}
} else {
Ok(None)
Err(e).context(OpenDalSnafu)
}
} else {
Err(e).context(OpenDalSnafu)
}
}
}?;
}?;
Ok(checkpoint_data.map(|data| (version, data)))
}
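The restructured match above is a read-with-fallback: try the path for the configured compression first and, when that type isn't already the fallback type, retry the uncompressed legacy path on NotFound. A distilled sketch of that control flow; read and decode are hypothetical stand-ins (the real code also verifies a checksum and records the file size):
enum ReadError { NotFound, Other }
fn read(_path: &str) -> Result<Vec<u8>, ReadError> { Err(ReadError::NotFound) }
fn decode(_bytes: Vec<u8>) -> Result<Vec<u8>, ReadError> { Ok(Vec::new()) }
fn load_with_fallback(path: &str, fallback: &str) -> Result<Option<Vec<u8>>, ReadError> {
    match read(path) {
        Ok(bytes) => Ok(Some(decode(bytes)?)),
        // Older deployments may have written the checkpoint uncompressed,
        // so on NotFound retry with the legacy (uncompressed) path.
        Err(ReadError::NotFound) => match read(fallback) {
            Ok(bytes) => Ok(Some(decode(bytes)?)),
            Err(ReadError::NotFound) => Ok(None), // checkpoint genuinely absent
            Err(e) => Err(e),
        },
        Err(e) => Err(e),
    }
}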
@@ -603,10 +562,8 @@ impl ManifestObjectStore {
/// Return manifest version and the raw [RegionCheckpoint](crate::manifest::action::RegionCheckpoint) content if any
pub async fn load_last_checkpoint(&mut self) -> Result<Option<(ManifestVersion, Vec<u8>)>> {
let last_checkpoint_path = self.last_checkpoint_path();
// Fetch from remote object store without cache
let last_checkpoint_data = match self.object_store.read(&last_checkpoint_path).await {
Ok(data) => data.to_vec(),
Ok(data) => data,
Err(e) if e.kind() == ErrorKind::NotFound => {
return Ok(None);
}
@@ -615,7 +572,7 @@ impl ManifestObjectStore {
}
};
let checkpoint_metadata = CheckpointMetadata::decode(&last_checkpoint_data)?;
let checkpoint_metadata = CheckpointMetadata::decode(&last_checkpoint_data.to_vec())?;
debug!(
"Load checkpoint in path: {}, metadata: {:?}",
@@ -745,8 +702,7 @@ impl ManifestObjectStore {
let mut sorted_entries = manifest_entries;
Self::sort_manifests(&mut sorted_entries);
self.fetch_manifests_from_entries(sorted_entries, true)
.await
self.fetch_manifests_from_entries(sorted_entries).await
}
/// Clear all staging manifest files.
@@ -763,63 +719,6 @@ impl ManifestObjectStore {
Ok(())
}
/// Gets a manifest file from the cache.
/// Returns the file data if found in the cache, `None` otherwise.
/// If `is_staging` is true, always returns `None`.
async fn get_from_cache(&self, key: &str, is_staging: bool) -> Option<Vec<u8>> {
if is_staging {
return None;
}
let cache = self.manifest_cache.as_ref()?;
cache.get_file(key).await
}
/// Puts a manifest file into cache.
/// If `is_staging` is true, does nothing.
async fn put_to_cache(&self, key: String, data: &[u8], is_staging: bool) {
if is_staging {
return;
}
let Some(cache) = &self.manifest_cache else {
return;
};
cache.put_file(key, data.to_vec()).await;
}
/// Writes data to the object store and puts it into the cache.
/// If `is_staging` is true, the cache is skipped.
async fn write_and_put_cache(&self, path: &str, data: Vec<u8>, is_staging: bool) -> Result<()> {
// Clone data for cache before writing, only if cache is enabled and not staging
let cache_data = if !is_staging && self.manifest_cache.is_some() {
Some(data.clone())
} else {
None
};
// Write to object store
self.object_store
.write(path, data)
.await
.context(OpenDalSnafu)?;
// Put to cache if we cloned the data
if let Some(data) = cache_data {
self.put_to_cache(path.to_string(), &data, is_staging).await;
}
Ok(())
}
/// Removes a manifest file from cache.
async fn remove_from_cache(&self, key: &str) {
let Some(cache) = &self.manifest_cache else {
return;
};
cache.remove(key).await;
}
}
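The removed helpers above implemented a write-through cache: bytes are persisted to the object store first and only inserted into the cache afterwards, so the cache never holds data that failed to persist. A minimal sketch of that ordering with simplified stand-in types:
use std::collections::HashMap;
use std::sync::Mutex;
// Simplified stand-ins for ManifestCache and the object store.
struct Cache(Mutex<HashMap<String, Vec<u8>>>);
async fn write_remote(_path: &str, _data: &[u8]) -> Result<(), String> {
    Ok(()) // stand-in for object_store.write
}
async fn write_and_put_cache(
    cache: Option<&Cache>,
    path: &str,
    data: Vec<u8>,
) -> Result<(), String> {
    // Persist first: a failed remote write must never leave bytes in the
    // cache that don't exist in the object store.
    write_remote(path, &data).await?;
    // Populate the cache only after the write succeeded.
    if let Some(cache) = cache {
        cache.0.lock().unwrap().insert(path.to_string(), data);
    }
    Ok(())
}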
#[derive(Serialize, Deserialize, Debug)]
@@ -863,7 +762,6 @@ mod tests {
object_store,
CompressionType::Uncompressed,
Default::default(),
None,
)
}

Some files were not shown because too many files have changed in this diff.