Merge remote-tracking branch 'origin/main' into zhongzc/repartition-procedure-scaffold

Zhenchi
2025-10-26 10:56:09 +00:00
237 changed files with 12156 additions and 2632 deletions

View File

@@ -57,14 +57,6 @@
return days;
}
// Get urgency emoji based on PR age
function getAgeEmoji(days) {
if (days >= 14) return "🔴"; // 14+ days - critical
if (days >= 7) return "🟠"; // 7+ days - urgent
if (days >= 3) return "🟡"; // 3+ days - needs attention
return "🟢"; // < 3 days - fresh
}
// Build Slack notification message from PR list
function buildSlackMessage(prs) {
if (prs.length === 0) {

View File

@@ -2,8 +2,8 @@ name: PR Review Reminder
on:
schedule:
# Run at 9:00 AM UTC+8 (01:00 AM UTC) every day
- cron: '0 1 * * *'
# Run at 9:00 AM UTC+8 (01:00 AM UTC) on Monday, Wednesday, Friday
- cron: '0 1 * * 1,3,5'
workflow_dispatch:
jobs:

Cargo.lock (generated): 835 changed lines

File diff suppressed because it is too large.

View File

@@ -99,12 +99,12 @@ rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
# See for more details: https://github.com/rust-lang/cargo/issues/11329
ahash = { version = "0.8", features = ["compile-time-rng"] }
aquamarine = "0.6"
arrow = { version = "56.0", features = ["prettyprint"] }
arrow-array = { version = "56.0", default-features = false, features = ["chrono-tz"] }
arrow-buffer = "56.0"
arrow-flight = "56.0"
arrow-ipc = { version = "56.0", default-features = false, features = ["lz4", "zstd"] }
arrow-schema = { version = "56.0", features = ["serde"] }
arrow = { version = "56.2", features = ["prettyprint"] }
arrow-array = { version = "56.2", default-features = false, features = ["chrono-tz"] }
arrow-buffer = "56.2"
arrow-flight = "56.2"
arrow-ipc = { version = "56.2", default-features = false, features = ["lz4", "zstd"] }
arrow-schema = { version = "56.2", features = ["serde"] }
async-stream = "0.3"
async-trait = "0.1"
# Remember to update axum-extra, axum-macros when updating axum
@@ -123,18 +123,18 @@ clap = { version = "4.4", features = ["derive"] }
config = "0.13.0"
crossbeam-utils = "0.8"
dashmap = "6.1"
datafusion = "49"
datafusion-common = "49"
datafusion-expr = "49"
datafusion-functions = "49"
datafusion-functions-aggregate-common = "49"
datafusion-optimizer = "49"
datafusion-orc = { git = "https://github.com/GreptimeTeam/datafusion-orc", rev = "a0a5f902158f153119316eaeec868cff3fc8a99d" }
datafusion-pg-catalog = { git = "https://github.com/datafusion-contrib/datafusion-postgres", rev = "3d1b7c7d5b82dd49bafc2803259365e633f654fa" }
datafusion-physical-expr = "49"
datafusion-physical-plan = "49"
datafusion-sql = "49"
datafusion-substrait = "49"
datafusion = "50"
datafusion-common = "50"
datafusion-expr = "50"
datafusion-functions = "50"
datafusion-functions-aggregate-common = "50"
datafusion-optimizer = "50"
datafusion-orc = "0.5"
datafusion-pg-catalog = "0.11"
datafusion-physical-expr = "50"
datafusion-physical-plan = "50"
datafusion-sql = "50"
datafusion-substrait = "50"
deadpool = "0.12"
deadpool-postgres = "0.14"
derive_builder = "0.20"
@@ -147,7 +147,7 @@ etcd-client = { git = "https://github.com/GreptimeTeam/etcd-client", rev = "f62d
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "69a6089933daa573c96808ec4bbc48f447ec6e8c" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "14b9dc40bdc8288742b0cefc7bb024303b7429ef" }
hex = "0.4"
http = "1"
humantime = "2.1"
@@ -180,7 +180,7 @@ otel-arrow-rust = { git = "https://github.com/GreptimeTeam/otel-arrow", rev = "2
"server",
] }
parking_lot = "0.12"
parquet = { version = "56.0", default-features = false, features = ["arrow", "async", "object_store"] }
parquet = { version = "56.2", default-features = false, features = ["arrow", "async", "object_store"] }
paste = "1.0"
pin-project = "1.0"
pretty_assertions = "1.4.0"
@@ -191,7 +191,7 @@ prost-types = "0.13"
raft-engine = { version = "0.4.1", default-features = false }
rand = "0.9"
ratelimit = "0.10"
regex = "1.8"
regex = "1.12"
regex-automata = "0.4"
reqwest = { version = "0.12", default-features = false, features = [
"json",
@@ -217,10 +217,7 @@ simd-json = "0.15"
similar-asserts = "1.6.0"
smallvec = { version = "1", features = ["serde"] }
snafu = "0.8"
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "39e4fc94c3c741981f77e9d63b5ce8c02e0a27ea", features = [
"visitor",
"serde",
] } # branch = "v0.55.x"
sqlparser = { version = "0.58.0", default-features = false, features = ["std", "visitor", "serde"] }
sqlx = { version = "0.8", features = [
"runtime-tokio-rustls",
"mysql",
@@ -322,16 +319,19 @@ git = "https://github.com/GreptimeTeam/greptime-meter.git"
rev = "5618e779cf2bb4755b499c630fba4c35e91898cb"
[patch.crates-io]
datafusion = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
datafusion-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
datafusion-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
datafusion-functions = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
datafusion-functions-aggregate-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
datafusion-optimizer = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
datafusion-physical-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
datafusion-physical-plan = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
datafusion-sql = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
datafusion-substrait = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
datafusion = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
datafusion-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
datafusion-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
datafusion-functions = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
datafusion-functions-aggregate-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
datafusion-optimizer = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
datafusion-physical-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
datafusion-physical-expr-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
datafusion-physical-plan = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
datafusion-datasource = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
datafusion-sql = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
datafusion-substrait = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "4b519a5caa95472cc3988f5556813a583dd35af1" } # branch = "v0.58.x"
[profile.release]
debug = 1

View File

@@ -25,12 +25,14 @@
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. |
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.<br/>Set to 0 to disable limit. |
| `http.max_total_body_memory` | String | Unset | Maximum total memory for all concurrent HTTP request bodies.<br/>Set to 0 to disable the limit. Default: "0" (unlimited) |
| `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default<br/>This allows browser to access http APIs without CORS restrictions |
| `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
| `http.prom_validation_mode` | String | `strict` | Whether to enable validation for Prometheus remote write requests.<br/>Available options:<br/>- strict: deny invalid UTF-8 strings (default).<br/>- lossy: allow invalid UTF-8 strings, replace invalid characters with REPLACEMENT_CHARACTER(U+FFFD).<br/>- unchecked: do not validate strings. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
| `grpc.max_total_message_memory` | String | Unset | Maximum total memory for all concurrent gRPC request messages.<br/>Set to 0 to disable the limit. Default: "0" (unlimited) |
| `grpc.max_connection_age` | String | Unset | The maximum connection age for gRPC connection.<br/>The value can be a human-readable time string. For example: `10m` for ten minutes or `1h` for one hour.<br/>Refer to https://grpc.io/docs/guides/keepalive/ for more details. |
| `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
| `grpc.tls.mode` | String | `disable` | TLS mode. |
@@ -235,6 +237,7 @@
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. |
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.<br/>Set to 0 to disable limit. |
| `http.max_total_body_memory` | String | Unset | Maximum total memory for all concurrent HTTP request bodies.<br/>Set to 0 to disable the limit. Default: "0" (unlimited) |
| `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default<br/>This allows browser to access http APIs without CORS restrictions |
| `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
| `http.prom_validation_mode` | String | `strict` | Whether to enable validation for Prometheus remote write requests.<br/>Available options:<br/>- strict: deny invalid UTF-8 strings (default).<br/>- lossy: allow invalid UTF-8 strings, replace invalid characters with REPLACEMENT_CHARACTER(U+FFFD).<br/>- unchecked: do not validate strings. |
@@ -242,6 +245,7 @@
| `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.server_addr` | String | `127.0.0.1:4001` | The address advertised to the metasrv, and used for connections from outside the host.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `grpc.bind_addr`. |
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
| `grpc.max_total_message_memory` | String | Unset | Maximum total memory for all concurrent gRPC request messages.<br/>Set to 0 to disable the limit. Default: "0" (unlimited) |
| `grpc.flight_compression` | String | `arrow_ipc` | Compression mode for frontend side Arrow IPC service. Available options:<br/>- `none`: disable all compression<br/>- `transport`: only enable gRPC transport compression (zstd)<br/>- `arrow_ipc`: only enable Arrow IPC compression (lz4)<br/>- `all`: enable all compression.<br/>Default to `none` |
| `grpc.max_connection_age` | String | Unset | The maximum connection age for gRPC connection.<br/>The value can be a human-readable time string. For example: `10m` for ten minutes or `1h` for one hour.<br/>Refer to https://grpc.io/docs/guides/keepalive/ for more details. |
| `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |

View File

@@ -31,6 +31,10 @@ timeout = "0s"
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
## Set to 0 to disable limit.
body_limit = "64MB"
## Maximum total memory for all concurrent HTTP request bodies.
## Set to 0 to disable the limit. Default: "0" (unlimited)
## @toml2docs:none-default
#+ max_total_body_memory = "1GB"
## HTTP CORS support, it's turned on by default
## This allows browser to access http APIs without CORS restrictions
enable_cors = true
@@ -54,6 +58,10 @@ bind_addr = "127.0.0.1:4001"
server_addr = "127.0.0.1:4001"
## The number of server worker threads.
runtime_size = 8
## Maximum total memory for all concurrent gRPC request messages.
## Set to 0 to disable the limit. Default: "0" (unlimited)
## @toml2docs:none-default
#+ max_total_message_memory = "1GB"
## Compression mode for frontend side Arrow IPC service. Available options:
## - `none`: disable all compression
## - `transport`: only enable gRPC transport compression (zstd)

View File

@@ -36,6 +36,10 @@ timeout = "0s"
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
## Set to 0 to disable limit.
body_limit = "64MB"
## Maximum total memory for all concurrent HTTP request bodies.
## Set to 0 to disable the limit. Default: "0" (unlimited)
## @toml2docs:none-default
#+ max_total_body_memory = "1GB"
## HTTP CORS support, it's turned on by default
## This allows browser to access http APIs without CORS restrictions
enable_cors = true
@@ -56,6 +60,10 @@ prom_validation_mode = "strict"
bind_addr = "127.0.0.1:4001"
## The number of server worker threads.
runtime_size = 8
## Maximum total memory for all concurrent gRPC request messages.
## Set to 0 to disable the limit. Default: "0" (unlimited)
## @toml2docs:none-default
#+ max_total_message_memory = "1GB"
## The maximum connection age for gRPC connection.
## The value can be a human-readable time string. For example: `10m` for ten minutes or `1h` for one hour.
## Refer to https://grpc.io/docs/guides/keepalive/ for more details.

View File

@@ -16,8 +16,8 @@ use std::collections::HashMap;
use datatypes::schema::{
COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema, FULLTEXT_KEY, FulltextAnalyzer,
FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY, SkippingIndexOptions,
SkippingIndexType,
FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, JSON_STRUCTURE_SETTINGS_KEY,
SKIPPING_INDEX_KEY, SkippingIndexOptions, SkippingIndexType,
};
use greptime_proto::v1::{
Analyzer, FulltextBackend as PbFulltextBackend, SkippingIndexType as PbSkippingIndexType,
@@ -68,6 +68,9 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
if let Some(skipping_index) = options.options.get(SKIPPING_INDEX_GRPC_KEY) {
metadata.insert(SKIPPING_INDEX_KEY.to_string(), skipping_index.to_owned());
}
if let Some(settings) = options.options.get(JSON_STRUCTURE_SETTINGS_KEY) {
metadata.insert(JSON_STRUCTURE_SETTINGS_KEY.to_string(), settings.clone());
}
}
ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable)
@@ -139,6 +142,11 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option<Column
.options
.insert(SKIPPING_INDEX_GRPC_KEY.to_string(), skipping_index.clone());
}
if let Some(settings) = column_schema.metadata().get(JSON_STRUCTURE_SETTINGS_KEY) {
options
.options
.insert(JSON_STRUCTURE_SETTINGS_KEY.to_string(), settings.clone());
}
(!options.options.is_empty()).then_some(options)
}

View File

@@ -33,7 +33,6 @@ use datatypes::timestamp::TimestampMillisecond;
use datatypes::value::Value;
use datatypes::vectors::{
Int64VectorBuilder, StringVectorBuilder, TimestampMillisecondVectorBuilder,
UInt32VectorBuilder, UInt64VectorBuilder,
};
use serde::Serialize;
use snafu::ResultExt;
@@ -53,6 +52,8 @@ const PEER_ADDR: &str = "peer_addr";
const PEER_HOSTNAME: &str = "peer_hostname";
const TOTAL_CPU_MILLICORES: &str = "total_cpu_millicores";
const TOTAL_MEMORY_BYTES: &str = "total_memory_bytes";
const CPU_USAGE_MILLICORES: &str = "cpu_usage_millicores";
const MEMORY_USAGE_BYTES: &str = "memory_usage_bytes";
const VERSION: &str = "version";
const GIT_COMMIT: &str = "git_commit";
const START_TIME: &str = "start_time";
@@ -67,15 +68,17 @@ const INIT_CAPACITY: usize = 42;
/// - `peer_id`: the peer server id.
/// - `peer_type`: the peer type, such as `datanode`, `frontend`, `metasrv` etc.
/// - `peer_addr`: the peer gRPC address.
/// - `peer_hostname`: the hostname of the peer.
/// - `total_cpu_millicores`: the total CPU millicores of the peer.
/// - `total_memory_bytes`: the total memory bytes of the peer.
/// - `cpu_usage_millicores`: the CPU usage millicores of the peer.
/// - `memory_usage_bytes`: the memory usage bytes of the peer.
/// - `version`: the build package version of the peer.
/// - `git_commit`: the build git commit hash of the peer.
/// - `start_time`: the starting time of the peer.
/// - `uptime`: the uptime of the peer.
/// - `active_time`: the time since the last activity of the peer.
/// - `node_status`: the status info of the peer.
/// - `peer_hostname`: the hostname of the peer.
///
#[derive(Debug)]
pub(super) struct InformationSchemaClusterInfo {
@@ -99,12 +102,22 @@ impl InformationSchemaClusterInfo {
ColumnSchema::new(PEER_HOSTNAME, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(
TOTAL_CPU_MILLICORES,
ConcreteDataType::uint32_datatype(),
ConcreteDataType::int64_datatype(),
false,
),
ColumnSchema::new(
TOTAL_MEMORY_BYTES,
ConcreteDataType::uint64_datatype(),
ConcreteDataType::int64_datatype(),
false,
),
ColumnSchema::new(
CPU_USAGE_MILLICORES,
ConcreteDataType::int64_datatype(),
false,
),
ColumnSchema::new(
MEMORY_USAGE_BYTES,
ConcreteDataType::int64_datatype(),
false,
),
ColumnSchema::new(VERSION, ConcreteDataType::string_datatype(), false),
@@ -167,8 +180,10 @@ struct InformationSchemaClusterInfoBuilder {
peer_types: StringVectorBuilder,
peer_addrs: StringVectorBuilder,
peer_hostnames: StringVectorBuilder,
cpus: UInt32VectorBuilder,
memory_bytes: UInt64VectorBuilder,
total_cpu_millicores: Int64VectorBuilder,
total_memory_bytes: Int64VectorBuilder,
cpu_usage_millicores: Int64VectorBuilder,
memory_usage_bytes: Int64VectorBuilder,
versions: StringVectorBuilder,
git_commits: StringVectorBuilder,
start_times: TimestampMillisecondVectorBuilder,
@@ -186,8 +201,10 @@ impl InformationSchemaClusterInfoBuilder {
peer_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
peer_addrs: StringVectorBuilder::with_capacity(INIT_CAPACITY),
peer_hostnames: StringVectorBuilder::with_capacity(INIT_CAPACITY),
cpus: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
memory_bytes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
total_cpu_millicores: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
total_memory_bytes: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
cpu_usage_millicores: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
memory_usage_bytes: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
versions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
git_commits: StringVectorBuilder::with_capacity(INIT_CAPACITY),
start_times: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
@@ -243,8 +260,14 @@ impl InformationSchemaClusterInfoBuilder {
self.start_times.push(None);
self.uptimes.push(None);
}
self.cpus.push(Some(node_info.cpus));
self.memory_bytes.push(Some(node_info.memory_bytes));
self.total_cpu_millicores
.push(Some(node_info.total_cpu_millicores));
self.total_memory_bytes
.push(Some(node_info.total_memory_bytes));
self.cpu_usage_millicores
.push(Some(node_info.cpu_usage_millicores));
self.memory_usage_bytes
.push(Some(node_info.memory_usage_bytes));
if node_info.last_activity_ts > 0 {
self.active_times.push(Some(
@@ -269,8 +292,10 @@ impl InformationSchemaClusterInfoBuilder {
Arc::new(self.peer_types.finish()),
Arc::new(self.peer_addrs.finish()),
Arc::new(self.peer_hostnames.finish()),
Arc::new(self.cpus.finish()),
Arc::new(self.memory_bytes.finish()),
Arc::new(self.total_cpu_millicores.finish()),
Arc::new(self.total_memory_bytes.finish()),
Arc::new(self.cpu_usage_millicores.finish()),
Arc::new(self.memory_usage_bytes.finish()),
Arc::new(self.versions.finish()),
Arc::new(self.git_commits.finish()),
Arc::new(self.start_times.finish()),

View File

@@ -27,6 +27,7 @@ use datafusion::error::DataFusionError;
use datafusion::execution::TaskContext;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion_pg_catalog::pg_catalog::catalog_info::CatalogInfo;
use datafusion_pg_catalog::pg_catalog::context::EmptyContextProvider;
use datafusion_pg_catalog::pg_catalog::{
PG_CATALOG_TABLES, PgCatalogSchemaProvider, PgCatalogStaticTables, PgCatalogTable,
};
@@ -44,7 +45,7 @@ use crate::system_schema::{
/// [`PGCatalogProvider`] is the provider for a schema named `pg_catalog`, it is not a catalog.
pub struct PGCatalogProvider {
catalog_name: String,
inner: PgCatalogSchemaProvider<CatalogManagerWrapper>,
inner: PgCatalogSchemaProvider<CatalogManagerWrapper, EmptyContextProvider>,
tables: HashMap<String, TableRef>,
table_ids: HashMap<&'static str, u32>,
}
@@ -69,6 +70,7 @@ impl PGCatalogProvider {
catalog_manager,
},
Arc::new(static_tables),
EmptyContextProvider,
)
.expect("Failed to initialize PgCatalogSchemaProvider");

View File

@@ -30,6 +30,7 @@ use common_meta::heartbeat::handler::invalidate_table_cache::InvalidateCacheHand
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
use common_meta::key::TableMetadataManager;
use common_meta::key::flow::FlowMetadataManager;
use common_stat::ResourceStatImpl;
use common_telemetry::info;
use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions};
use common_version::{short_version, verbose_version};
@@ -372,11 +373,15 @@ impl StartCommand {
Arc::new(InvalidateCacheHandler::new(layered_cache_registry.clone())),
]);
let mut resource_stat = ResourceStatImpl::default();
resource_stat.start_collect_cpu_usage();
let heartbeat_task = flow::heartbeat::HeartbeatTask::new(
&opts,
meta_client.clone(),
opts.heartbeat.clone(),
Arc::new(executor),
Arc::new(resource_stat),
);
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));

View File

@@ -30,6 +30,7 @@ use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
use common_meta::heartbeat::handler::HandlerGroupExecutor;
use common_meta::heartbeat::handler::invalidate_table_cache::InvalidateCacheHandler;
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
use common_stat::ResourceStatImpl;
use common_telemetry::info;
use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions};
use common_time::timezone::set_default_timezone;
@@ -421,11 +422,15 @@ impl StartCommand {
Arc::new(InvalidateCacheHandler::new(layered_cache_registry.clone())),
]);
let mut resource_stat = ResourceStatImpl::default();
resource_stat.start_collect_cpu_usage();
let heartbeat_task = HeartbeatTask::new(
&opts,
meta_client.clone(),
opts.heartbeat.clone(),
Arc::new(executor),
Arc::new(resource_stat),
);
let heartbeat_task = Some(heartbeat_task);

View File

@@ -11,7 +11,6 @@ workspace = true
common-base.workspace = true
common-error.workspace = true
common-macro.workspace = true
common-stat.workspace = true
config.workspace = true
humantime-serde.workspace = true
object-store.workspace = true

View File

@@ -14,7 +14,6 @@
pub mod config;
pub mod error;
pub mod utils;
use std::time::Duration;

View File

@@ -1,34 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_base::readable_size::ReadableSize;
use common_stat::{get_total_cpu_millicores, get_total_memory_readable};
/// `ResourceSpec` holds the static resource specifications of a node,
/// such as CPU cores and memory capacity. These values are fixed
/// at startup and do not change dynamically during runtime.
#[derive(Debug, Clone, Copy)]
pub struct ResourceSpec {
pub cpus: i64,
pub memory: Option<ReadableSize>,
}
impl Default for ResourceSpec {
fn default() -> Self {
Self {
cpus: get_total_cpu_millicores(),
memory: get_total_memory_readable(),
}
}
}

View File

@@ -36,7 +36,7 @@ object_store_opendal.workspace = true
orc-rust = { version = "0.6.3", default-features = false, features = ["async"] }
parquet.workspace = true
paste.workspace = true
regex = "1.7"
regex.workspace = true
serde.workspace = true
snafu.workspace = true
strum.workspace = true

View File

@@ -33,7 +33,7 @@ use bytes::{Buf, Bytes};
use datafusion::datasource::physical_plan::FileOpenFuture;
use datafusion::error::{DataFusionError, Result as DataFusionResult};
use datafusion::physical_plan::SendableRecordBatchStream;
use futures::StreamExt;
use futures::{StreamExt, TryStreamExt};
use object_store::ObjectStore;
use snafu::ResultExt;
use tokio_util::compat::FuturesAsyncWriteCompatExt;
@@ -179,7 +179,7 @@ pub fn open_with_decoder<T: ArrowDecoder, F: Fn() -> DataFusionResult<T>>(
Poll::Ready(decoder.flush().transpose())
});
Ok(stream.boxed())
Ok(stream.map_err(Into::into).boxed())
}))
}

View File

@@ -51,6 +51,7 @@ nalgebra.workspace = true
num = "0.4"
num-traits = "0.2"
paste.workspace = true
regex.workspace = true
s2 = { version = "0.0.12", optional = true }
serde.workspace = true
serde_json.workspace = true

View File

@@ -22,6 +22,7 @@
//! `foo_merge`'s input arg is the same as `foo_state`'s output, and its output is the same as `foo`'s input.
//!
use std::hash::{Hash, Hasher};
use std::sync::Arc;
use arrow::array::StructArray;
@@ -272,7 +273,7 @@ impl StateMergeHelper {
}
/// Wrapper to make an aggregate function out of a state function.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct StateWrapper {
inner: AggregateUDF,
name: String,
@@ -616,6 +617,20 @@ impl AggregateUDFImpl for MergeWrapper {
}
}
impl PartialEq for MergeWrapper {
fn eq(&self, other: &Self) -> bool {
self.inner == other.inner
}
}
impl Eq for MergeWrapper {}
impl Hash for MergeWrapper {
fn hash<H: Hasher>(&self, state: &mut H) {
self.inner.hash(state);
}
}
/// The merge accumulator, which modifies `update_batch`'s behavior to accept a single struct array
/// containing the state fields of the original aggregate function and merge those states into the
/// original accumulator; the output is the same as that of the original aggregate function.

View File

@@ -39,8 +39,7 @@ use datafusion::prelude::SessionContext;
use datafusion_common::arrow::array::AsArray;
use datafusion_common::arrow::datatypes::{Float64Type, UInt64Type};
use datafusion_common::{Column, TableReference};
use datafusion_expr::expr::AggregateFunction;
use datafusion_expr::sqlparser::ast::NullTreatment;
use datafusion_expr::expr::{AggregateFunction, NullTreatment};
use datafusion_expr::{
Aggregate, ColumnarValue, Expr, LogicalPlan, ScalarFunctionArgs, SortExpr, TableScan, lit,
};

View File

@@ -68,7 +68,7 @@ impl CountHash {
}
}
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct CountHash {
signature: Signature,
}

View File

@@ -34,6 +34,7 @@ use crate::scalars::json::JsonFunction;
use crate::scalars::matches::MatchesFunction;
use crate::scalars::matches_term::MatchesTermFunction;
use crate::scalars::math::MathFunction;
use crate::scalars::string::register_string_functions;
use crate::scalars::timestamp::TimestampFunction;
use crate::scalars::uddsketch_calc::UddSketchCalcFunction;
use crate::scalars::vector::VectorFunction as VectorScalarFunction;
@@ -154,6 +155,9 @@ pub static FUNCTION_REGISTRY: LazyLock<Arc<FunctionRegistry>> = LazyLock::new(||
// Json related functions
JsonFunction::register(&function_registry);
// String related functions
register_string_functions(&function_registry);
// Vector related functions
VectorScalarFunction::register(&function_registry);
VectorAggrFunction::register(&function_registry);

View File

@@ -20,6 +20,7 @@ pub mod json;
pub mod matches;
pub mod matches_term;
pub mod math;
pub(crate) mod string;
pub mod vector;
pub(crate) mod hll_count;

View File

@@ -20,7 +20,9 @@ use common_query::error;
use common_time::{Date, Timestamp};
use datafusion_common::DataFusionError;
use datafusion_common::arrow::array::{Array, AsArray, StringViewBuilder};
use datafusion_common::arrow::datatypes::{ArrowTimestampType, DataType, Date32Type, TimeUnit};
use datafusion_common::arrow::datatypes::{
ArrowTimestampType, DataType, Date32Type, Date64Type, TimeUnit,
};
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature};
use snafu::ResultExt;
@@ -40,6 +42,7 @@ impl Default for DateFormatFunction {
signature: helper::one_of_sigs2(
vec![
DataType::Date32,
DataType::Date64,
DataType::Timestamp(TimeUnit::Second, None),
DataType::Timestamp(TimeUnit::Millisecond, None),
DataType::Timestamp(TimeUnit::Microsecond, None),
@@ -115,6 +118,29 @@ impl Function for DateFormatFunction {
builder.append_option(result.as_deref());
}
}
DataType::Date64 => {
let left = left.as_primitive::<Date64Type>();
for i in 0..size {
let date = left.is_valid(i).then(|| {
let ms = left.value(i);
Timestamp::new_millisecond(ms)
});
let format = formats.is_valid(i).then(|| formats.value(i));
let result = match (date, format) {
(Some(ts), Some(fmt)) => {
Some(ts.as_formatted_string(fmt, Some(timezone)).map_err(|e| {
DataFusionError::Execution(format!(
"cannot format {ts:?} as '{fmt}': {e}"
))
})?)
}
_ => None,
};
builder.append_option(result.as_deref());
}
}
x => {
return Err(DataFusionError::Execution(format!(
"unsupported input data type {x}"
@@ -137,7 +163,9 @@ mod tests {
use std::sync::Arc;
use arrow_schema::Field;
use datafusion_common::arrow::array::{Date32Array, StringArray, TimestampSecondArray};
use datafusion_common::arrow::array::{
Date32Array, Date64Array, StringArray, TimestampSecondArray,
};
use datafusion_common::config::ConfigOptions;
use datafusion_expr::{TypeSignature, Volatility};
@@ -166,7 +194,7 @@ mod tests {
Signature {
type_signature: TypeSignature::OneOf(sigs),
volatility: Volatility::Immutable
} if sigs.len() == 5));
} if sigs.len() == 6));
}
#[test]
@@ -213,6 +241,50 @@ mod tests {
}
}
#[test]
fn test_date64_date_format() {
let f = DateFormatFunction::default();
let dates = vec![Some(123000), None, Some(42000), None];
let formats = vec![
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
];
let results = [
Some("1970-01-01 00:02:03.000"),
None,
Some("1970-01-01 00:00:42.000"),
None,
];
let mut config_options = ConfigOptions::default();
config_options.extensions.insert(FunctionContext::default());
let config_options = Arc::new(config_options);
let args = ScalarFunctionArgs {
args: vec![
ColumnarValue::Array(Arc::new(Date64Array::from(dates))),
ColumnarValue::Array(Arc::new(StringArray::from_iter_values(formats))),
],
arg_fields: vec![],
number_rows: 4,
return_field: Arc::new(Field::new("x", DataType::Utf8View, false)),
config_options,
};
let result = f
.invoke_with_args(args)
.and_then(|x| x.to_array(4))
.unwrap();
let vector = result.as_string_view();
assert_eq!(4, vector.len());
for (actual, expect) in vector.iter().zip(results) {
assert_eq!(actual, expect);
}
}
#[test]
fn test_date_date_format() {
let f = DateFormatFunction::default();

View File

@@ -76,7 +76,7 @@ impl Function for GeohashFunction {
}
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8)
Ok(DataType::Utf8View)
}
fn signature(&self) -> &Signature {
@@ -176,7 +176,7 @@ impl Function for GeohashNeighboursFunction {
Ok(DataType::List(Arc::new(Field::new(
"item",
DataType::Utf8View,
false,
true,
))))
}

View File

@@ -355,9 +355,9 @@ impl Function for H3CellCenterLatLng {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::List(Arc::new(Field::new(
"x",
"item",
DataType::Float64,
false,
true,
))))
}

View File

@@ -0,0 +1,26 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! String scalar functions
mod regexp_extract;
pub(crate) use regexp_extract::RegexpExtractFunction;
use crate::function_registry::FunctionRegistry;
/// Register all string functions
pub fn register_string_functions(registry: &FunctionRegistry) {
RegexpExtractFunction::register(registry);
}

View File

@@ -0,0 +1,339 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Implementation of REGEXP_EXTRACT function
use std::fmt;
use std::sync::Arc;
use datafusion_common::DataFusionError;
use datafusion_common::arrow::array::{Array, AsArray, LargeStringBuilder};
use datafusion_common::arrow::compute::cast;
use datafusion_common::arrow::datatypes::DataType;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, TypeSignature, Volatility};
use regex::{Regex, RegexBuilder};
use crate::function::Function;
use crate::function_registry::FunctionRegistry;
const NAME: &str = "regexp_extract";
// Safety limits
const MAX_REGEX_SIZE: usize = 1024 * 1024; // compiled regex heap cap
const MAX_DFA_SIZE: usize = 2 * 1024 * 1024; // lazy DFA cap
const MAX_TOTAL_RESULT_SIZE: usize = 64 * 1024 * 1024; // total batch cap
const MAX_SINGLE_MATCH: usize = 1024 * 1024; // per-row cap
const MAX_PATTERN_LEN: usize = 10_000; // pattern text length cap
/// REGEXP_EXTRACT function implementation
/// Extracts the first substring matching the given regular expression pattern.
/// If no match is found, returns NULL.
///
#[derive(Debug)]
pub struct RegexpExtractFunction {
signature: Signature,
}
impl RegexpExtractFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register_scalar(RegexpExtractFunction::default());
}
}
impl Default for RegexpExtractFunction {
fn default() -> Self {
Self {
signature: Signature::one_of(
vec![
TypeSignature::Exact(vec![DataType::Utf8View, DataType::Utf8]),
TypeSignature::Exact(vec![DataType::Utf8View, DataType::Utf8View]),
TypeSignature::Exact(vec![DataType::Utf8, DataType::Utf8View]),
TypeSignature::Exact(vec![DataType::LargeUtf8, DataType::Utf8View]),
TypeSignature::Exact(vec![DataType::Utf8View, DataType::LargeUtf8]),
TypeSignature::Exact(vec![DataType::Utf8, DataType::Utf8]),
TypeSignature::Exact(vec![DataType::LargeUtf8, DataType::Utf8]),
TypeSignature::Exact(vec![DataType::Utf8, DataType::LargeUtf8]),
TypeSignature::Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]),
],
Volatility::Immutable,
),
}
}
}
impl fmt::Display for RegexpExtractFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
impl Function for RegexpExtractFunction {
fn name(&self) -> &str {
NAME
}
// Always return LargeUtf8 for simplicity and safety
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::LargeUtf8)
}
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
&self,
args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
if args.args.len() != 2 {
return Err(DataFusionError::Execution(
"REGEXP_EXTRACT requires exactly two arguments (text, pattern)".to_string(),
));
}
// Keep original ColumnarValue variants for scalar-pattern fast path
let pattern_is_scalar = matches!(args.args[1], ColumnarValue::Scalar(_));
let arrays = ColumnarValue::values_to_arrays(&args.args)?;
let text_array = &arrays[0];
let pattern_array = &arrays[1];
// Cast both to LargeUtf8 for uniform access (supports Utf8/Utf8View/Dictionary<String>)
let text_large = cast(text_array.as_ref(), &DataType::LargeUtf8).map_err(|e| {
DataFusionError::Execution(format!("REGEXP_EXTRACT: text cast failed: {e}"))
})?;
let pattern_large = cast(pattern_array.as_ref(), &DataType::LargeUtf8).map_err(|e| {
DataFusionError::Execution(format!("REGEXP_EXTRACT: pattern cast failed: {e}"))
})?;
let text = text_large.as_string::<i64>();
let pattern = pattern_large.as_string::<i64>();
let len = text.len();
// Pre-size result builder with conservative estimate
let mut estimated_total = 0usize;
for i in 0..len {
if !text.is_null(i) {
estimated_total = estimated_total.saturating_add(text.value_length(i) as usize);
if estimated_total > MAX_TOTAL_RESULT_SIZE {
return Err(DataFusionError::ResourcesExhausted(format!(
"REGEXP_EXTRACT total output exceeds {} bytes",
MAX_TOTAL_RESULT_SIZE
)));
}
}
}
let mut builder = LargeStringBuilder::with_capacity(len, estimated_total);
// Fast path: if pattern is scalar, compile once
let compiled_scalar: Option<Regex> = if pattern_is_scalar && len > 0 && !pattern.is_null(0)
{
Some(compile_regex_checked(pattern.value(0))?)
} else {
None
};
for i in 0..len {
if text.is_null(i) || pattern.is_null(i) {
builder.append_null();
continue;
}
let s = text.value(i);
let pat = pattern.value(i);
// Compile or reuse regex
let re = if let Some(ref compiled) = compiled_scalar {
compiled
} else {
// TODO: For performance-critical applications with repeating patterns,
// consider adding a small LRU cache here
&compile_regex_checked(pat)?
};
// First match only
if let Some(m) = re.find(s) {
let m_str = m.as_str();
if m_str.len() > MAX_SINGLE_MATCH {
return Err(DataFusionError::Execution(
"REGEXP_EXTRACT match exceeds per-row limit (1MB)".to_string(),
));
}
builder.append_value(m_str);
} else {
builder.append_null();
}
}
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
}
}
// Compile a regex with safety checks
fn compile_regex_checked(pattern: &str) -> datafusion_common::Result<Regex> {
if pattern.len() > MAX_PATTERN_LEN {
return Err(DataFusionError::Execution(format!(
"REGEXP_EXTRACT pattern too long (> {} chars)",
MAX_PATTERN_LEN
)));
}
RegexBuilder::new(pattern)
.size_limit(MAX_REGEX_SIZE)
.dfa_size_limit(MAX_DFA_SIZE)
.build()
.map_err(|e| {
DataFusionError::Execution(format!("REGEXP_EXTRACT invalid pattern '{}': {e}", pattern))
})
}
#[cfg(test)]
mod tests {
use datafusion_common::arrow::array::StringArray;
use datafusion_common::arrow::datatypes::Field;
use datafusion_expr::ScalarFunctionArgs;
use super::*;
#[test]
fn test_regexp_extract_function_basic() {
let text_array = Arc::new(StringArray::from(vec!["version 1.2.3", "no match here"]));
let pattern_array = Arc::new(StringArray::from(vec!["\\d+\\.\\d+\\.\\d+", "\\d+"]));
let args = ScalarFunctionArgs {
args: vec![
ColumnarValue::Array(text_array),
ColumnarValue::Array(pattern_array),
],
arg_fields: vec![
Arc::new(Field::new("arg_0", DataType::Utf8, false)),
Arc::new(Field::new("arg_1", DataType::Utf8, false)),
],
return_field: Arc::new(Field::new("result", DataType::LargeUtf8, true)),
number_rows: 2,
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
};
let function = RegexpExtractFunction::default();
let result = function.invoke_with_args(args).unwrap();
if let ColumnarValue::Array(array) = result {
let string_array = array.as_string::<i64>();
assert_eq!(string_array.value(0), "1.2.3");
assert!(string_array.is_null(1)); // no match should return NULL
} else {
panic!("Expected array result");
}
}
#[test]
fn test_regexp_extract_phone_number() {
let text_array = Arc::new(StringArray::from(vec!["Phone: 123-456-7890", "No phone"]));
let pattern_array = Arc::new(StringArray::from(vec![
"\\d{3}-\\d{3}-\\d{4}",
"\\d{3}-\\d{3}-\\d{4}",
]));
let args = ScalarFunctionArgs {
args: vec![
ColumnarValue::Array(text_array),
ColumnarValue::Array(pattern_array),
],
arg_fields: vec![
Arc::new(Field::new("arg_0", DataType::Utf8, false)),
Arc::new(Field::new("arg_1", DataType::Utf8, false)),
],
return_field: Arc::new(Field::new("result", DataType::LargeUtf8, true)),
number_rows: 2,
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
};
let function = RegexpExtractFunction::default();
let result = function.invoke_with_args(args).unwrap();
if let ColumnarValue::Array(array) = result {
let string_array = array.as_string::<i64>();
assert_eq!(string_array.value(0), "123-456-7890");
assert!(string_array.is_null(1)); // no match should return NULL
} else {
panic!("Expected array result");
}
}
#[test]
fn test_regexp_extract_email() {
let text_array = Arc::new(StringArray::from(vec![
"Email: user@domain.com",
"Invalid email",
]));
let pattern_array = Arc::new(StringArray::from(vec![
"[a-zA-Z0-9]+@[a-zA-Z0-9]+\\.[a-zA-Z]+",
"[a-zA-Z0-9]+@[a-zA-Z0-9]+\\.[a-zA-Z]+",
]));
let args = ScalarFunctionArgs {
args: vec![
ColumnarValue::Array(text_array),
ColumnarValue::Array(pattern_array),
],
arg_fields: vec![
Arc::new(Field::new("arg_0", DataType::Utf8, false)),
Arc::new(Field::new("arg_1", DataType::Utf8, false)),
],
return_field: Arc::new(Field::new("result", DataType::LargeUtf8, true)),
number_rows: 2,
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
};
let function = RegexpExtractFunction::default();
let result = function.invoke_with_args(args).unwrap();
if let ColumnarValue::Array(array) = result {
let string_array = array.as_string::<i64>();
assert_eq!(string_array.value(0), "user@domain.com");
assert!(string_array.is_null(1)); // no match should return NULL
} else {
panic!("Expected array result");
}
}
#[test]
fn test_regexp_extract_with_nulls() {
let text_array = Arc::new(StringArray::from(vec![Some("test 123"), None]));
let pattern_array = Arc::new(StringArray::from(vec![Some("\\d+"), Some("\\d+")]));
let args = ScalarFunctionArgs {
args: vec![
ColumnarValue::Array(text_array),
ColumnarValue::Array(pattern_array),
],
arg_fields: vec![
Arc::new(Field::new("arg_0", DataType::Utf8, true)),
Arc::new(Field::new("arg_1", DataType::Utf8, false)),
],
return_field: Arc::new(Field::new("result", DataType::LargeUtf8, true)),
number_rows: 2,
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
};
let function = RegexpExtractFunction::default();
let result = function.invoke_with_args(args).unwrap();
if let ColumnarValue::Array(array) = result {
let string_array = array.as_string::<i64>();
assert_eq!(string_array.value(0), "123");
assert!(string_array.is_null(1)); // NULL input should return NULL
} else {
panic!("Expected array result");
}
}
}
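A note on the TODO inside `invoke_with_args` above: when the pattern column is not a scalar but repeats values, every row currently recompiles its regex. A minimal sketch of the per-batch cache hinted at there is shown below; `extract_with_cache` is a hypothetical helper (not part of this commit) that reuses the file's own `compile_regex_checked` and its existing `regex::Regex` import, and a bounded LRU could replace the plain `HashMap` for very long batches.

// Hypothetical sketch: reuse compiled regexes across rows of a single batch.
fn extract_with_cache(
    texts: &[&str],
    patterns: &[&str],
) -> datafusion_common::Result<Vec<Option<String>>> {
    use std::collections::HashMap;
    let mut cache: HashMap<String, Regex> = HashMap::new();
    let mut out = Vec::with_capacity(texts.len());
    for (&text, &pat) in texts.iter().zip(patterns) {
        // Compile each distinct pattern at most once per batch.
        if !cache.contains_key(pat) {
            cache.insert(pat.to_string(), compile_regex_checked(pat)?);
        }
        let re = &cache[pat];
        out.push(re.find(text).map(|m| m.as_str().to_string()));
    }
    Ok(out)
}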

View File

@@ -14,6 +14,7 @@
use std::any::Any;
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use datafusion::arrow::datatypes::DataType;
use datafusion::logical_expr::{ScalarFunctionArgs, ScalarUDFImpl};
@@ -33,6 +34,20 @@ impl Debug for ScalarUdf {
}
}
impl PartialEq for ScalarUdf {
fn eq(&self, other: &Self) -> bool {
self.function.signature() == other.function.signature()
}
}
impl Eq for ScalarUdf {}
impl Hash for ScalarUdf {
fn hash<H: Hasher>(&self, state: &mut H) {
self.function.signature().hash(state)
}
}
impl ScalarUDFImpl for ScalarUdf {
fn as_any(&self) -> &dyn Any {
self

View File

@@ -32,10 +32,36 @@ use crate::system::define_nullary_udf;
const CURRENT_SCHEMA_FUNCTION_NAME: &str = "current_schema";
const CURRENT_SCHEMAS_FUNCTION_NAME: &str = "current_schemas";
const SESSION_USER_FUNCTION_NAME: &str = "session_user";
const CURRENT_DATABASE_FUNCTION_NAME: &str = "current_database";
define_nullary_udf!(CurrentSchemaFunction);
define_nullary_udf!(CurrentSchemasFunction);
define_nullary_udf!(SessionUserFunction);
define_nullary_udf!(CurrentDatabaseFunction);
impl Function for CurrentDatabaseFunction {
fn name(&self) -> &str {
CURRENT_DATABASE_FUNCTION_NAME
}
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
&self,
args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
let func_ctx = find_function_context(&args)?;
let db = func_ctx.query_ctx.current_catalog().to_string();
Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(Some(db))))
}
}
// Though "current_schema" can be aliased to "database", to not cause any breaking changes,
// we are not doing it: not until https://github.com/apache/datafusion/issues/17469 is resolved.
@@ -141,6 +167,7 @@ impl PGCatalogFunction {
registry.register_scalar(CurrentSchemaFunction::default());
registry.register_scalar(CurrentSchemasFunction::default());
registry.register_scalar(SessionUserFunction::default());
registry.register_scalar(CurrentDatabaseFunction::default());
registry.register(pg_catalog::format_type::create_format_type_udf());
registry.register(pg_catalog::create_pg_get_partkeydef_udf());
registry.register(pg_catalog::has_privilege_udf::create_has_privilege_udf(

View File

@@ -345,6 +345,20 @@ fn build_struct(
Ok(datafusion_expr::ColumnarValue::Array(result_vector.to_arrow_array()))
}
}
impl PartialEq for #name {
fn eq(&self, other: &Self) -> bool {
self.signature == other.signature
}
}
impl Eq for #name {}
impl std::hash::Hash for #name {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.signature.hash(state)
}
}
}
.into()
}

View File

@@ -120,10 +120,16 @@ pub struct NodeInfo {
pub start_time_ms: u64,
// The node build cpus
#[serde(default)]
pub cpus: u32,
pub total_cpu_millicores: i64,
// The node build memory bytes
#[serde(default)]
pub memory_bytes: u64,
pub total_memory_bytes: i64,
// The node build cpu usage millicores
#[serde(default)]
pub cpu_usage_millicores: i64,
// The node build memory usage bytes
#[serde(default)]
pub memory_usage_bytes: i64,
// The node build hostname
#[serde(default)]
pub hostname: String,
@@ -333,8 +339,10 @@ mod tests {
version: "".to_string(),
git_commit: "".to_string(),
start_time_ms: 1,
cpus: 0,
memory_bytes: 0,
total_cpu_millicores: 0,
total_memory_bytes: 0,
cpu_usage_millicores: 0,
memory_usage_bytes: 0,
hostname: "test_hostname".to_string(),
};
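Since the renamed and added counters all carry `#[serde(default)]`, a heartbeat payload from an older node that still sends `cpus`/`memory_bytes` (or omits these fields entirely) continues to deserialize; the new fields simply come out as `0`. A minimal illustration of that serde behavior, using a hypothetical probe struct rather than `NodeInfo` itself:

#[derive(serde::Deserialize)]
struct NodeInfoCompatProbe {
    #[serde(default)]
    total_cpu_millicores: i64,
    #[serde(default)]
    cpu_usage_millicores: i64,
}

#[test]
fn old_payload_without_new_fields_still_parses() {
    // Unknown fields (such as the old `cpus`) are ignored by default; missing fields fall back to 0.
    let probe: NodeInfoCompatProbe =
        serde_json::from_str(r#"{"cpus": 8, "hostname": "n1"}"#).unwrap();
    assert_eq!(probe.total_cpu_millicores, 0);
    assert_eq!(probe.cpu_usage_millicores, 0);
}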

View File

@@ -55,6 +55,10 @@ impl Display for RegionIdent {
/// The result of downgrade leader region.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
pub struct DowngradeRegionReply {
/// The [RegionId].
/// For compatibility, it is defaulted to [RegionId::new(0, 0)].
#[serde(default)]
pub region_id: RegionId,
/// Returns the `last_entry_id` if available.
pub last_entry_id: Option<u64>,
/// Returns the `metadata_last_entry_id` if available (Only available for metric engine).
@@ -423,14 +427,60 @@ pub enum Instruction {
CloseRegions(Vec<RegionIdent>),
/// Upgrades a region.
UpgradeRegion(UpgradeRegion),
#[serde(
deserialize_with = "single_or_multiple_from",
alias = "DowngradeRegion"
)]
/// Downgrades a region.
DowngradeRegion(DowngradeRegion),
DowngradeRegions(Vec<DowngradeRegion>),
/// Invalidates batch cache.
InvalidateCaches(Vec<CacheIdent>),
/// Flushes regions.
FlushRegions(FlushRegions),
}
impl Instruction {
/// Converts the instruction into a vector of [OpenRegion].
pub fn into_open_regions(self) -> Option<Vec<OpenRegion>> {
match self {
Self::OpenRegions(open_regions) => Some(open_regions),
_ => None,
}
}
/// Converts the instruction into a vector of [RegionIdent].
pub fn into_close_regions(self) -> Option<Vec<RegionIdent>> {
match self {
Self::CloseRegions(close_regions) => Some(close_regions),
_ => None,
}
}
/// Converts the instruction into a [FlushRegions].
pub fn into_flush_regions(self) -> Option<FlushRegions> {
match self {
Self::FlushRegions(flush_regions) => Some(flush_regions),
_ => None,
}
}
/// Converts the instruction into a [DowngradeRegion].
pub fn into_downgrade_regions(self) -> Option<Vec<DowngradeRegion>> {
match self {
Self::DowngradeRegions(downgrade_region) => Some(downgrade_region),
_ => None,
}
}
/// Converts the instruction into a [UpgradeRegion].
pub fn into_upgrade_regions(self) -> Option<UpgradeRegion> {
match self {
Self::UpgradeRegion(upgrade_region) => Some(upgrade_region),
_ => None,
}
}
}
/// The reply of [UpgradeRegion].
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
pub struct UpgradeRegionReply {
@@ -452,6 +502,39 @@ impl Display for UpgradeRegionReply {
}
}
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
pub struct DowngradeRegionsReply {
pub replies: Vec<DowngradeRegionReply>,
}
impl DowngradeRegionsReply {
pub fn new(replies: Vec<DowngradeRegionReply>) -> Self {
Self { replies }
}
pub fn single(reply: DowngradeRegionReply) -> Self {
Self::new(vec![reply])
}
}
#[derive(Deserialize)]
#[serde(untagged)]
enum DowngradeRegionsCompat {
Single(DowngradeRegionReply),
Multiple(DowngradeRegionsReply),
}
fn downgrade_regions_compat_from<'de, D>(deserializer: D) -> Result<DowngradeRegionsReply, D::Error>
where
D: Deserializer<'de>,
{
let helper = DowngradeRegionsCompat::deserialize(deserializer)?;
Ok(match helper {
DowngradeRegionsCompat::Single(x) => DowngradeRegionsReply::new(vec![x]),
DowngradeRegionsCompat::Multiple(reply) => reply,
})
}
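// Editor's note, not part of this commit: `single_or_multiple_from`, referenced on the
// `DowngradeRegions` variant above, is not shown in this diff. Assuming it mirrors the
// untagged Single/Multiple pattern used for `DowngradeRegionsReply`, the instruction-side
// helper could be sketched like this (names are hypothetical):
#[derive(Deserialize)]
#[serde(untagged)]
enum SingleOrMultipleSketch {
    Single(DowngradeRegion),
    Multiple(Vec<DowngradeRegion>),
}
fn single_or_multiple_from_sketch<'de, D>(deserializer: D) -> Result<Vec<DowngradeRegion>, D::Error>
where
    D: Deserializer<'de>,
{
    Ok(match SingleOrMultipleSketch::deserialize(deserializer)? {
        SingleOrMultipleSketch::Single(x) => vec![x],
        SingleOrMultipleSketch::Multiple(xs) => xs,
    })
}
// This keeps the legacy single-object payload (and the `DowngradeRegion` alias) deserializable
// while the new variant carries a batch of regions.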
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum InstructionReply {
@@ -460,7 +543,11 @@ pub enum InstructionReply {
#[serde(alias = "close_region")]
CloseRegions(SimpleReply),
UpgradeRegion(UpgradeRegionReply),
DowngradeRegion(DowngradeRegionReply),
#[serde(
alias = "downgrade_region",
deserialize_with = "downgrade_regions_compat_from"
)]
DowngradeRegions(DowngradeRegionsReply),
FlushRegions(FlushRegionReply),
}
@@ -470,8 +557,8 @@ impl Display for InstructionReply {
Self::OpenRegions(reply) => write!(f, "InstructionReply::OpenRegions({})", reply),
Self::CloseRegions(reply) => write!(f, "InstructionReply::CloseRegions({})", reply),
Self::UpgradeRegion(reply) => write!(f, "InstructionReply::UpgradeRegion({})", reply),
Self::DowngradeRegion(reply) => {
write!(f, "InstructionReply::DowngradeRegion({})", reply)
Self::DowngradeRegions(reply) => {
write!(f, "InstructionReply::DowngradeRegions({:?})", reply)
}
Self::FlushRegions(reply) => write!(f, "InstructionReply::FlushRegions({})", reply),
}
@@ -493,6 +580,27 @@ impl InstructionReply {
_ => panic!("Expected OpenRegions reply"),
}
}
pub fn expect_upgrade_region_reply(self) -> UpgradeRegionReply {
match self {
Self::UpgradeRegion(reply) => reply,
_ => panic!("Expected UpgradeRegion reply"),
}
}
pub fn expect_downgrade_regions_reply(self) -> Vec<DowngradeRegionReply> {
match self {
Self::DowngradeRegions(reply) => reply.replies,
_ => panic!("Expected DowngradeRegion reply"),
}
}
pub fn expect_flush_regions_reply(self) -> FlushRegionReply {
match self {
Self::FlushRegions(reply) => reply,
_ => panic!("Expected FlushRegions reply"),
}
}
}
#[cfg(test)]
@@ -532,11 +640,27 @@ mod tests {
r#"{"CloseRegions":[{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}]}"#,
serialized
);
let downgrade_region = InstructionReply::DowngradeRegions(DowngradeRegionsReply::single(
DowngradeRegionReply {
region_id: RegionId::new(1024, 1),
last_entry_id: None,
metadata_last_entry_id: None,
exists: true,
error: None,
},
));
let serialized = serde_json::to_string(&downgrade_region).unwrap();
assert_eq!(
r#"{"type":"downgrade_regions","replies":[{"region_id":4398046511105,"last_entry_id":null,"metadata_last_entry_id":null,"exists":true,"error":null}]}"#,
serialized
)
}
#[test]
fn test_deserialize_instruction() {
let open_region_instruction = r#"{"OpenRegion":[{"region_ident":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false}]}"#;
let open_region_instruction = r#"{"OpenRegion":{"region_ident":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false}}"#;
let open_region_instruction: Instruction =
serde_json::from_str(open_region_instruction).unwrap();
let open_region = Instruction::OpenRegions(vec![OpenRegion::new(
@@ -553,7 +677,7 @@ mod tests {
)]);
assert_eq!(open_region_instruction, open_region);
let close_region_instruction = r#"{"CloseRegion":[{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}]}"#;
let close_region_instruction = r#"{"CloseRegion":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}}"#;
let close_region_instruction: Instruction =
serde_json::from_str(close_region_instruction).unwrap();
let close_region = Instruction::CloseRegions(vec![RegionIdent {
@@ -564,6 +688,15 @@ mod tests {
}]);
assert_eq!(close_region_instruction, close_region);
let downgrade_region_instruction = r#"{"DowngradeRegions":{"region_id":4398046511105,"flush_timeout":{"secs":1,"nanos":0}}}"#;
let downgrade_region_instruction: Instruction =
serde_json::from_str(downgrade_region_instruction).unwrap();
let downgrade_region = Instruction::DowngradeRegions(vec![DowngradeRegion {
region_id: RegionId::new(1024, 1),
flush_timeout: Some(Duration::from_millis(1000)),
}]);
assert_eq!(downgrade_region_instruction, downgrade_region);
let close_region_instruction_reply =
r#"{"result":true,"error":null,"type":"close_region"}"#;
let close_region_instruction_reply: InstructionReply =
@@ -582,6 +715,20 @@ mod tests {
error: None,
});
assert_eq!(open_region_instruction_reply, open_region_reply);
let downgrade_region_instruction_reply = r#"{"region_id":4398046511105,"last_entry_id":null,"metadata_last_entry_id":null,"exists":true,"error":null,"type":"downgrade_region"}"#;
let downgrade_region_instruction_reply: InstructionReply =
serde_json::from_str(downgrade_region_instruction_reply).unwrap();
let downgrade_region_reply = InstructionReply::DowngradeRegions(
DowngradeRegionsReply::single(DowngradeRegionReply {
region_id: RegionId::new(1024, 1),
last_entry_id: None,
metadata_last_entry_id: None,
exists: true,
error: None,
}),
);
assert_eq!(downgrade_region_instruction_reply, downgrade_region_reply);
}
#[derive(Debug, Clone, Serialize, Deserialize)]

View File

@@ -6,11 +6,14 @@ license.workspace = true
[dependencies]
common-base.workspace = true
common-runtime.workspace = true
common-telemetry.workspace = true
lazy_static.workspace = true
nix.workspace = true
num_cpus.workspace = true
prometheus.workspace = true
sysinfo.workspace = true
tokio.workspace = true
[lints]
workspace = true

View File

@@ -117,7 +117,10 @@ pub fn get_cpu_limit_from_cgroups() -> Option<i64> {
None
}
fn get_cpu_usage() -> Option<i64> {
/// Get the CPU usage in millicores from the cgroups filesystem.
///
/// - Returns `None` if not running in a cgroups v2 environment or if the CPU usage cannot be read.
pub fn get_cpu_usage_from_cgroups() -> Option<i64> {
// In certain bare-metal environments, the `/sys/fs/cgroup/cpu.stat` file may be present and reflect system-wide CPU usage rather than container-specific metrics.
// To ensure accurate collection of container-level CPU usage, verify the existence of the `/sys/fs/cgroup/memory.current` file.
// The presence of this file typically indicates execution within a containerized environment, thereby validating the relevance of the collected CPU usage data.
@@ -142,6 +145,22 @@ fn get_cpu_usage() -> Option<i64> {
fields[1].trim().parse::<i64>().ok()
}
// Calculate the cpu usage in millicores from cgroups filesystem.
//
// - Return `0` if the current cpu usage is equal to the last cpu usage or the interval is 0.
pub(crate) fn calculate_cpu_usage(
current_cpu_usage_usecs: i64,
last_cpu_usage_usecs: i64,
interval_milliseconds: i64,
) -> i64 {
let diff = current_cpu_usage_usecs - last_cpu_usage_usecs;
if diff > 0 && interval_milliseconds > 0 {
((diff as f64 / interval_milliseconds as f64).round() as i64).max(1)
} else {
0
}
}
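// A quick sanity check of the conversion above (an illustrative test sketch, not part of
// this patch): 500_000 µs of CPU time consumed over a 5_000 ms window is 0.1 core,
// i.e. 100 millicores; no progress or a zero interval yields 0.
#[cfg(test)]
mod calculate_cpu_usage_example {
    use super::calculate_cpu_usage;
    #[test]
    fn millicore_conversion() {
        // 500_000 µs over 5_000 ms => 100 millicores.
        assert_eq!(calculate_cpu_usage(1_500_000, 1_000_000, 5_000), 100);
        // No increase in usage => 0.
        assert_eq!(calculate_cpu_usage(1_000_000, 1_000_000, 5_000), 0);
        // Zero interval => 0.
        assert_eq!(calculate_cpu_usage(1_500_000, 1_000_000, 0), 0);
    }
}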
// Check whether the cgroup is v2.
// - Return `true` if the cgroup is v2, otherwise return `false`.
// - Return `None` if detection fails or the platform is not Linux.
@@ -230,7 +249,7 @@ impl Collector for CgroupsMetricsCollector {
}
fn collect(&self) -> Vec<MetricFamily> {
if let Some(cpu_usage) = get_cpu_usage() {
if let Some(cpu_usage) = get_cpu_usage_from_cgroups() {
self.cpu_usage.set(cpu_usage);
}

View File

@@ -13,66 +13,7 @@
// limitations under the License.
mod cgroups;
mod resource;
pub use cgroups::*;
use common_base::readable_size::ReadableSize;
use sysinfo::System;
/// Get the total CPU in millicores.
pub fn get_total_cpu_millicores() -> i64 {
// Get CPU limit from cgroups filesystem.
if let Some(cgroup_cpu_limit) = get_cpu_limit_from_cgroups() {
cgroup_cpu_limit
} else {
// Get total CPU cores from host system.
num_cpus::get() as i64 * 1000
}
}
/// Get the total memory in bytes.
pub fn get_total_memory_bytes() -> i64 {
// Get memory limit from cgroups filesystem.
if let Some(cgroup_memory_limit) = get_memory_limit_from_cgroups() {
cgroup_memory_limit
} else {
// Get total memory from host system.
if sysinfo::IS_SUPPORTED_SYSTEM {
let mut sys_info = System::new();
sys_info.refresh_memory();
sys_info.total_memory() as i64
} else {
// If the system is not supported, return -1.
-1
}
}
}
/// Get the total CPU cores. The result will be rounded to the nearest integer.
/// For example, if the total CPU is 1.5 cores(1500 millicores), the result will be 2.
pub fn get_total_cpu_cores() -> usize {
((get_total_cpu_millicores() as f64) / 1000.0).round() as usize
}
/// Get the total memory in readable size.
pub fn get_total_memory_readable() -> Option<ReadableSize> {
if get_total_memory_bytes() > 0 {
Some(ReadableSize(get_total_memory_bytes() as u64))
} else {
None
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_get_total_cpu_cores() {
assert!(get_total_cpu_cores() > 0);
}
#[test]
fn test_get_total_memory_readable() {
assert!(get_total_memory_readable().unwrap() > ReadableSize::mb(0));
}
}
pub use resource::*;

View File

@@ -0,0 +1,187 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use std::sync::atomic::{AtomicI64, Ordering};
use std::time::Duration;
use common_base::readable_size::ReadableSize;
use common_runtime::JoinHandle;
use common_telemetry::info;
use sysinfo::System;
use tokio::time::sleep;
use crate::cgroups::calculate_cpu_usage;
use crate::{
get_cpu_limit_from_cgroups, get_cpu_usage_from_cgroups, get_memory_limit_from_cgroups,
get_memory_usage_from_cgroups,
};
/// Get the total CPU in millicores. If no CPU limit is set in cgroups, it falls back to the total CPU of the host system, converted to millicores.
pub fn get_total_cpu_millicores() -> i64 {
// Get CPU limit from cgroups filesystem.
if let Some(cgroup_cpu_limit) = get_cpu_limit_from_cgroups() {
cgroup_cpu_limit
} else {
// Get total CPU cores from host system.
num_cpus::get() as i64 * 1000
}
}
/// Get the total memory in bytes. If no memory limit is set in cgroups, it falls back to the total memory of the host system.
/// Returns 0 if the total host memory cannot be determined on this platform.
pub fn get_total_memory_bytes() -> i64 {
// Get memory limit from cgroups filesystem.
if let Some(cgroup_memory_limit) = get_memory_limit_from_cgroups() {
cgroup_memory_limit
} else {
// Get total memory from host system.
if sysinfo::IS_SUPPORTED_SYSTEM {
let mut sys_info = System::new();
sys_info.refresh_memory();
sys_info.total_memory() as i64
} else {
// If the system is not supported, return 0
0
}
}
}
/// Get the total CPU cores. The result will be rounded to the nearest integer.
/// For example, if the total CPU is 1.5 cores (1500 millicores), the result will be 2.
pub fn get_total_cpu_cores() -> usize {
((get_total_cpu_millicores() as f64) / 1000.0).round() as usize
}
/// Get the total memory in readable size.
pub fn get_total_memory_readable() -> Option<ReadableSize> {
if get_total_memory_bytes() > 0 {
Some(ReadableSize(get_total_memory_bytes() as u64))
} else {
None
}
}
/// A reference to a `ResourceStat` implementation.
pub type ResourceStatRef = Arc<dyn ResourceStat + Send + Sync>;
/// A trait for getting resource statistics.
pub trait ResourceStat {
/// Get the total CPU in millicores.
fn get_total_cpu_millicores(&self) -> i64;
/// Get the total memory in bytes.
fn get_total_memory_bytes(&self) -> i64;
/// Get the CPU usage in millicores.
fn get_cpu_usage_millicores(&self) -> i64;
/// Get the memory usage in bytes.
fn get_memory_usage_bytes(&self) -> i64;
}
/// An implementation of the `ResourceStat` trait.
pub struct ResourceStatImpl {
cpu_usage_millicores: Arc<AtomicI64>,
last_cpu_usage_usecs: Arc<AtomicI64>,
calculate_interval: Duration,
handler: Option<JoinHandle<()>>,
}
impl Default for ResourceStatImpl {
fn default() -> Self {
Self {
cpu_usage_millicores: Arc::new(AtomicI64::new(0)),
last_cpu_usage_usecs: Arc::new(AtomicI64::new(0)),
calculate_interval: Duration::from_secs(5),
handler: None,
}
}
}
impl ResourceStatImpl {
/// Start collecting CPU usage periodically. It derives the CPU usage in millicores from the rate of change of `usage_usec` in `/sys/fs/cgroup/cpu.stat`.
/// It ONLY works in a cgroup v2 environment.
pub fn start_collect_cpu_usage(&mut self) {
if self.handler.is_some() {
return;
}
let cpu_usage_millicores = self.cpu_usage_millicores.clone();
let last_cpu_usage_usecs = self.last_cpu_usage_usecs.clone();
let calculate_interval = self.calculate_interval;
let handler = common_runtime::spawn_global(async move {
info!(
"Starting to collect CPU usage periodically for every {} seconds",
calculate_interval.as_secs()
);
loop {
let current_cpu_usage_usecs = get_cpu_usage_from_cgroups();
if let Some(current_cpu_usage_usecs) = current_cpu_usage_usecs {
// Skip the first sample; a previous reading is needed to compute the rate.
if last_cpu_usage_usecs.load(Ordering::Relaxed) == 0 {
last_cpu_usage_usecs.store(current_cpu_usage_usecs, Ordering::Relaxed);
continue;
}
let cpu_usage = calculate_cpu_usage(
current_cpu_usage_usecs,
last_cpu_usage_usecs.load(Ordering::Relaxed),
calculate_interval.as_millis() as i64,
);
cpu_usage_millicores.store(cpu_usage, Ordering::Relaxed);
last_cpu_usage_usecs.store(current_cpu_usage_usecs, Ordering::Relaxed);
}
sleep(calculate_interval).await;
}
});
self.handler = Some(handler);
}
}
impl ResourceStat for ResourceStatImpl {
/// Get the total CPU in millicores.
fn get_total_cpu_millicores(&self) -> i64 {
get_total_cpu_millicores()
}
/// Get the total memory in bytes.
fn get_total_memory_bytes(&self) -> i64 {
get_total_memory_bytes()
}
/// Get the CPU usage in millicores.
fn get_cpu_usage_millicores(&self) -> i64 {
self.cpu_usage_millicores.load(Ordering::Relaxed)
}
/// Get the memory usage in bytes.
/// It ONLY works in a cgroup v2 environment.
fn get_memory_usage_bytes(&self) -> i64 {
get_memory_usage_from_cgroups().unwrap_or_default()
}
}
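// A minimal usage sketch (hypothetical; mirrors how the datanode builder wires this up):
// create the stat, start the background CPU sampler, then share it behind the
// `ResourceStatRef` alias. The reported CPU usage stays 0 until two samples have been taken.
#[allow(dead_code)]
fn resource_stat_usage_sketch() -> ResourceStatRef {
    let mut stat = ResourceStatImpl::default();
    // Spawns a background task that refreshes `cpu_usage_millicores` on each interval.
    stat.start_collect_cpu_usage();
    Arc::new(stat)
}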
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_get_total_cpu_cores() {
assert!(get_total_cpu_cores() > 0);
}
#[test]
fn test_get_total_memory_readable() {
assert!(get_total_memory_readable().unwrap() > ReadableSize::mb(0));
}
}

View File

@@ -28,7 +28,7 @@ pub async fn check_output_stream(output: OutputData, expected: &str) {
_ => unreachable!(),
};
let pretty_print = recordbatches.pretty_print().unwrap();
assert_eq!(pretty_print, expected, "actual: \n{}", pretty_print);
assert_eq!(pretty_print, expected.trim(), "actual: \n{}", pretty_print);
}
pub async fn execute_and_check_output(db: &Database, sql: &str, expected: ExpectedOutput<'_>) {

View File

@@ -30,6 +30,7 @@ common-procedure.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true
common-runtime.workspace = true
common-stat.workspace = true
common-telemetry.workspace = true
common-time.workspace = true
common-version.workspace = true

View File

@@ -27,6 +27,7 @@ use common_meta::key::runtime_switch::RuntimeSwitchManager;
use common_meta::key::{SchemaMetadataManager, SchemaMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
pub use common_procedure::options::ProcedureConfig;
use common_stat::ResourceStatImpl;
use common_telemetry::{error, info, warn};
use common_wal::config::DatanodeWalConfig;
use common_wal::config::kafka::DatanodeKafkaConfig;
@@ -282,6 +283,9 @@ impl DatanodeBuilder {
open_all_regions.await?;
}
let mut resource_stat = ResourceStatImpl::default();
resource_stat.start_collect_cpu_usage();
let heartbeat_task = if let Some(meta_client) = meta_client {
Some(
HeartbeatTask::try_new(
@@ -290,6 +294,7 @@ impl DatanodeBuilder {
meta_client,
cache_registry,
self.plugins.clone(),
Arc::new(resource_stat),
)
.await?,
)

View File

@@ -20,7 +20,6 @@ use std::time::Duration;
use api::v1::meta::heartbeat_request::NodeWorkloads;
use api::v1::meta::{DatanodeWorkloads, HeartbeatRequest, NodeInfo, Peer, RegionRole, RegionStat};
use common_base::Plugins;
use common_config::utils::ResourceSpec;
use common_meta::cache_invalidator::CacheInvalidatorRef;
use common_meta::datanode::REGION_STATISTIC_KEY;
use common_meta::distributed_time_constants::META_KEEP_ALIVE_INTERVAL_SECS;
@@ -31,6 +30,7 @@ use common_meta::heartbeat::handler::{
};
use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MailboxRef};
use common_meta::heartbeat::utils::outgoing_message_to_mailbox_message;
use common_stat::ResourceStatRef;
use common_telemetry::{debug, error, info, trace, warn};
use common_workload::DatanodeWorkloadType;
use meta_client::MetaClientRef;
@@ -63,7 +63,7 @@ pub struct HeartbeatTask {
interval: u64,
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
region_alive_keeper: Arc<RegionAliveKeeper>,
resource_spec: ResourceSpec,
resource_stat: ResourceStatRef,
}
impl Drop for HeartbeatTask {
@@ -80,6 +80,7 @@ impl HeartbeatTask {
meta_client: MetaClientRef,
cache_invalidator: CacheInvalidatorRef,
plugins: Plugins,
resource_stat: ResourceStatRef,
) -> Result<Self> {
let countdown_task_handler_ext = plugins.get::<CountdownTaskHandlerExtRef>();
let region_alive_keeper = Arc::new(RegionAliveKeeper::new(
@@ -109,7 +110,7 @@ impl HeartbeatTask {
interval: opts.heartbeat.interval.as_millis() as u64,
resp_handler_executor,
region_alive_keeper,
resource_spec: Default::default(),
resource_stat,
})
}
@@ -186,6 +187,7 @@ impl HeartbeatTask {
.context(error::HandleHeartbeatResponseSnafu)
}
#[allow(deprecated)]
/// Start heartbeat task, spawn background task.
pub async fn start(
&self,
@@ -237,8 +239,9 @@ impl HeartbeatTask {
self.region_alive_keeper.start(Some(event_receiver)).await?;
let mut last_sent = Instant::now();
let cpus = self.resource_spec.cpus as u32;
let memory_bytes = self.resource_spec.memory.unwrap_or_default().as_bytes();
let total_cpu_millicores = self.resource_stat.get_total_cpu_millicores();
let total_memory_bytes = self.resource_stat.get_total_memory_bytes();
let resource_stat = self.resource_stat.clone();
common_runtime::spawn_hb(async move {
let sleep = tokio::time::sleep(Duration::from_millis(0));
@@ -252,8 +255,13 @@ impl HeartbeatTask {
version: build_info.version.to_string(),
git_commit: build_info.commit_short.to_string(),
start_time_ms: node_epoch,
cpus,
memory_bytes,
total_cpu_millicores,
total_memory_bytes,
cpu_usage_millicores: 0,
memory_usage_bytes: 0,
// TODO(zyy17): Remove these once the deprecated fields are removed from the proto.
cpus: total_cpu_millicores as u32,
memory_bytes: total_memory_bytes as u64,
hostname: hostname::get()
.unwrap_or_default()
.to_string_lossy()
@@ -297,12 +305,18 @@ impl HeartbeatTask {
let topic_stats = region_server_clone.topic_stats();
let now = Instant::now();
let duration_since_epoch = (now - epoch).as_millis() as u64;
let req = HeartbeatRequest {
let mut req = HeartbeatRequest {
region_stats,
topic_stats,
duration_since_epoch,
..heartbeat_request.clone()
};
if let Some(info) = req.info.as_mut() {
info.cpu_usage_millicores = resource_stat.get_cpu_usage_millicores();
info.memory_usage_bytes = resource_stat.get_memory_usage_bytes();
}
sleep.as_mut().reset(now + Duration::from_millis(interval));
Some(req)
}

View File

@@ -13,16 +13,13 @@
// limitations under the License.
use async_trait::async_trait;
use common_meta::RegionIdent;
use common_meta::error::{InvalidHeartbeatResponseSnafu, Result as MetaResult};
use common_meta::heartbeat::handler::{
HandleControl, HeartbeatResponseHandler, HeartbeatResponseHandlerContext,
};
use common_meta::instruction::{Instruction, InstructionReply};
use common_telemetry::error;
use futures::future::BoxFuture;
use snafu::OptionExt;
use store_api::storage::RegionId;
mod close_region;
mod downgrade_region;
@@ -30,10 +27,15 @@ mod flush_region;
mod open_region;
mod upgrade_region;
use crate::heartbeat::handler::close_region::CloseRegionsHandler;
use crate::heartbeat::handler::downgrade_region::DowngradeRegionsHandler;
use crate::heartbeat::handler::flush_region::FlushRegionsHandler;
use crate::heartbeat::handler::open_region::OpenRegionsHandler;
use crate::heartbeat::handler::upgrade_region::UpgradeRegionsHandler;
use crate::heartbeat::task_tracker::TaskTracker;
use crate::region_server::RegionServer;
/// Handler for [Instruction::OpenRegion] and [Instruction::CloseRegion].
/// The handler for [`Instruction`]s.
#[derive(Clone)]
pub struct RegionHeartbeatResponseHandler {
region_server: RegionServer,
@@ -43,9 +45,14 @@ pub struct RegionHeartbeatResponseHandler {
open_region_parallelism: usize,
}
/// Handler of the instruction.
pub type InstructionHandler =
Box<dyn FnOnce(HandlerContext) -> BoxFuture<'static, Option<InstructionReply>> + Send>;
#[async_trait::async_trait]
pub trait InstructionHandler: Send + Sync {
async fn handle(
&self,
ctx: &HandlerContext,
instruction: Instruction,
) -> Option<InstructionReply>;
}
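// Illustrative only (not part of this patch): handlers are now trait objects rather than
// boxed closures, so adding a handler means implementing the trait. A hypothetical no-op
// handler would look like this:
#[allow(dead_code)]
struct NoopHandler;
#[async_trait::async_trait]
impl InstructionHandler for NoopHandler {
    async fn handle(
        &self,
        _ctx: &HandlerContext,
        _instruction: Instruction,
    ) -> Option<InstructionReply> {
        // A real handler would match on the instruction and reply accordingly.
        None
    }
}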
#[derive(Clone)]
pub struct HandlerContext {
@@ -56,10 +63,6 @@ pub struct HandlerContext {
}
impl HandlerContext {
fn region_ident_to_region_id(region_ident: &RegionIdent) -> RegionId {
RegionId::new(region_ident.table_id, region_ident.region_number)
}
#[cfg(test)]
pub fn new_for_test(region_server: RegionServer) -> Self {
Self {
@@ -90,31 +93,16 @@ impl RegionHeartbeatResponseHandler {
self
}
/// Builds the [InstructionHandler].
fn build_handler(&self, instruction: Instruction) -> MetaResult<InstructionHandler> {
fn build_handler(&self, instruction: &Instruction) -> MetaResult<Box<dyn InstructionHandler>> {
match instruction {
Instruction::OpenRegions(open_regions) => {
let open_region_parallelism = self.open_region_parallelism;
Ok(Box::new(move |handler_context| {
handler_context
.handle_open_regions_instruction(open_regions, open_region_parallelism)
}))
}
Instruction::CloseRegions(close_regions) => Ok(Box::new(move |handler_context| {
handler_context.handle_close_regions_instruction(close_regions)
})),
Instruction::DowngradeRegion(downgrade_region) => {
Ok(Box::new(move |handler_context| {
handler_context.handle_downgrade_region_instruction(downgrade_region)
}))
}
Instruction::UpgradeRegion(upgrade_region) => Ok(Box::new(move |handler_context| {
handler_context.handle_upgrade_region_instruction(upgrade_region)
Instruction::CloseRegions(_) => Ok(Box::new(CloseRegionsHandler)),
Instruction::OpenRegions(_) => Ok(Box::new(OpenRegionsHandler {
open_region_parallelism: self.open_region_parallelism,
})),
Instruction::FlushRegions(_) => Ok(Box::new(FlushRegionsHandler)),
Instruction::DowngradeRegions(_) => Ok(Box::new(DowngradeRegionsHandler)),
Instruction::UpgradeRegion(_) => Ok(Box::new(UpgradeRegionsHandler)),
Instruction::InvalidateCaches(_) => InvalidHeartbeatResponseSnafu.fail(),
Instruction::FlushRegions(flush_regions) => Ok(Box::new(move |handler_context| {
handler_context.handle_flush_regions_instruction(flush_regions)
})),
}
}
}
@@ -124,7 +112,7 @@ impl HeartbeatResponseHandler for RegionHeartbeatResponseHandler {
fn is_acceptable(&self, ctx: &HeartbeatResponseHandlerContext) -> bool {
matches!(ctx.incoming_message.as_ref(), |Some((
_,
Instruction::DowngradeRegion { .. },
Instruction::DowngradeRegions { .. },
))| Some((
_,
Instruction::UpgradeRegion { .. }
@@ -151,15 +139,19 @@ impl HeartbeatResponseHandler for RegionHeartbeatResponseHandler {
let catchup_tasks = self.catchup_tasks.clone();
let downgrade_tasks = self.downgrade_tasks.clone();
let flush_tasks = self.flush_tasks.clone();
let handler = self.build_handler(instruction)?;
let handler = self.build_handler(&instruction)?;
let _handle = common_runtime::spawn_global(async move {
let reply = handler(HandlerContext {
region_server,
catchup_tasks,
downgrade_tasks,
flush_tasks,
})
.await;
let reply = handler
.handle(
&HandlerContext {
region_server,
catchup_tasks,
downgrade_tasks,
flush_tasks,
},
instruction,
)
.await;
if let Some(reply) = reply
&& let Err(e) = mailbox.send((meta, reply)).await
@@ -179,6 +171,7 @@ mod tests {
use std::sync::Arc;
use std::time::Duration;
use common_meta::RegionIdent;
use common_meta::heartbeat::mailbox::{
HeartbeatMailbox, IncomingMessage, MailboxRef, MessageMeta,
};
@@ -249,10 +242,10 @@ mod tests {
);
// Downgrade region
let instruction = Instruction::DowngradeRegion(DowngradeRegion {
let instruction = Instruction::DowngradeRegions(vec![DowngradeRegion {
region_id: RegionId::new(2048, 1),
flush_timeout: Some(Duration::from_secs(1)),
});
}]);
assert!(
heartbeat_handler
.is_acceptable(&heartbeat_env.create_handler_ctx((meta.clone(), instruction)))
@@ -447,10 +440,10 @@ mod tests {
// Should be ok, if we try to downgrade it twice.
for _ in 0..2 {
let meta = MessageMeta::new_test(1, "test", "dn-1", "me-0");
let instruction = Instruction::DowngradeRegion(DowngradeRegion {
let instruction = Instruction::DowngradeRegions(vec![DowngradeRegion {
region_id,
flush_timeout: Some(Duration::from_secs(1)),
});
}]);
let mut ctx = heartbeat_env.create_handler_ctx((meta, instruction));
let control = heartbeat_handler.handle(&mut ctx).await.unwrap();
@@ -458,33 +451,27 @@ mod tests {
let (_, reply) = heartbeat_env.receiver.recv().await.unwrap();
if let InstructionReply::DowngradeRegion(reply) = reply {
assert!(reply.exists);
assert!(reply.error.is_none());
assert_eq!(reply.last_entry_id.unwrap(), 0);
} else {
unreachable!()
}
let reply = &reply.expect_downgrade_regions_reply()[0];
assert!(reply.exists);
assert!(reply.error.is_none());
assert_eq!(reply.last_entry_id.unwrap(), 0);
}
// Downgrades a not exists region.
let meta = MessageMeta::new_test(1, "test", "dn-1", "me-0");
let instruction = Instruction::DowngradeRegion(DowngradeRegion {
let instruction = Instruction::DowngradeRegions(vec![DowngradeRegion {
region_id: RegionId::new(2048, 1),
flush_timeout: Some(Duration::from_secs(1)),
});
}]);
let mut ctx = heartbeat_env.create_handler_ctx((meta, instruction));
let control = heartbeat_handler.handle(&mut ctx).await.unwrap();
assert_matches!(control, HandleControl::Continue);
let (_, reply) = heartbeat_env.receiver.recv().await.unwrap();
if let InstructionReply::DowngradeRegion(reply) = reply {
assert!(!reply.exists);
assert!(reply.error.is_none());
assert!(reply.last_entry_id.is_none());
} else {
unreachable!()
}
let reply = reply.expect_downgrade_regions_reply();
assert!(!reply[0].exists);
assert!(reply[0].error.is_none());
assert!(reply[0].last_entry_id.is_none());
}
}

View File

@@ -12,60 +12,64 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_meta::RegionIdent;
use common_meta::instruction::{InstructionReply, SimpleReply};
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
use common_telemetry::warn;
use futures::future::join_all;
use futures_util::future::BoxFuture;
use store_api::region_request::{RegionCloseRequest, RegionRequest};
use store_api::storage::RegionId;
use crate::error;
use crate::heartbeat::handler::HandlerContext;
use crate::heartbeat::handler::{HandlerContext, InstructionHandler};
impl HandlerContext {
pub(crate) fn handle_close_regions_instruction(
self,
region_idents: Vec<RegionIdent>,
) -> BoxFuture<'static, Option<InstructionReply>> {
Box::pin(async move {
let region_ids = region_idents
.into_iter()
.map(|region_ident| Self::region_ident_to_region_id(&region_ident))
.collect::<Vec<_>>();
#[derive(Debug, Clone, Copy, Default)]
pub struct CloseRegionsHandler;
let futs = region_ids.iter().map(|region_id| {
self.region_server
.handle_request(*region_id, RegionRequest::Close(RegionCloseRequest {}))
});
#[async_trait::async_trait]
impl InstructionHandler for CloseRegionsHandler {
async fn handle(
&self,
ctx: &HandlerContext,
instruction: Instruction,
) -> Option<InstructionReply> {
// Safety: must be `Instruction::CloseRegions` instruction.
let region_idents = instruction.into_close_regions().unwrap();
let region_ids = region_idents
.into_iter()
.map(|region_ident| RegionId::new(region_ident.table_id, region_ident.region_number))
.collect::<Vec<_>>();
let results = join_all(futs).await;
let futs = region_ids.iter().map(|region_id| {
ctx.region_server
.handle_request(*region_id, RegionRequest::Close(RegionCloseRequest {}))
});
let mut errors = vec![];
for (region_id, result) in region_ids.into_iter().zip(results.into_iter()) {
match result {
Ok(_) => (),
Err(error::Error::RegionNotFound { .. }) => {
warn!(
"Received a close regions instruction from meta, but target region:{} is not found.",
region_id
);
}
Err(err) => errors.push(format!("region:{region_id}: {err:?}")),
let results = join_all(futs).await;
let mut errors = vec![];
for (region_id, result) in region_ids.into_iter().zip(results.into_iter()) {
match result {
Ok(_) => (),
Err(error::Error::RegionNotFound { .. }) => {
warn!(
"Received a close regions instruction from meta, but target region:{} is not found.",
region_id
);
}
Err(err) => errors.push(format!("region:{region_id}: {err:?}")),
}
}
if errors.is_empty() {
return Some(InstructionReply::CloseRegions(SimpleReply {
result: true,
error: None,
}));
}
if errors.is_empty() {
return Some(InstructionReply::CloseRegions(SimpleReply {
result: true,
error: None,
}));
}
Some(InstructionReply::CloseRegions(SimpleReply {
result: false,
error: Some(errors.join("; ")),
}))
})
Some(InstructionReply::CloseRegions(SimpleReply {
result: false,
error: Some(errors.join("; ")),
}))
}
}

View File

@@ -12,209 +12,242 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_meta::instruction::{DowngradeRegion, DowngradeRegionReply, InstructionReply};
use common_meta::instruction::{
DowngradeRegion, DowngradeRegionReply, DowngradeRegionsReply, Instruction, InstructionReply,
};
use common_telemetry::tracing::info;
use common_telemetry::{error, warn};
use futures_util::future::BoxFuture;
use futures::future::join_all;
use store_api::region_engine::{SetRegionRoleStateResponse, SettableRegionRoleState};
use store_api::region_request::{RegionFlushRequest, RegionRequest};
use store_api::storage::RegionId;
use crate::heartbeat::handler::HandlerContext;
use crate::heartbeat::handler::{HandlerContext, InstructionHandler};
use crate::heartbeat::task_tracker::WaitResult;
impl HandlerContext {
async fn downgrade_to_follower_gracefully(
#[derive(Debug, Clone, Copy, Default)]
pub struct DowngradeRegionsHandler;
impl DowngradeRegionsHandler {
async fn handle_downgrade_region(
ctx: &HandlerContext,
DowngradeRegion {
region_id,
flush_timeout,
}: DowngradeRegion,
) -> DowngradeRegionReply {
let Some(writable) = ctx.region_server.is_region_leader(region_id) else {
warn!("Region: {region_id} is not found");
return DowngradeRegionReply {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
exists: false,
error: None,
};
};
let region_server_moved = ctx.region_server.clone();
// Ignores flush request
if !writable {
warn!(
"Region: {region_id} is not writable, flush_timeout: {:?}",
flush_timeout
);
return ctx.downgrade_to_follower_gracefully(region_id).await;
}
// If flush_timeout is not set, directly convert region to follower.
let Some(flush_timeout) = flush_timeout else {
return ctx.downgrade_to_follower_gracefully(region_id).await;
};
// Set the region to downgrading; a downgrading region rejects all write requests
// but still accepts read and flush requests.
match ctx
.region_server
.set_region_role_state_gracefully(region_id, SettableRegionRoleState::DowngradingLeader)
.await
{
Ok(SetRegionRoleStateResponse::Success { .. }) => {}
Ok(SetRegionRoleStateResponse::NotFound) => {
warn!("Region: {region_id} is not found");
return DowngradeRegionReply {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
exists: false,
error: None,
};
}
Ok(SetRegionRoleStateResponse::InvalidTransition(err)) => {
error!(err; "Failed to convert region to downgrading leader - invalid transition");
return DowngradeRegionReply {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
exists: true,
error: Some(format!("{err:?}")),
};
}
Err(err) => {
error!(err; "Failed to convert region to downgrading leader");
return DowngradeRegionReply {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
exists: true,
error: Some(format!("{err:?}")),
};
}
}
let register_result = ctx
.downgrade_tasks
.try_register(
region_id,
Box::pin(async move {
info!("Flush region: {region_id} before converting region to follower");
region_server_moved
.handle_request(
region_id,
RegionRequest::Flush(RegionFlushRequest {
row_group_size: None,
}),
)
.await?;
Ok(())
}),
)
.await;
if register_result.is_busy() {
warn!("Another flush task is running for the region: {region_id}");
}
let mut watcher = register_result.into_watcher();
let result = ctx.downgrade_tasks.wait(&mut watcher, flush_timeout).await;
match result {
WaitResult::Timeout => DowngradeRegionReply {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
exists: true,
error: Some(format!(
"Flush region timeout, region: {region_id}, timeout: {:?}",
flush_timeout
)),
},
WaitResult::Finish(Ok(_)) => ctx.downgrade_to_follower_gracefully(region_id).await,
WaitResult::Finish(Err(err)) => DowngradeRegionReply {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
exists: true,
error: Some(format!("{err:?}")),
},
}
}
}
#[async_trait::async_trait]
impl InstructionHandler for DowngradeRegionsHandler {
async fn handle(
&self,
region_id: RegionId,
ctx: &HandlerContext,
instruction: Instruction,
) -> Option<InstructionReply> {
// Safety: must be an `Instruction::DowngradeRegions` instruction.
let downgrade_regions = instruction.into_downgrade_regions().unwrap();
let futures = downgrade_regions
.into_iter()
.map(|downgrade_region| Self::handle_downgrade_region(ctx, downgrade_region));
// Join all futures; parallelism is governed by the underlying flush scheduler.
let results = join_all(futures).await;
Some(InstructionReply::DowngradeRegions(
DowngradeRegionsReply::new(results),
))
}
}
impl HandlerContext {
async fn downgrade_to_follower_gracefully(&self, region_id: RegionId) -> DowngradeRegionReply {
match self
.region_server
.set_region_role_state_gracefully(region_id, SettableRegionRoleState::Follower)
.await
{
Ok(SetRegionRoleStateResponse::Success(success)) => {
Some(InstructionReply::DowngradeRegion(DowngradeRegionReply {
last_entry_id: success.last_entry_id(),
metadata_last_entry_id: success.metadata_last_entry_id(),
exists: true,
error: None,
}))
}
Ok(SetRegionRoleStateResponse::Success(success)) => DowngradeRegionReply {
region_id,
last_entry_id: success.last_entry_id(),
metadata_last_entry_id: success.metadata_last_entry_id(),
exists: true,
error: None,
},
Ok(SetRegionRoleStateResponse::NotFound) => {
warn!("Region: {region_id} is not found");
Some(InstructionReply::DowngradeRegion(DowngradeRegionReply {
DowngradeRegionReply {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
exists: false,
error: None,
}))
}
}
Ok(SetRegionRoleStateResponse::InvalidTransition(err)) => {
error!(err; "Failed to convert region to follower - invalid transition");
Some(InstructionReply::DowngradeRegion(DowngradeRegionReply {
DowngradeRegionReply {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
exists: true,
error: Some(format!("{err:?}")),
}))
}
}
Err(err) => {
error!(err; "Failed to convert region to {}", SettableRegionRoleState::Follower);
Some(InstructionReply::DowngradeRegion(DowngradeRegionReply {
DowngradeRegionReply {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
exists: true,
error: Some(format!("{err:?}")),
}))
}
}
}
}
pub(crate) fn handle_downgrade_region_instruction(
self,
DowngradeRegion {
region_id,
flush_timeout,
}: DowngradeRegion,
) -> BoxFuture<'static, Option<InstructionReply>> {
Box::pin(async move {
let Some(writable) = self.region_server.is_region_leader(region_id) else {
warn!("Region: {region_id} is not found");
return Some(InstructionReply::DowngradeRegion(DowngradeRegionReply {
last_entry_id: None,
metadata_last_entry_id: None,
exists: false,
error: None,
}));
};
let region_server_moved = self.region_server.clone();
// Ignores flush request
if !writable {
warn!(
"Region: {region_id} is not writable, flush_timeout: {:?}",
flush_timeout
);
return self.downgrade_to_follower_gracefully(region_id).await;
}
// If flush_timeout is not set, directly convert region to follower.
let Some(flush_timeout) = flush_timeout else {
return self.downgrade_to_follower_gracefully(region_id).await;
};
// Sets region to downgrading,
// the downgrading region will reject all write requests.
// However, the downgrading region will still accept read, flush requests.
match self
.region_server
.set_region_role_state_gracefully(
region_id,
SettableRegionRoleState::DowngradingLeader,
)
.await
{
Ok(SetRegionRoleStateResponse::Success { .. }) => {}
Ok(SetRegionRoleStateResponse::NotFound) => {
warn!("Region: {region_id} is not found");
return Some(InstructionReply::DowngradeRegion(DowngradeRegionReply {
last_entry_id: None,
metadata_last_entry_id: None,
exists: false,
error: None,
}));
}
Ok(SetRegionRoleStateResponse::InvalidTransition(err)) => {
error!(err; "Failed to convert region to downgrading leader - invalid transition");
return Some(InstructionReply::DowngradeRegion(DowngradeRegionReply {
last_entry_id: None,
metadata_last_entry_id: None,
exists: true,
error: Some(format!("{err:?}")),
}));
}
Err(err) => {
error!(err; "Failed to convert region to downgrading leader");
return Some(InstructionReply::DowngradeRegion(DowngradeRegionReply {
last_entry_id: None,
metadata_last_entry_id: None,
exists: true,
error: Some(format!("{err:?}")),
}));
}
}
let register_result = self
.downgrade_tasks
.try_register(
region_id,
Box::pin(async move {
info!("Flush region: {region_id} before converting region to follower");
region_server_moved
.handle_request(
region_id,
RegionRequest::Flush(RegionFlushRequest {
row_group_size: None,
}),
)
.await?;
Ok(())
}),
)
.await;
if register_result.is_busy() {
warn!("Another flush task is running for the region: {region_id}");
}
let mut watcher = register_result.into_watcher();
let result = self.downgrade_tasks.wait(&mut watcher, flush_timeout).await;
match result {
WaitResult::Timeout => {
Some(InstructionReply::DowngradeRegion(DowngradeRegionReply {
last_entry_id: None,
metadata_last_entry_id: None,
exists: true,
error: Some(format!(
"Flush region timeout, region: {region_id}, timeout: {:?}",
flush_timeout
)),
}))
}
WaitResult::Finish(Ok(_)) => self.downgrade_to_follower_gracefully(region_id).await,
WaitResult::Finish(Err(err)) => {
Some(InstructionReply::DowngradeRegion(DowngradeRegionReply {
last_entry_id: None,
metadata_last_entry_id: None,
exists: true,
error: Some(format!("{err:?}")),
}))
}
}
})
}
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use std::sync::Arc;
use std::time::Duration;
use common_meta::instruction::{DowngradeRegion, InstructionReply};
use common_meta::heartbeat::handler::{HandleControl, HeartbeatResponseHandler};
use common_meta::heartbeat::mailbox::MessageMeta;
use common_meta::instruction::{DowngradeRegion, Instruction};
use mito2::config::MitoConfig;
use mito2::engine::MITO_ENGINE_NAME;
use mito2::test_util::{CreateRequestBuilder, TestEnv};
use store_api::region_engine::{
RegionRole, SetRegionRoleStateResponse, SetRegionRoleStateSuccess,
RegionEngine, RegionRole, SetRegionRoleStateResponse, SetRegionRoleStateSuccess,
};
use store_api::region_request::RegionRequest;
use store_api::storage::RegionId;
use tokio::time::Instant;
use crate::error;
use crate::heartbeat::handler::HandlerContext;
use crate::heartbeat::handler::downgrade_region::DowngradeRegionsHandler;
use crate::heartbeat::handler::tests::HeartbeatResponseTestEnv;
use crate::heartbeat::handler::{
HandlerContext, InstructionHandler, RegionHeartbeatResponseHandler,
};
use crate::tests::{MockRegionEngine, mock_region_server};
#[tokio::test]
@@ -227,20 +260,20 @@ mod tests {
let waits = vec![None, Some(Duration::from_millis(100u64))];
for flush_timeout in waits {
let reply = handler_context
.clone()
.handle_downgrade_region_instruction(DowngradeRegion {
region_id,
flush_timeout,
})
let reply = DowngradeRegionsHandler
.handle(
&handler_context,
Instruction::DowngradeRegions(vec![DowngradeRegion {
region_id,
flush_timeout,
}]),
)
.await;
assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_)));
if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() {
assert!(!reply.exists);
assert!(reply.error.is_none());
assert!(reply.last_entry_id.is_none());
}
let reply = &reply.unwrap().expect_downgrade_regions_reply()[0];
assert!(!reply.exists);
assert!(reply.error.is_none());
assert!(reply.last_entry_id.is_none());
}
}
@@ -270,20 +303,20 @@ mod tests {
let waits = vec![None, Some(Duration::from_millis(100u64))];
for flush_timeout in waits {
let reply = handler_context
.clone()
.handle_downgrade_region_instruction(DowngradeRegion {
region_id,
flush_timeout,
})
let reply = DowngradeRegionsHandler
.handle(
&handler_context,
Instruction::DowngradeRegions(vec![DowngradeRegion {
region_id,
flush_timeout,
}]),
)
.await;
assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_)));
if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() {
assert!(reply.exists);
assert!(reply.error.is_none());
assert_eq!(reply.last_entry_id.unwrap(), 1024);
}
let reply = &reply.unwrap().expect_downgrade_regions_reply()[0];
assert!(reply.exists);
assert!(reply.error.is_none());
assert_eq!(reply.last_entry_id.unwrap(), 1024);
}
}
@@ -305,20 +338,20 @@ mod tests {
let handler_context = HandlerContext::new_for_test(mock_region_server);
let flush_timeout = Duration::from_millis(100);
let reply = handler_context
.clone()
.handle_downgrade_region_instruction(DowngradeRegion {
region_id,
flush_timeout: Some(flush_timeout),
})
let reply = DowngradeRegionsHandler
.handle(
&handler_context,
Instruction::DowngradeRegions(vec![DowngradeRegion {
region_id,
flush_timeout: Some(flush_timeout),
}]),
)
.await;
assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_)));
if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() {
assert!(reply.exists);
assert!(reply.error.unwrap().contains("timeout"));
assert!(reply.last_entry_id.is_none());
}
let reply = &reply.unwrap().expect_downgrade_regions_reply()[0];
assert!(reply.exists);
assert!(reply.error.as_ref().unwrap().contains("timeout"));
assert!(reply.last_entry_id.is_none());
}
#[tokio::test]
@@ -344,36 +377,38 @@ mod tests {
];
for flush_timeout in waits {
let reply = handler_context
.clone()
.handle_downgrade_region_instruction(DowngradeRegion {
region_id,
flush_timeout,
})
let reply = DowngradeRegionsHandler
.handle(
&handler_context,
Instruction::DowngradeRegions(vec![DowngradeRegion {
region_id,
flush_timeout,
}]),
)
.await;
assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_)));
if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() {
assert!(reply.exists);
assert!(reply.error.unwrap().contains("timeout"));
assert!(reply.last_entry_id.is_none());
}
let reply = &reply.unwrap().expect_downgrade_regions_reply()[0];
assert!(reply.exists);
assert!(reply.error.as_ref().unwrap().contains("timeout"));
assert!(reply.last_entry_id.is_none());
}
let timer = Instant::now();
let reply = handler_context
.handle_downgrade_region_instruction(DowngradeRegion {
region_id,
flush_timeout: Some(Duration::from_millis(500)),
})
let reply = DowngradeRegionsHandler
.handle(
&handler_context,
Instruction::DowngradeRegions(vec![DowngradeRegion {
region_id,
flush_timeout: Some(Duration::from_millis(500)),
}]),
)
.await;
assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_)));
// Must be less than 300 ms.
assert!(timer.elapsed().as_millis() < 300);
if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() {
assert!(reply.exists);
assert!(reply.error.is_none());
assert_eq!(reply.last_entry_id.unwrap(), 1024);
}
let reply = &reply.unwrap().expect_downgrade_regions_reply()[0];
assert!(reply.exists);
assert!(reply.error.is_none());
assert_eq!(reply.last_entry_id.unwrap(), 1024);
}
#[tokio::test]
@@ -405,36 +440,36 @@ mod tests {
];
for flush_timeout in waits {
let reply = handler_context
.clone()
.handle_downgrade_region_instruction(DowngradeRegion {
region_id,
flush_timeout,
})
let reply = DowngradeRegionsHandler
.handle(
&handler_context,
Instruction::DowngradeRegions(vec![DowngradeRegion {
region_id,
flush_timeout,
}]),
)
.await;
assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_)));
if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() {
assert!(reply.exists);
assert!(reply.error.unwrap().contains("timeout"));
assert!(reply.last_entry_id.is_none());
}
}
let timer = Instant::now();
let reply = handler_context
.handle_downgrade_region_instruction(DowngradeRegion {
region_id,
flush_timeout: Some(Duration::from_millis(500)),
})
.await;
assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_)));
// Must be less than 300 ms.
assert!(timer.elapsed().as_millis() < 300);
if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() {
let reply = &reply.unwrap().expect_downgrade_regions_reply()[0];
assert!(reply.exists);
assert!(reply.error.unwrap().contains("flush failed"));
assert!(reply.error.as_ref().unwrap().contains("timeout"));
assert!(reply.last_entry_id.is_none());
}
let timer = Instant::now();
let reply = DowngradeRegionsHandler
.handle(
&handler_context,
Instruction::DowngradeRegions(vec![DowngradeRegion {
region_id,
flush_timeout: Some(Duration::from_millis(500)),
}]),
)
.await;
// Must be less than 300 ms.
assert!(timer.elapsed().as_millis() < 300);
let reply = &reply.unwrap().expect_downgrade_regions_reply()[0];
assert!(reply.exists);
assert!(reply.error.as_ref().unwrap().contains("flush failed"));
assert!(reply.last_entry_id.is_none());
}
#[tokio::test]
@@ -449,19 +484,19 @@ mod tests {
});
mock_region_server.register_test_region(region_id, mock_engine);
let handler_context = HandlerContext::new_for_test(mock_region_server);
let reply = handler_context
.clone()
.handle_downgrade_region_instruction(DowngradeRegion {
region_id,
flush_timeout: None,
})
let reply = DowngradeRegionsHandler
.handle(
&handler_context,
Instruction::DowngradeRegions(vec![DowngradeRegion {
region_id,
flush_timeout: None,
}]),
)
.await;
assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_)));
if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() {
assert!(!reply.exists);
assert!(reply.error.is_none());
assert!(reply.last_entry_id.is_none());
}
let reply = &reply.unwrap().expect_downgrade_regions_reply()[0];
assert!(!reply.exists);
assert!(reply.error.is_none());
assert!(reply.last_entry_id.is_none());
}
#[tokio::test]
@@ -480,23 +515,77 @@ mod tests {
});
mock_region_server.register_test_region(region_id, mock_engine);
let handler_context = HandlerContext::new_for_test(mock_region_server);
let reply = handler_context
.clone()
.handle_downgrade_region_instruction(DowngradeRegion {
region_id,
flush_timeout: None,
})
let reply = DowngradeRegionsHandler
.handle(
&handler_context,
Instruction::DowngradeRegions(vec![DowngradeRegion {
region_id,
flush_timeout: None,
}]),
)
.await;
assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_)));
if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() {
assert!(reply.exists);
assert!(
reply
.error
.unwrap()
.contains("Failed to set region to readonly")
);
assert!(reply.last_entry_id.is_none());
}
let reply = &reply.unwrap().expect_downgrade_regions_reply()[0];
assert!(reply.exists);
assert!(
reply
.error
.as_ref()
.unwrap()
.contains("Failed to set region to readonly")
);
assert!(reply.last_entry_id.is_none());
}
#[tokio::test]
async fn test_downgrade_regions() {
common_telemetry::init_default_ut_logging();
let mut region_server = mock_region_server();
let heartbeat_handler = RegionHeartbeatResponseHandler::new(region_server.clone());
let mut engine_env = TestEnv::with_prefix("downgrade-regions").await;
let engine = engine_env.create_engine(MitoConfig::default()).await;
region_server.register_engine(Arc::new(engine.clone()));
let region_id = RegionId::new(1024, 1);
let region_id1 = RegionId::new(1024, 2);
let builder = CreateRequestBuilder::new();
let create_req = builder.build();
region_server
.handle_request(region_id, RegionRequest::Create(create_req))
.await
.unwrap();
let create_req1 = builder.build();
region_server
.handle_request(region_id1, RegionRequest::Create(create_req1))
.await
.unwrap();
let meta = MessageMeta::new_test(1, "test", "dn-1", "meta-0");
let instruction = Instruction::DowngradeRegions(vec![
DowngradeRegion {
region_id,
flush_timeout: Some(Duration::from_secs(1)),
},
DowngradeRegion {
region_id: region_id1,
flush_timeout: Some(Duration::from_secs(1)),
},
]);
let mut heartbeat_env = HeartbeatResponseTestEnv::new();
let mut ctx = heartbeat_env.create_handler_ctx((meta, instruction));
let control = heartbeat_handler.handle(&mut ctx).await.unwrap();
assert_matches!(control, HandleControl::Continue);
let (_, reply) = heartbeat_env.receiver.recv().await.unwrap();
let reply = reply.expect_downgrade_regions_reply();
assert_eq!(reply[0].region_id, region_id);
assert!(reply[0].exists);
assert!(reply[0].error.is_none());
assert_eq!(reply[0].last_entry_id, Some(0));
assert_eq!(reply[1].region_id, region_id1);
assert!(reply[1].exists);
assert!(reply[1].error.is_none());
assert_eq!(reply[1].last_entry_id, Some(0));
assert_eq!(engine.role(region_id).unwrap(), RegionRole::Follower);
assert_eq!(engine.role(region_id1).unwrap(), RegionRole::Follower);
}
}

View File

@@ -15,19 +15,53 @@
use std::time::Instant;
use common_meta::instruction::{
FlushErrorStrategy, FlushRegionReply, FlushRegions, FlushStrategy, InstructionReply,
FlushErrorStrategy, FlushRegionReply, FlushStrategy, Instruction, InstructionReply,
};
use common_telemetry::{debug, warn};
use futures_util::future::BoxFuture;
use store_api::region_request::{RegionFlushRequest, RegionRequest};
use store_api::storage::RegionId;
use crate::error::{self, RegionNotFoundSnafu, RegionNotReadySnafu, UnexpectedSnafu};
use crate::heartbeat::handler::HandlerContext;
use crate::error::{self, RegionNotFoundSnafu, RegionNotReadySnafu, Result, UnexpectedSnafu};
use crate::heartbeat::handler::{HandlerContext, InstructionHandler};
pub struct FlushRegionsHandler;
#[async_trait::async_trait]
impl InstructionHandler for FlushRegionsHandler {
async fn handle(
&self,
ctx: &HandlerContext,
instruction: Instruction,
) -> Option<InstructionReply> {
let start_time = Instant::now();
let flush_regions = instruction.into_flush_regions().unwrap();
let strategy = flush_regions.strategy;
let region_ids = flush_regions.region_ids;
let error_strategy = flush_regions.error_strategy;
let reply = if matches!(strategy, FlushStrategy::Async) {
// Asynchronous hint mode: fire-and-forget, no reply expected
ctx.handle_flush_hint(region_ids).await;
None
} else {
// Synchronous mode: return reply with results
let reply = ctx.handle_flush_sync(region_ids, error_strategy).await;
Some(InstructionReply::FlushRegions(reply))
};
let elapsed = start_time.elapsed();
debug!(
"FlushRegions strategy: {:?}, elapsed: {:?}, reply: {:?}",
strategy, elapsed, reply
);
reply
}
}
impl HandlerContext {
/// Performs the actual region flush operation.
async fn perform_region_flush(&self, region_id: RegionId) -> Result<(), error::Error> {
async fn perform_region_flush(&self, region_id: RegionId) -> Result<()> {
let request = RegionRequest::Flush(RegionFlushRequest {
row_group_size: None,
});
@@ -92,7 +126,7 @@ impl HandlerContext {
}
/// Flushes a single region synchronously with proper error handling.
async fn flush_single_region_sync(&self, region_id: RegionId) -> Result<(), error::Error> {
async fn flush_single_region_sync(&self, region_id: RegionId) -> Result<()> {
// Check if region is leader and writable
let Some(writable) = self.region_server.is_region_leader(region_id) else {
return Err(RegionNotFoundSnafu { region_id }.build());
@@ -135,37 +169,6 @@ impl HandlerContext {
.build()),
}
}
/// Unified handler for FlushRegions with all flush semantics.
pub(crate) fn handle_flush_regions_instruction(
self,
flush_regions: FlushRegions,
) -> BoxFuture<'static, Option<InstructionReply>> {
Box::pin(async move {
let start_time = Instant::now();
let strategy = flush_regions.strategy;
let region_ids = flush_regions.region_ids;
let error_strategy = flush_regions.error_strategy;
let reply = if matches!(strategy, FlushStrategy::Async) {
// Asynchronous hint mode: fire-and-forget, no reply expected
self.handle_flush_hint(region_ids).await;
None
} else {
// Synchronous mode: return reply with results
let reply = self.handle_flush_sync(region_ids, error_strategy).await;
Some(InstructionReply::FlushRegions(reply))
};
let elapsed = start_time.elapsed();
debug!(
"FlushRegions strategy: {:?}, elapsed: {:?}, reply: {:?}",
strategy, elapsed, reply
);
reply
})
}
}
#[cfg(test)]
@@ -201,9 +204,11 @@ mod tests {
// Async hint mode
let flush_instruction = FlushRegions::async_batch(region_ids.clone());
let reply = handler_context
.clone()
.handle_flush_regions_instruction(flush_instruction)
let reply = FlushRegionsHandler
.handle(
&handler_context,
Instruction::FlushRegions(flush_instruction),
)
.await;
assert!(reply.is_none()); // Hint mode returns no reply
assert_eq!(*flushed_region_ids.read().unwrap(), region_ids);
@@ -212,8 +217,11 @@ mod tests {
flushed_region_ids.write().unwrap().clear();
let not_found_region_ids = (0..2).map(|i| RegionId::new(2048, i)).collect::<Vec<_>>();
let flush_instruction = FlushRegions::async_batch(not_found_region_ids);
let reply = handler_context
.handle_flush_regions_instruction(flush_instruction)
let reply = FlushRegionsHandler
.handle(
&handler_context,
Instruction::FlushRegions(flush_instruction),
)
.await;
assert!(reply.is_none());
assert!(flushed_region_ids.read().unwrap().is_empty());
@@ -238,20 +246,17 @@ mod tests {
let handler_context = HandlerContext::new_for_test(mock_region_server);
let flush_instruction = FlushRegions::sync_single(region_id);
let reply = handler_context
.handle_flush_regions_instruction(flush_instruction)
let reply = FlushRegionsHandler
.handle(
&handler_context,
Instruction::FlushRegions(flush_instruction),
)
.await;
assert!(reply.is_some());
if let Some(InstructionReply::FlushRegions(flush_reply)) = reply {
assert!(flush_reply.overall_success);
assert_eq!(flush_reply.results.len(), 1);
assert_eq!(flush_reply.results[0].0, region_id);
assert!(flush_reply.results[0].1.is_ok());
} else {
panic!("Expected FlushRegions reply");
}
let flush_reply = reply.unwrap().expect_flush_regions_reply();
assert!(flush_reply.overall_success);
assert_eq!(flush_reply.results.len(), 1);
assert_eq!(flush_reply.results[0].0, region_id);
assert!(flush_reply.results[0].1.is_ok());
assert_eq!(*flushed_region_ids.read().unwrap(), vec![region_id]);
}
@@ -281,18 +286,16 @@ mod tests {
// Sync batch with fail-fast strategy
let flush_instruction =
FlushRegions::sync_batch(region_ids.clone(), FlushErrorStrategy::FailFast);
let reply = handler_context
.handle_flush_regions_instruction(flush_instruction)
let reply = FlushRegionsHandler
.handle(
&handler_context,
Instruction::FlushRegions(flush_instruction),
)
.await;
assert!(reply.is_some());
if let Some(InstructionReply::FlushRegions(flush_reply)) = reply {
assert!(!flush_reply.overall_success); // Should fail due to non-existent regions
// With fail-fast, only process regions until first failure
assert!(flush_reply.results.len() <= region_ids.len());
} else {
panic!("Expected FlushRegions reply");
}
let flush_reply = reply.unwrap().expect_flush_regions_reply();
assert!(!flush_reply.overall_success); // Should fail due to non-existent regions
// With fail-fast, only process regions until first failure
assert!(flush_reply.results.len() <= region_ids.len());
}
#[tokio::test]
@@ -317,20 +320,18 @@ mod tests {
// Sync batch with try-all strategy
let flush_instruction =
FlushRegions::sync_batch(region_ids.clone(), FlushErrorStrategy::TryAll);
let reply = handler_context
.handle_flush_regions_instruction(flush_instruction)
let reply = FlushRegionsHandler
.handle(
&handler_context,
Instruction::FlushRegions(flush_instruction),
)
.await;
assert!(reply.is_some());
if let Some(InstructionReply::FlushRegions(flush_reply)) = reply {
assert!(!flush_reply.overall_success); // Should fail due to one non-existent region
// With try-all, should process all regions
assert_eq!(flush_reply.results.len(), region_ids.len());
// First should succeed, second should fail
assert!(flush_reply.results[0].1.is_ok());
assert!(flush_reply.results[1].1.is_err());
} else {
panic!("Expected FlushRegions reply");
}
let flush_reply = reply.unwrap().expect_flush_regions_reply();
assert!(!flush_reply.overall_success); // Should fail due to one non-existent region
// With try-all, should process all regions
assert_eq!(flush_reply.results.len(), region_ids.len());
// First should succeed, second should fail
assert!(flush_reply.results[0].1.is_ok());
assert!(flush_reply.results[1].1.is_err());
}
}

View File

@@ -12,56 +12,62 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_meta::instruction::{InstructionReply, OpenRegion, SimpleReply};
use common_meta::instruction::{Instruction, InstructionReply, OpenRegion, SimpleReply};
use common_meta::wal_options_allocator::prepare_wal_options;
use futures_util::future::BoxFuture;
use store_api::path_utils::table_dir;
use store_api::region_request::{PathType, RegionOpenRequest};
use store_api::storage::RegionId;
use crate::heartbeat::handler::HandlerContext;
use crate::heartbeat::handler::{HandlerContext, InstructionHandler};
impl HandlerContext {
pub(crate) fn handle_open_regions_instruction(
self,
open_regions: Vec<OpenRegion>,
open_region_parallelism: usize,
) -> BoxFuture<'static, Option<InstructionReply>> {
Box::pin(async move {
let requests = open_regions
.into_iter()
.map(|open_region| {
let OpenRegion {
region_ident,
region_storage_path,
mut region_options,
region_wal_options,
skip_wal_replay,
} = open_region;
let region_id = Self::region_ident_to_region_id(&region_ident);
prepare_wal_options(&mut region_options, region_id, &region_wal_options);
let request = RegionOpenRequest {
engine: region_ident.engine,
table_dir: table_dir(&region_storage_path, region_id.table_id()),
path_type: PathType::Bare,
options: region_options,
skip_wal_replay,
checkpoint: None,
};
(region_id, request)
})
.collect::<Vec<_>>();
pub struct OpenRegionsHandler {
pub open_region_parallelism: usize,
}
let result = self
.region_server
.handle_batch_open_requests(open_region_parallelism, requests, false)
.await;
let success = result.is_ok();
let error = result.as_ref().map_err(|e| format!("{e:?}")).err();
Some(InstructionReply::OpenRegions(SimpleReply {
result: success,
error,
}))
})
#[async_trait::async_trait]
impl InstructionHandler for OpenRegionsHandler {
async fn handle(
&self,
ctx: &HandlerContext,
instruction: Instruction,
) -> Option<InstructionReply> {
let open_regions = instruction.into_open_regions().unwrap();
let requests = open_regions
.into_iter()
.map(|open_region| {
let OpenRegion {
region_ident,
region_storage_path,
mut region_options,
region_wal_options,
skip_wal_replay,
} = open_region;
let region_id = RegionId::new(region_ident.table_id, region_ident.region_number);
prepare_wal_options(&mut region_options, region_id, &region_wal_options);
let request = RegionOpenRequest {
engine: region_ident.engine,
table_dir: table_dir(&region_storage_path, region_id.table_id()),
path_type: PathType::Bare,
options: region_options,
skip_wal_replay,
checkpoint: None,
};
(region_id, request)
})
.collect::<Vec<_>>();
let result = ctx
.region_server
.handle_batch_open_requests(self.open_region_parallelism, requests, false)
.await;
let success = result.is_ok();
let error = result.as_ref().map_err(|e| format!("{e:?}")).err();
Some(InstructionReply::OpenRegions(SimpleReply {
result: success,
error,
}))
}
}

View File

@@ -12,18 +12,24 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_meta::instruction::{InstructionReply, UpgradeRegion, UpgradeRegionReply};
use common_meta::instruction::{Instruction, InstructionReply, UpgradeRegion, UpgradeRegionReply};
use common_telemetry::{info, warn};
use futures_util::future::BoxFuture;
use store_api::region_request::{RegionCatchupRequest, RegionRequest, ReplayCheckpoint};
use crate::heartbeat::handler::HandlerContext;
use crate::heartbeat::handler::{HandlerContext, InstructionHandler};
use crate::heartbeat::task_tracker::WaitResult;
impl HandlerContext {
pub(crate) fn handle_upgrade_region_instruction(
self,
UpgradeRegion {
#[derive(Debug, Clone, Copy, Default)]
pub struct UpgradeRegionsHandler;
#[async_trait::async_trait]
impl InstructionHandler for UpgradeRegionsHandler {
async fn handle(
&self,
ctx: &HandlerContext,
instruction: Instruction,
) -> Option<InstructionReply> {
let UpgradeRegion {
region_id,
last_entry_id,
metadata_last_entry_id,
@@ -31,116 +37,116 @@ impl HandlerContext {
location_id,
replay_entry_id,
metadata_replay_entry_id,
}: UpgradeRegion,
) -> BoxFuture<'static, Option<InstructionReply>> {
Box::pin(async move {
let Some(writable) = self.region_server.is_region_leader(region_id) else {
return Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
ready: false,
exists: false,
error: None,
}));
};
} = instruction.into_upgrade_regions().unwrap();
if writable {
return Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
let Some(writable) = ctx.region_server.is_region_leader(region_id) else {
return Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
ready: false,
exists: false,
error: None,
}));
};
if writable {
return Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
ready: true,
exists: true,
error: None,
}));
}
let region_server_moved = ctx.region_server.clone();
let checkpoint = match (replay_entry_id, metadata_replay_entry_id) {
(Some(entry_id), metadata_entry_id) => Some(ReplayCheckpoint {
entry_id,
metadata_entry_id,
}),
_ => None,
};
// The catchup task is almost zero cost if the underlying region is writable.
// Therefore, it always registers a new catchup task.
let register_result = ctx
.catchup_tasks
.try_register(
region_id,
Box::pin(async move {
info!(
"Executing region: {region_id} catchup to: last entry id {last_entry_id:?}"
);
region_server_moved
.handle_request(
region_id,
RegionRequest::Catchup(RegionCatchupRequest {
set_writable: true,
entry_id: last_entry_id,
metadata_entry_id: metadata_last_entry_id,
location_id,
checkpoint,
}),
)
.await?;
Ok(())
}),
)
.await;
if register_result.is_busy() {
warn!("Another catchup task is running for the region: {region_id}");
}
// Returns immediately
let Some(replay_timeout) = replay_timeout else {
return Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
ready: false,
exists: true,
error: None,
}));
};
// We don't care whether the task is newly registered or already running.
let mut watcher = register_result.into_watcher();
let result = ctx.catchup_tasks.wait(&mut watcher, replay_timeout).await;
match result {
WaitResult::Timeout => Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
ready: false,
exists: true,
error: None,
})),
WaitResult::Finish(Ok(_)) => {
Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
ready: true,
exists: true,
error: None,
}));
}))
}
let region_server_moved = self.region_server.clone();
let checkpoint = match (replay_entry_id, metadata_replay_entry_id) {
(Some(entry_id), metadata_entry_id) => Some(ReplayCheckpoint {
entry_id,
metadata_entry_id,
}),
_ => None,
};
// The catchup task is almost zero cost if the underlying region is already writable.
// Therefore, it always registers a new catchup task.
let register_result = self
.catchup_tasks
.try_register(
region_id,
Box::pin(async move {
info!("Executing region: {region_id} catchup to: last entry id {last_entry_id:?}");
region_server_moved
.handle_request(
region_id,
RegionRequest::Catchup(RegionCatchupRequest {
set_writable: true,
entry_id: last_entry_id,
metadata_entry_id: metadata_last_entry_id,
location_id,
checkpoint,
}),
)
.await?;
Ok(())
}),
)
.await;
if register_result.is_busy() {
warn!("Another catchup task is running for the region: {region_id}");
}
// Returns immediately
let Some(replay_timeout) = replay_timeout else {
return Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
WaitResult::Finish(Err(err)) => {
Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
ready: false,
exists: true,
error: None,
}));
};
// We don't care whether it returns a newly registered task or an already running one.
let mut watcher = register_result.into_watcher();
let result = self.catchup_tasks.wait(&mut watcher, replay_timeout).await;
match result {
WaitResult::Timeout => Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
ready: false,
exists: true,
error: None,
})),
WaitResult::Finish(Ok(_)) => {
Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
ready: true,
exists: true,
error: None,
}))
}
WaitResult::Finish(Err(err)) => {
Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
ready: false,
exists: true,
error: Some(format!("{err:?}")),
}))
}
error: Some(format!("{err:?}")),
}))
}
})
}
}
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use std::time::Duration;
use common_meta::instruction::{InstructionReply, UpgradeRegion};
use common_meta::instruction::{Instruction, UpgradeRegion};
use mito2::engine::MITO_ENGINE_NAME;
use store_api::region_engine::RegionRole;
use store_api::storage::RegionId;
use tokio::time::Instant;
use crate::error;
use crate::heartbeat::handler::HandlerContext;
use crate::heartbeat::handler::upgrade_region::UpgradeRegionsHandler;
use crate::heartbeat::handler::{HandlerContext, InstructionHandler};
use crate::tests::{MockRegionEngine, mock_region_server};
#[tokio::test]
@@ -155,20 +161,20 @@ mod tests {
let waits = vec![None, Some(Duration::from_millis(100u64))];
for replay_timeout in waits {
let reply = handler_context
.clone()
.handle_upgrade_region_instruction(UpgradeRegion {
region_id,
replay_timeout,
..Default::default()
})
let reply = UpgradeRegionsHandler
.handle(
&handler_context,
Instruction::UpgradeRegion(UpgradeRegion {
region_id,
replay_timeout,
..Default::default()
}),
)
.await;
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
if let InstructionReply::UpgradeRegion(reply) = reply.unwrap() {
assert!(!reply.exists);
assert!(reply.error.is_none());
}
let reply = reply.unwrap().expect_upgrade_region_reply();
assert!(!reply.exists);
assert!(reply.error.is_none());
}
}
@@ -192,21 +198,21 @@ mod tests {
let waits = vec![None, Some(Duration::from_millis(100u64))];
for replay_timeout in waits {
let reply = handler_context
.clone()
.handle_upgrade_region_instruction(UpgradeRegion {
region_id,
replay_timeout,
..Default::default()
})
let reply = UpgradeRegionsHandler
.handle(
&handler_context,
Instruction::UpgradeRegion(UpgradeRegion {
region_id,
replay_timeout,
..Default::default()
}),
)
.await;
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
if let InstructionReply::UpgradeRegion(reply) = reply.unwrap() {
assert!(reply.ready);
assert!(reply.exists);
assert!(reply.error.is_none());
}
let reply = reply.unwrap().expect_upgrade_region_reply();
assert!(reply.ready);
assert!(reply.exists);
assert!(reply.error.is_none());
}
}
@@ -230,21 +236,21 @@ mod tests {
let waits = vec![None, Some(Duration::from_millis(100u64))];
for replay_timeout in waits {
let reply = handler_context
.clone()
.handle_upgrade_region_instruction(UpgradeRegion {
region_id,
replay_timeout,
..Default::default()
})
let reply = UpgradeRegionsHandler
.handle(
&handler_context,
Instruction::UpgradeRegion(UpgradeRegion {
region_id,
replay_timeout,
..Default::default()
}),
)
.await;
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
if let InstructionReply::UpgradeRegion(reply) = reply.unwrap() {
assert!(!reply.ready);
assert!(reply.exists);
assert!(reply.error.is_none());
}
let reply = reply.unwrap().expect_upgrade_region_reply();
assert!(!reply.ready);
assert!(reply.exists);
assert!(reply.error.is_none());
}
}
@@ -271,40 +277,41 @@ mod tests {
let handler_context = HandlerContext::new_for_test(mock_region_server);
for replay_timeout in waits {
let reply = handler_context
.clone()
.handle_upgrade_region_instruction(UpgradeRegion {
region_id,
replay_timeout,
..Default::default()
})
let reply = UpgradeRegionsHandler
.handle(
&handler_context,
Instruction::UpgradeRegion(UpgradeRegion {
region_id,
replay_timeout,
..Default::default()
}),
)
.await;
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
if let InstructionReply::UpgradeRegion(reply) = reply.unwrap() {
assert!(!reply.ready);
assert!(reply.exists);
assert!(reply.error.is_none());
}
}
let timer = Instant::now();
let reply = handler_context
.handle_upgrade_region_instruction(UpgradeRegion {
region_id,
replay_timeout: Some(Duration::from_millis(500)),
..Default::default()
})
.await;
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
// Must be less than 300 ms.
assert!(timer.elapsed().as_millis() < 300);
if let InstructionReply::UpgradeRegion(reply) = reply.unwrap() {
assert!(reply.ready);
let reply = reply.unwrap().expect_upgrade_region_reply();
assert!(!reply.ready);
assert!(reply.exists);
assert!(reply.error.is_none());
}
let timer = Instant::now();
let reply = UpgradeRegionsHandler
.handle(
&handler_context,
Instruction::UpgradeRegion(UpgradeRegion {
region_id,
replay_timeout: Some(Duration::from_millis(500)),
..Default::default()
}),
)
.await;
// Must be less than 300 ms.
assert!(timer.elapsed().as_millis() < 300);
let reply = reply.unwrap().expect_upgrade_region_reply();
assert!(reply.ready);
assert!(reply.exists);
assert!(reply.error.is_none());
}
#[tokio::test]
@@ -329,37 +336,37 @@ mod tests {
let handler_context = HandlerContext::new_for_test(mock_region_server);
let reply = handler_context
.clone()
.handle_upgrade_region_instruction(UpgradeRegion {
region_id,
..Default::default()
})
let reply = UpgradeRegionsHandler
.handle(
&handler_context,
Instruction::UpgradeRegion(UpgradeRegion {
region_id,
..Default::default()
}),
)
.await;
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
// It didn't wait for the handler to return, so it has no idea about the error.
if let InstructionReply::UpgradeRegion(reply) = reply.unwrap() {
assert!(!reply.ready);
assert!(reply.exists);
assert!(reply.error.is_none());
}
let reply = reply.unwrap().expect_upgrade_region_reply();
assert!(!reply.ready);
assert!(reply.exists);
assert!(reply.error.is_none());
let reply = handler_context
.clone()
.handle_upgrade_region_instruction(UpgradeRegion {
region_id,
replay_timeout: Some(Duration::from_millis(200)),
..Default::default()
})
let reply = UpgradeRegionsHandler
.handle(
&handler_context,
Instruction::UpgradeRegion(UpgradeRegion {
region_id,
replay_timeout: Some(Duration::from_millis(200)),
..Default::default()
}),
)
.await;
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
if let InstructionReply::UpgradeRegion(reply) = reply.unwrap() {
assert!(!reply.ready);
assert!(reply.exists);
assert!(reply.error.is_some());
assert!(reply.error.unwrap().contains("mock_error"));
}
let reply = reply.unwrap().expect_upgrade_region_reply();
assert!(!reply.ready);
assert!(reply.exists);
assert!(reply.error.is_some());
assert!(reply.error.unwrap().contains("mock_error"));
}
}
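
For orientation, here is a minimal, self-contained sketch of the handler shape this hunk migrates to, inferred only from the calls visible above (`UpgradeRegionsHandler.handle(&ctx, instruction)` returning `Option<InstructionReply>`, plus the `expect_upgrade_region_reply()` test helper). All types below are stand-ins, not the crate's real definitions.

```rust
// Hypothetical stand-ins for common_meta::instruction and heartbeat::handler
// types; only the overall shape mirrors the diff above.
#[derive(Debug, Default)]
struct HandlerContext;

#[derive(Debug)]
enum Instruction {
    UpgradeRegion { region_id: u64 },
}

#[derive(Debug)]
struct UpgradeRegionReply {
    ready: bool,
    exists: bool,
    error: Option<String>,
}

#[derive(Debug)]
enum InstructionReply {
    UpgradeRegion(UpgradeRegionReply),
}

impl InstructionReply {
    // Mirrors the `expect_upgrade_region_reply()` helper the tests now use.
    fn expect_upgrade_region_reply(self) -> UpgradeRegionReply {
        match self {
            InstructionReply::UpgradeRegion(reply) => reply,
        }
    }
}

#[async_trait::async_trait]
trait InstructionHandler: Send + Sync {
    async fn handle(
        &self,
        ctx: &HandlerContext,
        instruction: Instruction,
    ) -> Option<InstructionReply>;
}

#[derive(Debug, Clone, Copy, Default)]
struct UpgradeRegionsHandler;

#[async_trait::async_trait]
impl InstructionHandler for UpgradeRegionsHandler {
    async fn handle(
        &self,
        _ctx: &HandlerContext,
        instruction: Instruction,
    ) -> Option<InstructionReply> {
        // The real handler registers a catchup task and optionally waits for it;
        // this stub simply acknowledges the region.
        let Instruction::UpgradeRegion { region_id: _ } = instruction;
        Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
            ready: true,
            exists: true,
            error: None,
        }))
    }
}

#[tokio::main]
async fn main() {
    let reply = UpgradeRegionsHandler
        .handle(&HandlerContext, Instruction::UpgradeRegion { region_id: 1 })
        .await
        .unwrap()
        .expect_upgrade_region_reply();
    assert!(reply.ready && reply.exists && reply.error.is_none());
}
```

Dispatching through a trait object rather than a `HandlerContext` method keeps the context free of per-instruction logic, which is presumably the point of the refactor.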

View File

@@ -24,6 +24,7 @@ use std::sync::Arc;
use common_base::bytes::StringBytes;
use ordered_float::OrderedFloat;
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value as Json};
use snafu::{ResultExt, ensure};
@@ -45,7 +46,7 @@ use crate::value::{ListValue, StructValue, Value};
/// convert them to fully structured StructValue for user-facing APIs: the UI protocol and the UDF interface.
///
/// **Important**: This settings only controls the internal form of JSON encoding.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum JsonStructureSettings {
// TODO(sunng87): provide a limit
Structured(Option<StructType>),
@@ -111,6 +112,12 @@ impl JsonStructureSettings {
}
}
impl Default for JsonStructureSettings {
fn default() -> Self {
Self::Structured(None)
}
}
impl<'a> JsonContext<'a> {
/// Create a new context with an updated key path
pub fn with_key(&self, key: &str) -> JsonContext<'a> {

View File

@@ -32,8 +32,9 @@ pub use crate::schema::column_schema::{
COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE, COLUMN_FULLTEXT_OPT_KEY_GRANULARITY,
COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, ColumnExtType, ColumnSchema, FULLTEXT_KEY,
FulltextAnalyzer, FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, Metadata,
SKIPPING_INDEX_KEY, SkippingIndexOptions, SkippingIndexType, TIME_INDEX_KEY,
FulltextAnalyzer, FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY,
JSON_STRUCTURE_SETTINGS_KEY, Metadata, SKIPPING_INDEX_KEY, SkippingIndexOptions,
SkippingIndexType, TIME_INDEX_KEY,
};
pub use crate::schema::constraint::ColumnDefaultConstraint;
pub use crate::schema::raw::RawSchema;
@@ -368,8 +369,7 @@ impl TryFrom<DFSchemaRef> for Schema {
type Error = Error;
fn try_from(value: DFSchemaRef) -> Result<Self> {
let s: ArrowSchema = value.as_ref().into();
s.try_into()
value.inner().clone().try_into()
}
}
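
The one-line change above replaces rebuilding an `ArrowSchema` from the `DFSchema` with `DFSchema::inner()`, which hands back the underlying `Arc<arrow::Schema>` directly. A small hedged sketch of that accessor in isolation, assuming the `datafusion-common` and `arrow-schema` crates pinned in this workspace:

```rust
use std::sync::Arc;

use arrow_schema::{DataType, Field, Schema as ArrowSchema};
use datafusion_common::{DFSchema, Result};

fn main() -> Result<()> {
    let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
        "ts",
        DataType::Int64,
        false,
    )]));
    // DFSchema wraps the arrow schema; `inner()` exposes that wrapped Arc, so a
    // consumer can clone it and convert onward without reconstructing the fields.
    let df_schema = DFSchema::try_from(arrow_schema.clone())?;
    assert_eq!(df_schema.inner().fields().len(), 1);
    assert_eq!(df_schema.inner().field(0).name(), "ts");
    Ok(())
}
```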

View File

@@ -23,6 +23,7 @@ use sqlparser_derive::{Visit, VisitMut};
use crate::data_type::{ConcreteDataType, DataType};
use crate::error::{self, Error, InvalidFulltextOptionSnafu, ParseExtendedTypeSnafu, Result};
use crate::json::JsonStructureSettings;
use crate::schema::TYPE_KEY;
use crate::schema::constraint::ColumnDefaultConstraint;
use crate::value::Value;
@@ -41,6 +42,7 @@ pub const FULLTEXT_KEY: &str = "greptime:fulltext";
pub const INVERTED_INDEX_KEY: &str = "greptime:inverted_index";
/// Key used to store skip options in arrow field's metadata.
pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index";
pub const JSON_STRUCTURE_SETTINGS_KEY: &str = "greptime:json:structure_settings";
/// Keys used in fulltext options
pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable";
@@ -391,6 +393,21 @@ impl ColumnSchema {
self.metadata.remove(SKIPPING_INDEX_KEY);
Ok(())
}
pub fn json_structure_settings(&self) -> Result<Option<JsonStructureSettings>> {
self.metadata
.get(JSON_STRUCTURE_SETTINGS_KEY)
.map(|json| serde_json::from_str(json).context(error::DeserializeSnafu { json }))
.transpose()
}
pub fn with_json_structure_settings(&mut self, settings: &JsonStructureSettings) -> Result<()> {
self.metadata.insert(
JSON_STRUCTURE_SETTINGS_KEY.to_string(),
serde_json::to_string(settings).context(error::SerializeSnafu)?,
);
Ok(())
}
}
/// Column extended type set in column schema's metadata.
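
Taken together with the serde derives added to `JsonStructureSettings` a few files up, the two new accessors simply round-trip the settings as a JSON string under `JSON_STRUCTURE_SETTINGS_KEY` in the column's metadata map. A hedged, self-contained sketch of that round trip; the enum below is a trimmed stand-in (only the `Structured(None)` default case, with `String` in place of `StructType`), and a plain `HashMap` stands in for `ColumnSchema`'s metadata.

```rust
use std::collections::HashMap;

use serde::{Deserialize, Serialize};

// Trimmed stand-in for JsonStructureSettings; the real enum wraps an optional
// StructType and may carry more variants.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
enum JsonStructureSettings {
    Structured(Option<String>),
}

impl Default for JsonStructureSettings {
    fn default() -> Self {
        Self::Structured(None)
    }
}

const JSON_STRUCTURE_SETTINGS_KEY: &str = "greptime:json:structure_settings";

fn main() -> Result<(), serde_json::Error> {
    let mut metadata: HashMap<String, String> = HashMap::new();

    // with_json_structure_settings: serialize and store under the metadata key.
    let settings = JsonStructureSettings::default();
    metadata.insert(
        JSON_STRUCTURE_SETTINGS_KEY.to_string(),
        serde_json::to_string(&settings)?,
    );

    // json_structure_settings: read it back, yielding None if the key is absent.
    let decoded: Option<JsonStructureSettings> = metadata
        .get(JSON_STRUCTURE_SETTINGS_KEY)
        .map(|json| serde_json::from_str(json))
        .transpose()?;
    assert_eq!(decoded, Some(settings));
    Ok(())
}
```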

View File

@@ -15,6 +15,7 @@
use std::str::FromStr;
use arrow::datatypes::DataType as ArrowDataType;
use arrow_schema::Fields;
use common_base::bytes::Bytes;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
@@ -63,7 +64,10 @@ impl DataType for JsonType {
}
fn as_arrow_type(&self) -> ArrowDataType {
ArrowDataType::Binary
match self.format {
JsonFormat::Jsonb => ArrowDataType::Binary,
JsonFormat::Native(_) => ArrowDataType::Struct(Fields::empty()),
}
}
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {

View File

@@ -1208,7 +1208,9 @@ impl TryFrom<ScalarValue> for Value {
.collect::<Result<Vec<Value>>>()?;
Value::Struct(StructValue::try_new(items, struct_type)?)
}
ScalarValue::Decimal256(_, _, _)
ScalarValue::Decimal32(_, _, _)
| ScalarValue::Decimal64(_, _, _)
| ScalarValue::Decimal256(_, _, _)
| ScalarValue::FixedSizeList(_)
| ScalarValue::LargeList(_)
| ScalarValue::Dictionary(_, _)

View File

@@ -245,7 +245,9 @@ impl Helper {
length,
)
}
ScalarValue::Decimal256(_, _, _)
ScalarValue::Decimal32(_, _, _)
| ScalarValue::Decimal64(_, _, _)
| ScalarValue::Decimal256(_, _, _)
| ScalarValue::FixedSizeList(_)
| ScalarValue::LargeList(_)
| ScalarValue::Dictionary(_, _)

View File

@@ -427,7 +427,7 @@ fn expand_tumble_analyzer(
/// This is a placeholder for tumble_start and tumble_end function, so that datafusion can
/// recognize them as scalar function
#[derive(Debug)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct TumbleExpand {
signature: Signature,
name: String,

View File

@@ -18,7 +18,6 @@ use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use api::v1::meta::{HeartbeatRequest, Peer};
use common_config::utils::ResourceSpec;
use common_error::ext::BoxedError;
use common_meta::heartbeat::handler::{
HeartbeatResponseHandlerContext, HeartbeatResponseHandlerExecutorRef,
@@ -26,6 +25,7 @@ use common_meta::heartbeat::handler::{
use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MailboxRef, OutgoingMessage};
use common_meta::heartbeat::utils::outgoing_message_to_mailbox_message;
use common_meta::key::flow::flow_state::FlowStat;
use common_stat::ResourceStatRef;
use common_telemetry::{debug, error, info, warn};
use greptime_proto::v1::meta::NodeInfo;
use meta_client::client::{HeartbeatSender, HeartbeatStream, MetaClient};
@@ -69,7 +69,7 @@ pub struct HeartbeatTask {
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
running: Arc<AtomicBool>,
query_stat_size: Option<SizeReportSender>,
resource_spec: ResourceSpec,
resource_stat: ResourceStatRef,
}
impl HeartbeatTask {
@@ -77,11 +77,13 @@ impl HeartbeatTask {
self.query_stat_size = Some(query_stat_size);
self
}
pub fn new(
opts: &FlownodeOptions,
meta_client: Arc<MetaClient>,
heartbeat_opts: HeartbeatOptions,
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
resource_stat: ResourceStatRef,
) -> Self {
Self {
node_id: opts.node_id.unwrap_or(0),
@@ -93,7 +95,7 @@ impl HeartbeatTask {
resp_handler_executor,
running: Arc::new(AtomicBool::new(false)),
query_stat_size: None,
resource_spec: Default::default(),
resource_stat,
}
}
@@ -146,6 +148,8 @@ impl HeartbeatTask {
heartbeat_request: &HeartbeatRequest,
message: Option<OutgoingMessage>,
latest_report: &Option<FlowStat>,
cpu_usage: i64,
memory_usage: i64,
) -> Option<HeartbeatRequest> {
let mailbox_message = match message.map(outgoing_message_to_mailbox_message) {
Some(Ok(message)) => Some(message),
@@ -170,21 +174,38 @@ impl HeartbeatTask {
.collect(),
});
Some(HeartbeatRequest {
let mut heartbeat_request = HeartbeatRequest {
mailbox_message,
flow_stat,
..heartbeat_request.clone()
})
};
if let Some(info) = heartbeat_request.info.as_mut() {
info.cpu_usage_millicores = cpu_usage;
info.memory_usage_bytes = memory_usage;
}
Some(heartbeat_request)
}
fn build_node_info(start_time_ms: u64, cpus: u32, memory_bytes: u64) -> Option<NodeInfo> {
#[allow(deprecated)]
fn build_node_info(
start_time_ms: u64,
total_cpu_millicores: i64,
total_memory_bytes: i64,
) -> Option<NodeInfo> {
let build_info = common_version::build_info();
Some(NodeInfo {
version: build_info.version.to_string(),
git_commit: build_info.commit_short.to_string(),
start_time_ms,
cpus,
memory_bytes,
total_cpu_millicores,
total_memory_bytes,
cpu_usage_millicores: 0,
memory_usage_bytes: 0,
// TODO(zyy17): Remove these assignments once the deprecated fields are dropped from the proto.
cpus: total_cpu_millicores as u32,
memory_bytes: total_memory_bytes as u64,
hostname: hostname::get()
.unwrap_or_default()
.to_string_lossy()
@@ -203,9 +224,9 @@ impl HeartbeatTask {
id: self.node_id,
addr: self.peer_addr.clone(),
});
let cpus = self.resource_spec.cpus as u32;
let memory_bytes = self.resource_spec.memory.unwrap_or_default().as_bytes();
let total_cpu_millicores = self.resource_stat.get_total_cpu_millicores();
let total_memory_bytes = self.resource_stat.get_total_memory_bytes();
let resource_stat = self.resource_stat.clone();
let query_stat_size = self.query_stat_size.clone();
common_runtime::spawn_hb(async move {
@@ -218,7 +239,7 @@ impl HeartbeatTask {
let heartbeat_request = HeartbeatRequest {
peer: self_peer,
node_epoch,
info: Self::build_node_info(node_epoch, cpus, memory_bytes),
info: Self::build_node_info(node_epoch, total_cpu_millicores, total_memory_bytes),
..Default::default()
};
@@ -226,7 +247,7 @@ impl HeartbeatTask {
let req = tokio::select! {
message = outgoing_rx.recv() => {
if let Some(message) = message {
Self::new_heartbeat_request(&heartbeat_request, Some(message), &latest_report)
Self::new_heartbeat_request(&heartbeat_request, Some(message), &latest_report, 0, 0)
} else {
warn!("Sender has been dropped, exiting the heartbeat loop");
// Receives None that means Sender was dropped, we need to break the current loop
@@ -234,7 +255,7 @@ impl HeartbeatTask {
}
}
_ = interval.tick() => {
Self::new_heartbeat_request(&heartbeat_request, None, &latest_report)
Self::new_heartbeat_request(&heartbeat_request, None, &latest_report, resource_stat.get_cpu_usage_millicores(), resource_stat.get_memory_usage_bytes())
}
};
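
This flownode heartbeat task (and the frontend one further below) now pulls its numbers from a `ResourceStatRef` with four getters. A minimal sketch of that assumed shape, using the 1 core = 1000 millicores convention the field names imply; the real trait and its collector live in `common_stat` and may differ.

```rust
use std::sync::Arc;

// Assumed shape of the common_stat abstraction; the getter names come from the
// calls in this diff, the trait itself is a stand-in.
trait ResourceStat: Send + Sync {
    fn get_total_cpu_millicores(&self) -> i64;
    fn get_total_memory_bytes(&self) -> i64;
    fn get_cpu_usage_millicores(&self) -> i64;
    fn get_memory_usage_bytes(&self) -> i64;
}

type ResourceStatRef = Arc<dyn ResourceStat>;

// Toy implementation reporting fixed numbers. A real implementation would sample
// the process periodically (compare ResourceStatImpl::start_collect_cpu_usage in
// the metasrv builder hunk further below).
struct StaticStat {
    cores: i64,
    memory_bytes: i64,
}

impl ResourceStat for StaticStat {
    fn get_total_cpu_millicores(&self) -> i64 {
        self.cores * 1000
    }
    fn get_total_memory_bytes(&self) -> i64 {
        self.memory_bytes
    }
    fn get_cpu_usage_millicores(&self) -> i64 {
        250 // a quarter of one core
    }
    fn get_memory_usage_bytes(&self) -> i64 {
        512 * 1024 * 1024
    }
}

fn main() {
    let stat: ResourceStatRef = Arc::new(StaticStat {
        cores: 8,
        memory_bytes: 16 * 1024 * 1024 * 1024,
    });
    assert_eq!(stat.get_total_cpu_millicores(), 8000);
    println!(
        "usage: {} millicores, {} bytes",
        stat.get_cpu_usage_millicores(),
        stat.get_memory_usage_bytes()
    );
}
```

In the diff, the totals are captured once before the heartbeat loop starts, while the usage getters are polled on every interval tick; the mailbox-message path passes `0, 0` instead of sampling.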

View File

@@ -490,6 +490,7 @@ impl<'a> FlownodeServiceBuilder<'a> {
let config = GrpcServerConfig {
max_recv_message_size: opts.grpc.max_recv_message_size.as_bytes() as usize,
max_send_message_size: opts.grpc.max_send_message_size.as_bytes() as usize,
max_total_message_memory: opts.grpc.max_total_message_memory.as_bytes() as usize,
tls: opts.grpc.tls.clone(),
max_connection_age: opts.grpc.max_connection_age,
};

View File

@@ -37,6 +37,7 @@ common-procedure.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true
common-runtime.workspace = true
common-stat.workspace = true
common-telemetry.workspace = true
common-time.workspace = true
common-version.workspace = true

View File

@@ -18,12 +18,12 @@ mod tests;
use std::sync::Arc;
use api::v1::meta::{HeartbeatRequest, NodeInfo, Peer};
use common_config::utils::ResourceSpec;
use common_meta::heartbeat::handler::{
HeartbeatResponseHandlerContext, HeartbeatResponseHandlerExecutorRef,
};
use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MailboxRef, OutgoingMessage};
use common_meta::heartbeat::utils::outgoing_message_to_mailbox_message;
use common_stat::ResourceStatRef;
use common_telemetry::{debug, error, info, warn};
use meta_client::client::{HeartbeatSender, HeartbeatStream, MetaClient};
use servers::addrs;
@@ -47,7 +47,7 @@ pub struct HeartbeatTask {
retry_interval: Duration,
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
start_time_ms: u64,
resource_spec: ResourceSpec,
resource_stat: ResourceStatRef,
}
impl HeartbeatTask {
@@ -56,6 +56,7 @@ impl HeartbeatTask {
meta_client: Arc<MetaClient>,
heartbeat_opts: HeartbeatOptions,
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
resource_stat: ResourceStatRef,
) -> Self {
HeartbeatTask {
// if internal grpc is configured, use its address as the peer address
@@ -71,7 +72,7 @@ impl HeartbeatTask {
retry_interval: heartbeat_opts.retry_interval,
resp_handler_executor,
start_time_ms: common_time::util::current_time_millis() as u64,
resource_spec: Default::default(),
resource_stat,
}
}
@@ -133,6 +134,8 @@ impl HeartbeatTask {
fn new_heartbeat_request(
heartbeat_request: &HeartbeatRequest,
message: Option<OutgoingMessage>,
cpu_usage: i64,
memory_usage: i64,
) -> Option<HeartbeatRequest> {
let mailbox_message = match message.map(outgoing_message_to_mailbox_message) {
Some(Ok(message)) => Some(message),
@@ -143,21 +146,38 @@ impl HeartbeatTask {
None => None,
};
Some(HeartbeatRequest {
let mut heartbeat_request = HeartbeatRequest {
mailbox_message,
..heartbeat_request.clone()
})
};
if let Some(info) = heartbeat_request.info.as_mut() {
info.memory_usage_bytes = memory_usage;
info.cpu_usage_millicores = cpu_usage;
}
Some(heartbeat_request)
}
fn build_node_info(start_time_ms: u64, cpus: u32, memory_bytes: u64) -> Option<NodeInfo> {
#[allow(deprecated)]
fn build_node_info(
start_time_ms: u64,
total_cpu_millicores: i64,
total_memory_bytes: i64,
) -> Option<NodeInfo> {
let build_info = common_version::build_info();
Some(NodeInfo {
version: build_info.version.to_string(),
git_commit: build_info.commit_short.to_string(),
start_time_ms,
cpus,
memory_bytes,
total_cpu_millicores,
total_memory_bytes,
cpu_usage_millicores: 0,
memory_usage_bytes: 0,
// TODO(zyy17): Remove these assignments once the deprecated fields are dropped from the proto.
cpus: total_cpu_millicores as u32,
memory_bytes: total_memory_bytes as u64,
hostname: hostname::get()
.unwrap_or_default()
.to_string_lossy()
@@ -177,16 +197,20 @@ impl HeartbeatTask {
id: 0,
addr: self.peer_addr.clone(),
});
let cpus = self.resource_spec.cpus as u32;
let memory_bytes = self.resource_spec.memory.unwrap_or_default().as_bytes();
let total_cpu_millicores = self.resource_stat.get_total_cpu_millicores();
let total_memory_bytes = self.resource_stat.get_total_memory_bytes();
let resource_stat = self.resource_stat.clone();
common_runtime::spawn_hb(async move {
let sleep = tokio::time::sleep(Duration::from_millis(0));
tokio::pin!(sleep);
let heartbeat_request = HeartbeatRequest {
peer: self_peer,
info: Self::build_node_info(start_time_ms, cpus, memory_bytes),
info: Self::build_node_info(
start_time_ms,
total_cpu_millicores,
total_memory_bytes,
),
..Default::default()
};
@@ -194,7 +218,7 @@ impl HeartbeatTask {
let req = tokio::select! {
message = outgoing_rx.recv() => {
if let Some(message) = message {
Self::new_heartbeat_request(&heartbeat_request, Some(message))
Self::new_heartbeat_request(&heartbeat_request, Some(message), 0, 0)
} else {
warn!("Sender has been dropped, exiting the heartbeat loop");
// Receives None that means Sender was dropped, we need to break the current loop
@@ -202,8 +226,8 @@ impl HeartbeatTask {
}
}
_ = &mut sleep => {
sleep.as_mut().reset(Instant::now() + report_interval);
Self::new_heartbeat_request(&heartbeat_request, None)
sleep.as_mut().reset(Instant::now() + report_interval);
Self::new_heartbeat_request(&heartbeat_request, None, resource_stat.get_cpu_usage_millicores(), resource_stat.get_memory_usage_bytes())
}
};

View File

@@ -24,7 +24,9 @@ mod util;
use std::fmt::Debug;
use std::sync::Arc;
use api::v1::meta::{ProcedureDetailResponse, ReconcileRequest, ReconcileResponse, Role};
use api::v1::meta::{
MetasrvNodeInfo, ProcedureDetailResponse, ReconcileRequest, ReconcileResponse, Role,
};
pub use ask_leader::{AskLeader, LeaderProvider, LeaderProviderRef};
use cluster::Client as ClusterClient;
pub use cluster::ClusterKvBackend;
@@ -371,7 +373,8 @@ impl ClusterInfo for MetaClient {
let mut nodes = if get_metasrv_nodes {
let last_activity_ts = -1; // Metasrv does not provide this information.
let (leader, followers) = cluster_client.get_metasrv_peers().await?;
let (leader, followers): (Option<MetasrvNodeInfo>, Vec<MetasrvNodeInfo>) =
cluster_client.get_metasrv_peers().await?;
followers
.into_iter()
.map(|node| {
@@ -383,8 +386,10 @@ impl ClusterInfo for MetaClient {
version: node_info.version,
git_commit: node_info.git_commit,
start_time_ms: node_info.start_time_ms,
cpus: node_info.cpus,
memory_bytes: node_info.memory_bytes,
total_cpu_millicores: node_info.total_cpu_millicores,
total_memory_bytes: node_info.total_memory_bytes,
cpu_usage_millicores: node_info.cpu_usage_millicores,
memory_usage_bytes: node_info.memory_usage_bytes,
hostname: node_info.hostname,
}
} else {
@@ -396,8 +401,10 @@ impl ClusterInfo for MetaClient {
version: node.version,
git_commit: node.git_commit,
start_time_ms: node.start_time_ms,
cpus: node.cpus,
memory_bytes: node.memory_bytes,
total_cpu_millicores: node.cpus as i64,
total_memory_bytes: node.memory_bytes as i64,
cpu_usage_millicores: 0,
memory_usage_bytes: 0,
hostname: "".to_string(),
}
}
@@ -411,8 +418,10 @@ impl ClusterInfo for MetaClient {
version: node_info.version,
git_commit: node_info.git_commit,
start_time_ms: node_info.start_time_ms,
cpus: node_info.cpus,
memory_bytes: node_info.memory_bytes,
total_cpu_millicores: node_info.total_cpu_millicores,
total_memory_bytes: node_info.total_memory_bytes,
cpu_usage_millicores: node_info.cpu_usage_millicores,
memory_usage_bytes: node_info.memory_usage_bytes,
hostname: node_info.hostname,
}
} else {
@@ -424,8 +433,10 @@ impl ClusterInfo for MetaClient {
version: node.version,
git_commit: node.git_commit,
start_time_ms: node.start_time_ms,
cpus: node.cpus,
memory_bytes: node.memory_bytes,
total_cpu_millicores: node.cpus as i64,
total_memory_bytes: node.memory_bytes as i64,
cpu_usage_millicores: 0,
memory_usage_bytes: 0,
hostname: "".to_string(),
}
}

View File

@@ -39,6 +39,7 @@ common-meta.workspace = true
common-options.workspace = true
common-procedure.workspace = true
common-runtime.workspace = true
common-stat.workspace = true
common-telemetry.workspace = true
common-time.workspace = true
common-version.workspace = true

View File

@@ -243,8 +243,10 @@ mod tests {
version: "1.0.0".to_string(),
git_commit: "1234567890".to_string(),
start_time_ms: current_time_millis() as u64,
cpus: 0,
memory_bytes: 0,
total_cpu_millicores: 0,
total_memory_bytes: 0,
cpu_usage_millicores: 0,
memory_usage_bytes: 0,
hostname: "test_hostname".to_string(),
};
@@ -269,8 +271,10 @@ mod tests {
version: "1.0.0".to_string(),
git_commit: "1234567890".to_string(),
start_time_ms: current_time_millis() as u64,
cpus: 0,
memory_bytes: 0,
total_cpu_millicores: 0,
total_memory_bytes: 0,
cpu_usage_millicores: 0,
memory_usage_bytes: 0,
hostname: "test_hostname".to_string(),
};
@@ -307,8 +311,10 @@ mod tests {
version: "1.0.0".to_string(),
git_commit: "1234567890".to_string(),
start_time_ms: last_activity_ts as u64,
cpus: 0,
memory_bytes: 0,
total_cpu_millicores: 0,
total_memory_bytes: 0,
cpu_usage_millicores: 0,
memory_usage_bytes: 0,
hostname: "test_hostname".to_string(),
};

View File

@@ -1161,8 +1161,10 @@ mod tests {
version: "test_version".to_string(),
git_commit: "test_git_commit".to_string(),
start_time_ms: 0,
cpus: 0,
memory_bytes: 0,
total_cpu_millicores: 0,
total_memory_bytes: 0,
cpu_usage_millicores: 0,
memory_usage_bytes: 0,
hostname: "test_hostname".to_string(),
};
mysql_election.register_candidate(&node_info).await.unwrap();

View File

@@ -1000,8 +1000,10 @@ mod tests {
version: "test_version".to_string(),
git_commit: "test_git_commit".to_string(),
start_time_ms: 0,
cpus: 0,
memory_bytes: 0,
total_cpu_millicores: 0,
total_memory_bytes: 0,
cpu_usage_millicores: 0,
memory_usage_bytes: 0,
hostname: "test_hostname".to_string(),
};
pg_election.register_candidate(&node_info).await.unwrap();

View File

@@ -52,8 +52,10 @@ impl HeartbeatHandler for CollectFrontendClusterInfoHandler {
version: info.version,
git_commit: info.git_commit,
start_time_ms: info.start_time_ms,
cpus: info.cpus,
memory_bytes: info.memory_bytes,
total_cpu_millicores: info.total_cpu_millicores,
total_memory_bytes: info.total_memory_bytes,
cpu_usage_millicores: info.cpu_usage_millicores,
memory_usage_bytes: info.memory_usage_bytes,
hostname: info.hostname,
};
@@ -88,8 +90,10 @@ impl HeartbeatHandler for CollectFlownodeClusterInfoHandler {
version: info.version,
git_commit: info.git_commit,
start_time_ms: info.start_time_ms,
cpus: info.cpus,
memory_bytes: info.memory_bytes,
total_cpu_millicores: info.total_cpu_millicores,
total_memory_bytes: info.total_memory_bytes,
cpu_usage_millicores: info.cpu_usage_millicores,
memory_usage_bytes: info.memory_usage_bytes,
hostname: info.hostname,
};
@@ -142,8 +146,10 @@ impl HeartbeatHandler for CollectDatanodeClusterInfoHandler {
version: info.version,
git_commit: info.git_commit,
start_time_ms: info.start_time_ms,
cpus: info.cpus,
memory_bytes: info.memory_bytes,
total_cpu_millicores: info.total_cpu_millicores,
total_memory_bytes: info.total_memory_bytes,
cpu_usage_millicores: info.cpu_usage_millicores,
memory_usage_bytes: info.memory_usage_bytes,
hostname: info.hostname,
};

View File

@@ -22,7 +22,6 @@ use std::time::Duration;
use clap::ValueEnum;
use common_base::Plugins;
use common_base::readable_size::ReadableSize;
use common_config::utils::ResourceSpec;
use common_config::{Configurable, DEFAULT_DATA_HOME};
use common_event_recorder::EventRecorderOptions;
use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
@@ -47,6 +46,7 @@ use common_options::datanode::DatanodeClientOptions;
use common_options::memory::MemoryOptions;
use common_procedure::ProcedureManagerRef;
use common_procedure::options::ProcedureConfig;
use common_stat::ResourceStatRef;
use common_telemetry::logging::{LoggingOptions, TracingOptions};
use common_telemetry::{error, info, warn};
use common_wal::config::MetasrvWalConfig;
@@ -372,12 +372,16 @@ pub struct MetasrvNodeInfo {
pub git_commit: String,
// The node start timestamp in milliseconds
pub start_time_ms: u64,
// The node cpus
// The node total cpu millicores
#[serde(default)]
pub cpus: u32,
// The node memory bytes
pub total_cpu_millicores: i64,
#[serde(default)]
pub memory_bytes: u64,
// The node total memory bytes
pub total_memory_bytes: i64,
/// The node cpu usage in millicores
pub cpu_usage_millicores: i64,
/// The node memory usage in bytes
pub memory_usage_bytes: i64,
// The node hostname
#[serde(default)]
pub hostname: String,
@@ -397,15 +401,19 @@ impl From<MetasrvNodeInfo> for api::v1::meta::MetasrvNodeInfo {
version: node_info.version.clone(),
git_commit: node_info.git_commit.clone(),
start_time_ms: node_info.start_time_ms,
cpus: node_info.cpus,
memory_bytes: node_info.memory_bytes,
cpus: node_info.total_cpu_millicores as u32,
memory_bytes: node_info.total_memory_bytes as u64,
// The canonical location for node information.
info: Some(api::v1::meta::NodeInfo {
version: node_info.version,
git_commit: node_info.git_commit,
start_time_ms: node_info.start_time_ms,
cpus: node_info.cpus,
memory_bytes: node_info.memory_bytes,
total_cpu_millicores: node_info.total_cpu_millicores,
total_memory_bytes: node_info.total_memory_bytes,
cpu_usage_millicores: node_info.cpu_usage_millicores,
memory_usage_bytes: node_info.memory_usage_bytes,
cpus: node_info.total_cpu_millicores as u32,
memory_bytes: node_info.total_memory_bytes as u64,
hostname: node_info.hostname,
}),
}
@@ -517,7 +525,7 @@ pub struct Metasrv {
region_flush_ticker: Option<RegionFlushTickerRef>,
table_id_sequence: SequenceRef,
reconciliation_manager: ReconciliationManagerRef,
resource_spec: ResourceSpec,
resource_stat: ResourceStatRef,
plugins: Plugins,
}
@@ -699,8 +707,8 @@ impl Metasrv {
self.start_time_ms
}
pub fn resource_spec(&self) -> &ResourceSpec {
&self.resource_spec
pub fn resource_stat(&self) -> &ResourceStatRef {
&self.resource_stat
}
pub fn node_info(&self) -> MetasrvNodeInfo {
@@ -710,8 +718,10 @@ impl Metasrv {
version: build_info.version.to_string(),
git_commit: build_info.commit_short.to_string(),
start_time_ms: self.start_time_ms(),
cpus: self.resource_spec().cpus as u32,
memory_bytes: self.resource_spec().memory.unwrap_or_default().as_bytes(),
total_cpu_millicores: self.resource_stat.get_total_cpu_millicores(),
total_memory_bytes: self.resource_stat.get_total_memory_bytes(),
cpu_usage_millicores: self.resource_stat.get_cpu_usage_millicores(),
memory_usage_bytes: self.resource_stat.get_memory_usage_bytes(),
hostname: hostname::get()
.unwrap_or_default()
.to_string_lossy()

View File

@@ -46,6 +46,7 @@ use common_meta::stats::topic::TopicStatsRegistry;
use common_meta::wal_options_allocator::{build_kafka_client, build_wal_options_allocator};
use common_procedure::ProcedureManagerRef;
use common_procedure::local::{LocalManager, ManagerConfig};
use common_stat::ResourceStatImpl;
use common_telemetry::{info, warn};
use snafu::{ResultExt, ensure};
use store_api::storage::MAX_REGION_SEQ;
@@ -517,6 +518,9 @@ impl MetasrvBuilder {
.try_start()
.context(error::InitReconciliationManagerSnafu)?;
let mut resource_stat = ResourceStatImpl::default();
resource_stat.start_collect_cpu_usage();
Ok(Metasrv {
state,
started: Arc::new(AtomicBool::new(false)),
@@ -556,7 +560,7 @@ impl MetasrvBuilder {
table_id_sequence,
reconciliation_manager,
topic_stats_registry,
resource_spec: Default::default(),
resource_stat: Arc::new(resource_stat),
})
}
}

View File

@@ -19,7 +19,7 @@ use api::v1::meta::MailboxMessage;
use common_error::ext::BoxedError;
use common_meta::distributed_time_constants::REGION_LEASE_SECS;
use common_meta::instruction::{
DowngradeRegion, DowngradeRegionReply, Instruction, InstructionReply,
DowngradeRegion, DowngradeRegionReply, DowngradeRegionsReply, Instruction, InstructionReply,
};
use common_procedure::{Context as ProcedureContext, Status};
use common_telemetry::{error, info, warn};
@@ -120,10 +120,10 @@ impl DowngradeLeaderRegion {
) -> Instruction {
let pc = &ctx.persistent_ctx;
let region_id = pc.region_id;
Instruction::DowngradeRegion(DowngradeRegion {
Instruction::DowngradeRegions(vec![DowngradeRegion {
region_id,
flush_timeout: Some(flush_timeout),
})
}])
}
/// Tries to downgrade a leader region.
@@ -173,12 +173,7 @@ impl DowngradeLeaderRegion {
region_id,
now.elapsed()
);
let InstructionReply::DowngradeRegion(DowngradeRegionReply {
last_entry_id,
metadata_last_entry_id,
exists,
error,
}) = reply
let InstructionReply::DowngradeRegions(DowngradeRegionsReply { replies }) = reply
else {
return error::UnexpectedInstructionReplySnafu {
mailbox_message: msg.to_string(),
@@ -187,6 +182,15 @@ impl DowngradeLeaderRegion {
.fail();
};
// TODO(weny): handle multiple replies.
let DowngradeRegionReply {
region_id,
last_entry_id,
metadata_last_entry_id,
exists,
error,
} = &replies[0];
if error.is_some() {
return error::RetryLaterSnafu {
reason: format!(
@@ -216,12 +220,12 @@ impl DowngradeLeaderRegion {
}
if let Some(last_entry_id) = last_entry_id {
ctx.volatile_ctx.set_last_entry_id(last_entry_id);
ctx.volatile_ctx.set_last_entry_id(*last_entry_id);
}
if let Some(metadata_last_entry_id) = metadata_last_entry_id {
ctx.volatile_ctx
.set_metadata_last_entry_id(metadata_last_entry_id);
.set_metadata_last_entry_id(*metadata_last_entry_id);
}
Ok(())
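
The downgrade path now wraps the single region in a batched `DowngradeRegions` instruction and, per the TODO above, consumes only the first entry of `DowngradeRegionsReply`. A compact sketch of that request/reply pairing with simplified stand-in types:

```rust
use std::time::Duration;

// Simplified stand-ins; field names follow the hunk above.
#[derive(Debug)]
struct DowngradeRegion {
    region_id: u64,
    flush_timeout: Option<Duration>,
}

#[derive(Debug)]
enum Instruction {
    DowngradeRegions(Vec<DowngradeRegion>),
}

#[derive(Debug)]
struct DowngradeRegionReply {
    region_id: u64,
    last_entry_id: Option<u64>,
    metadata_last_entry_id: Option<u64>,
    exists: bool,
    error: Option<String>,
}

#[derive(Debug)]
struct DowngradeRegionsReply {
    replies: Vec<DowngradeRegionReply>,
}

fn main() {
    // Building the instruction: a single-element batch, as in the
    // DowngradeLeaderRegion hunk above.
    let instruction = Instruction::DowngradeRegions(vec![DowngradeRegion {
        region_id: 42,
        flush_timeout: Some(Duration::from_secs(1)),
    }]);
    println!("send {instruction:?}");

    // Handling the reply: only replies[0] is consumed for now.
    let reply = DowngradeRegionsReply {
        replies: vec![DowngradeRegionReply {
            region_id: 42,
            last_entry_id: Some(100),
            metadata_last_entry_id: None,
            exists: true,
            error: None,
        }],
    };
    let first = &reply.replies[0];
    assert!(first.exists && first.error.is_none());
    println!(
        "last_entry_id = {:?}, metadata_last_entry_id = {:?}",
        first.last_entry_id, first.metadata_last_entry_id
    );
}
```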

View File

@@ -17,7 +17,8 @@ use std::collections::HashMap;
use api::v1::meta::mailbox_message::Payload;
use api::v1::meta::{HeartbeatResponse, MailboxMessage};
use common_meta::instruction::{
DowngradeRegionReply, FlushRegionReply, InstructionReply, SimpleReply, UpgradeRegionReply,
DowngradeRegionReply, DowngradeRegionsReply, FlushRegionReply, InstructionReply, SimpleReply,
UpgradeRegionReply,
};
use common_meta::key::TableMetadataManagerRef;
use common_meta::key::table_route::TableRouteValue;
@@ -183,12 +184,15 @@ pub fn new_downgrade_region_reply(
to: "meta".to_string(),
timestamp_millis: current_time_millis(),
payload: Some(Payload::Json(
serde_json::to_string(&InstructionReply::DowngradeRegion(DowngradeRegionReply {
last_entry_id,
metadata_last_entry_id: None,
exists: exist,
error,
}))
serde_json::to_string(&InstructionReply::DowngradeRegions(
DowngradeRegionsReply::new(vec![DowngradeRegionReply {
region_id: RegionId::new(0, 0),
last_entry_id,
metadata_last_entry_id: None,
exists: exist,
error,
}]),
))
.unwrap(),
)),
}

View File

@@ -97,8 +97,10 @@ impl Metasrv {
version: build_info.version.to_string(),
git_commit: build_info.commit_short.to_string(),
start_time_ms: self.start_time_ms(),
cpus: self.resource_spec().cpus as u32,
memory_bytes: self.resource_spec().memory.unwrap_or_default().as_bytes(),
total_cpu_millicores: self.resource_stat().get_total_cpu_millicores(),
total_memory_bytes: self.resource_stat().get_total_memory_bytes(),
cpu_usage_millicores: self.resource_stat().get_cpu_usage_millicores(),
memory_usage_bytes: self.resource_stat().get_memory_usage_bytes(),
hostname: hostname::get()
.unwrap_or_default()
.to_string_lossy()

View File

@@ -127,12 +127,12 @@ mod tests {
assert_eq!(
debug_format,
r#"
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000001/data/<file_id>.parquet", file_size: 3173, index_file_path: Some("test_metric_region/11_0000000001/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(20), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000002/data/<file_id>.parquet", file_size: 3173, index_file_path: Some("test_metric_region/11_0000000002/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417473(11, 16777217), table_id: 11, region_number: 16777217, region_group: 1, region_sequence: 1, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000001/metadata/<file_id>.parquet", file_size: 3505, index_file_path: None, index_file_size: None, num_rows: 8, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(8), origin_region_id: 47261417473(11, 16777217), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417474(11, 16777218), table_id: 11, region_number: 16777218, region_group: 1, region_sequence: 2, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000002/metadata/<file_id>.parquet", file_size: 3489, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 47261417474(11, 16777218), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", level: 0, file_path: "test_metric_region/22_0000000042/data/<file_id>.parquet", file_size: 3173, index_file_path: Some("test_metric_region/22_0000000042/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94506057770(22, 16777258), table_id: 22, region_number: 16777258, region_group: 1, region_sequence: 42, file_id: "<file_id>", level: 0, file_path: "test_metric_region/22_0000000042/metadata/<file_id>.parquet", file_size: 3489, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 94506057770(22, 16777258), node_id: None, visible: true }"#
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000001/data/<file_id>.parquet", file_size: 3173, index_file_path: Some("test_metric_region/11_0000000001/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(20), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000002/data/<file_id>.parquet", file_size: 3173, index_file_path: Some("test_metric_region/11_0000000002/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417473(11, 16777217), table_id: 11, region_number: 16777217, region_group: 1, region_sequence: 1, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000001/metadata/<file_id>.parquet", file_size: 3505, index_file_path: None, index_file_size: None, num_rows: 8, num_row_groups: 1, num_series: Some(8), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(8), origin_region_id: 47261417473(11, 16777217), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417474(11, 16777218), table_id: 11, region_number: 16777218, region_group: 1, region_sequence: 2, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000002/metadata/<file_id>.parquet", file_size: 3489, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, num_series: Some(4), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 47261417474(11, 16777218), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", level: 0, file_path: "test_metric_region/22_0000000042/data/<file_id>.parquet", file_size: 3173, index_file_path: Some("test_metric_region/22_0000000042/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94506057770(22, 16777258), table_id: 22, region_number: 16777258, region_group: 1, region_sequence: 42, file_id: "<file_id>", level: 0, file_path: "test_metric_region/22_0000000042/metadata/<file_id>.parquet", file_size: 3489, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, num_series: Some(4), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 94506057770(22, 16777258), node_id: None, visible: true }"#
);
// list from storage
let storage_entries = mito

View File

@@ -65,7 +65,7 @@ partition.workspace = true
puffin.workspace = true
rand.workspace = true
rayon = "1.10"
regex = "1.5"
regex.workspace = true
rskafka = { workspace = true, optional = true }
rstest = { workspace = true, optional = true }
rstest_reuse = { workspace = true, optional = true }

View File

@@ -433,6 +433,7 @@ impl Compactor for DefaultCompactor {
num_row_groups: sst_info.num_row_groups,
sequence: max_sequence,
partition_expr: partition_expr.clone(),
num_series: sst_info.num_series,
})
.collect::<Vec<_>>();
let output_file_names =

View File

@@ -78,6 +78,7 @@ pub fn new_file_handle_with_size_and_sequence(
index_file_size: 0,
num_rows: 0,
num_row_groups: 0,
num_series: 0,
sequence: NonZeroU64::new(sequence),
partition_expr: None,
},

View File

@@ -859,9 +859,9 @@ async fn test_cache_null_primary_key_with_format(flat_format: bool) {
#[tokio::test]
async fn test_list_ssts() {
test_list_ssts_with_format(false, r#"
ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", level: 0, file_path: "test/11_0000000001/<file_id>.parquet", file_size: 2531, index_file_path: Some("test/11_0000000001/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", level: 0, file_path: "test/11_0000000002/<file_id>.parquet", file_size: 2531, index_file_path: Some("test/11_0000000002/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", level: 0, file_path: "test/22_0000000042/<file_id>.parquet", file_size: 2531, index_file_path: Some("test/22_0000000042/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"# ,r#"
ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", level: 0, file_path: "test/11_0000000001/<file_id>.parquet", file_size: 2531, index_file_path: Some("test/11_0000000001/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", level: 0, file_path: "test/11_0000000002/<file_id>.parquet", file_size: 2531, index_file_path: Some("test/11_0000000002/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", level: 0, file_path: "test/22_0000000042/<file_id>.parquet", file_size: 2531, index_file_path: Some("test/22_0000000042/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"# ,r#"
StorageSstEntry { file_path: "test/11_0000000001/<file_id>.parquet", file_size: None, last_modified_ms: None, node_id: None }
StorageSstEntry { file_path: "test/11_0000000001/index/<file_id>.puffin", file_size: None, last_modified_ms: None, node_id: None }
StorageSstEntry { file_path: "test/11_0000000002/<file_id>.parquet", file_size: None, last_modified_ms: None, node_id: None }
@@ -869,9 +869,9 @@ StorageSstEntry { file_path: "test/11_0000000002/index/<file_id>.puffin", file_s
StorageSstEntry { file_path: "test/22_0000000042/<file_id>.parquet", file_size: None, last_modified_ms: None, node_id: None }
StorageSstEntry { file_path: "test/22_0000000042/index/<file_id>.puffin", file_size: None, last_modified_ms: None, node_id: None }"#).await;
test_list_ssts_with_format(true, r#"
ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", level: 0, file_path: "test/11_0000000001/<file_id>.parquet", file_size: 2855, index_file_path: Some("test/11_0000000001/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", level: 0, file_path: "test/11_0000000002/<file_id>.parquet", file_size: 2855, index_file_path: Some("test/11_0000000002/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", level: 0, file_path: "test/22_0000000042/<file_id>.parquet", file_size: 2855, index_file_path: Some("test/22_0000000042/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"#, r#"
ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", level: 0, file_path: "test/11_0000000001/<file_id>.parquet", file_size: 2855, index_file_path: Some("test/11_0000000001/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", level: 0, file_path: "test/11_0000000002/<file_id>.parquet", file_size: 2855, index_file_path: Some("test/11_0000000002/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", level: 0, file_path: "test/22_0000000042/<file_id>.parquet", file_size: 2855, index_file_path: Some("test/22_0000000042/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"#, r#"
StorageSstEntry { file_path: "test/11_0000000001/<file_id>.parquet", file_size: None, last_modified_ms: None, node_id: None }
StorageSstEntry { file_path: "test/11_0000000001/index/<file_id>.puffin", file_size: None, last_modified_ms: None, node_id: None }
StorageSstEntry { file_path: "test/11_0000000002/<file_id>.parquet", file_size: None, last_modified_ms: None, node_id: None }
@@ -972,17 +972,17 @@ async fn test_list_ssts_with_format(
#[tokio::test]
async fn test_all_index_metas_list_all_types() {
test_all_index_metas_list_all_types_with_format(false, r#"
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6032), index_type: "bloom_filter", target_type: "column", target_key: "3", target_json: "{\"column\":3}", blob_size: 751, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":640,\"row_count\":20,\"rows_per_segment\":2,\"segment_count\":10}}"), node_id: None }
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6032), index_type: "fulltext_bloom", target_type: "column", target_key: "1", target_json: "{\"column\":1}", blob_size: 87, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":64,\"row_count\":20,\"rows_per_segment\":4,\"segment_count\":5},\"fulltext\":{\"analyzer\":\"English\",\"case_sensitive\":false}}"), node_id: None }
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6032), index_type: "fulltext_tantivy", target_type: "column", target_key: "2", target_json: "{\"column\":2}", blob_size: 1104, meta_json: Some("{\"fulltext\":{\"analyzer\":\"Chinese\",\"case_sensitive\":true}}"), node_id: None }
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6032), index_type: "inverted", target_type: "column", target_key: "0", target_json: "{\"column\":0}", blob_size: 70, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":44,\"inverted_index_size\":70,\"null_bitmap_size\":8,\"relative_fst_offset\":26,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None }
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6032), index_type: "inverted", target_type: "column", target_key: "4", target_json: "{\"column\":4}", blob_size: 515, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":147,\"inverted_index_size\":515,\"null_bitmap_size\":8,\"relative_fst_offset\":368,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None }"#).await;
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6500), index_type: "bloom_filter", target_type: "column", target_key: "1", target_json: "{\"column\":1}", blob_size: 751, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":640,\"row_count\":20,\"rows_per_segment\":2,\"segment_count\":10}}"), node_id: None }
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6500), index_type: "fulltext_bloom", target_type: "column", target_key: "4", target_json: "{\"column\":4}", blob_size: 89, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":64,\"row_count\":20,\"rows_per_segment\":4,\"segment_count\":5},\"fulltext\":{\"analyzer\":\"English\",\"case_sensitive\":false}}"), node_id: None }
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6500), index_type: "fulltext_tantivy", target_type: "column", target_key: "5", target_json: "{\"column\":5}", blob_size: 1100, meta_json: Some("{\"fulltext\":{\"analyzer\":\"Chinese\",\"case_sensitive\":true}}"), node_id: None }
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6500), index_type: "inverted", target_type: "column", target_key: "1", target_json: "{\"column\":1}", blob_size: 518, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":150,\"inverted_index_size\":518,\"null_bitmap_size\":8,\"relative_fst_offset\":368,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None }
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6500), index_type: "inverted", target_type: "column", target_key: "2", target_json: "{\"column\":2}", blob_size: 515, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":147,\"inverted_index_size\":515,\"null_bitmap_size\":8,\"relative_fst_offset\":368,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None }"#).await;
test_all_index_metas_list_all_types_with_format(true, r#"
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6144), index_type: "bloom_filter", target_type: "column", target_key: "3", target_json: "{\"column\":3}", blob_size: 751, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":640,\"row_count\":20,\"rows_per_segment\":2,\"segment_count\":10}}"), node_id: None }
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6144), index_type: "fulltext_bloom", target_type: "column", target_key: "1", target_json: "{\"column\":1}", blob_size: 89, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":64,\"row_count\":20,\"rows_per_segment\":4,\"segment_count\":5},\"fulltext\":{\"analyzer\":\"English\",\"case_sensitive\":false}}"), node_id: None }
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6144), index_type: "fulltext_tantivy", target_type: "column", target_key: "2", target_json: "{\"column\":2}", blob_size: 1104, meta_json: Some("{\"fulltext\":{\"analyzer\":\"Chinese\",\"case_sensitive\":true}}"), node_id: None }
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6144), index_type: "inverted", target_type: "column", target_key: "0", target_json: "{\"column\":0}", blob_size: 92, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":66,\"inverted_index_size\":92,\"null_bitmap_size\":8,\"relative_fst_offset\":26,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None }
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6144), index_type: "inverted", target_type: "column", target_key: "4", target_json: "{\"column\":4}", blob_size: 515, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":147,\"inverted_index_size\":515,\"null_bitmap_size\":8,\"relative_fst_offset\":368,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None }"#).await;
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6500), index_type: "bloom_filter", target_type: "column", target_key: "1", target_json: "{\"column\":1}", blob_size: 751, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":640,\"row_count\":20,\"rows_per_segment\":2,\"segment_count\":10}}"), node_id: None }
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6500), index_type: "fulltext_bloom", target_type: "column", target_key: "4", target_json: "{\"column\":4}", blob_size: 89, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":64,\"row_count\":20,\"rows_per_segment\":4,\"segment_count\":5},\"fulltext\":{\"analyzer\":\"English\",\"case_sensitive\":false}}"), node_id: None }
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6500), index_type: "fulltext_tantivy", target_type: "column", target_key: "5", target_json: "{\"column\":5}", blob_size: 1100, meta_json: Some("{\"fulltext\":{\"analyzer\":\"Chinese\",\"case_sensitive\":true}}"), node_id: None }
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6500), index_type: "inverted", target_type: "column", target_key: "1", target_json: "{\"column\":1}", blob_size: 518, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":150,\"inverted_index_size\":518,\"null_bitmap_size\":8,\"relative_fst_offset\":368,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None }
PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/<file_id>.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_size: Some(6500), index_type: "inverted", target_type: "column", target_key: "2", target_json: "{\"column\":2}", blob_size: 515, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":147,\"inverted_index_size\":515,\"null_bitmap_size\":8,\"relative_fst_offset\":368,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None }"#).await;
}
async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expect_format: &str) {
@@ -1001,12 +1001,33 @@ async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expe
// One region with both fulltext backends and inverted index enabled, plus bloom skipping index
let region_id = RegionId::new(11, 1);
let mut request = CreateRequestBuilder::new().tag_num(3).field_num(2).build();
// inverted index on tag_0
request.column_metadatas[0]
let mut request = CreateRequestBuilder::new().tag_num(1).field_num(2).build();
// bloom filter skipping index on field_1
let skipping = SkippingIndexOptions::new_unchecked(2, 0.01, SkippingIndexType::BloomFilter);
request.column_metadatas[1]
.column_schema
.set_skipping_options(&skipping)
.unwrap();
// inverted index on field_1
request.column_metadatas[2]
.column_schema
.set_inverted_index(true);
// fulltext bloom on tag_1
// inverted index on tag_0
request.column_metadatas[1]
.column_schema
.set_inverted_index(true);
request.column_metadatas.push(ColumnMetadata {
column_schema: ColumnSchema::new(
"field_2".to_string(),
ConcreteDataType::string_datatype(),
true,
),
semantic_type: SemanticType::Field,
column_id: 4,
});
// fulltext bloom on field_2
let ft_bloom = FulltextOptions::new_unchecked(
true,
FulltextAnalyzer::English,
@@ -1015,11 +1036,24 @@ async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expe
4,
0.001,
);
request.column_metadatas[1]
request
.column_metadatas
.last_mut()
.unwrap()
.column_schema
.set_fulltext_options(&ft_bloom)
.unwrap();
// fulltext tantivy on tag_2
request.column_metadatas.push(ColumnMetadata {
column_schema: ColumnSchema::new(
"field_3".to_string(),
ConcreteDataType::string_datatype(),
true,
),
semantic_type: SemanticType::Field,
column_id: 5,
});
// fulltext tantivy on field_3
let ft_tantivy = FulltextOptions::new_unchecked(
true,
FulltextAnalyzer::Chinese,
@@ -1028,28 +1062,20 @@ async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expe
2,
0.01,
);
request.column_metadatas[2]
request
.column_metadatas
.last_mut()
.unwrap()
.column_schema
.set_fulltext_options(&ft_tantivy)
.unwrap();
// bloom filter skipping index on field_1 (which is at index 3)
let skipping = SkippingIndexOptions::new_unchecked(2, 0.01, SkippingIndexType::BloomFilter);
request.column_metadatas[3]
.column_schema
.set_skipping_options(&skipping)
.unwrap();
// inverted index on field_1
request.column_metadatas[4]
.column_schema
.set_inverted_index(true);
engine
.handle_request(region_id, RegionRequest::Create(request.clone()))
.await
.unwrap();
// write some rows (schema: tag_0, tag_1, tag_2, field_0, field_1, ts)
// write some rows (schema: tag_0, field_0, field_1, field_2, field_3, ts)
let column_schemas = rows_schema(&request);
let rows_vec: Vec<api::v1::Row> = (0..20)
.map(|ts| api::v1::Row {
@@ -1057,12 +1083,6 @@ async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expe
api::v1::Value {
value_data: Some(api::v1::value::ValueData::StringValue("x".to_string())),
},
api::v1::Value {
value_data: Some(api::v1::value::ValueData::StringValue("y".to_string())),
},
api::v1::Value {
value_data: Some(api::v1::value::ValueData::StringValue("z".to_string())),
},
api::v1::Value {
value_data: Some(api::v1::value::ValueData::F64Value(ts as f64)),
},
@@ -1074,6 +1094,12 @@ async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expe
ts as i64 * 1000,
)),
},
api::v1::Value {
value_data: Some(api::v1::value::ValueData::StringValue("y".to_string())),
},
api::v1::Value {
value_data: Some(api::v1::value::ValueData::StringValue("z".to_string())),
},
],
})
.collect();
@@ -1095,7 +1121,7 @@ async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expe
.unwrap();
fn bucket_size(size: u64) -> u64 {
if size < 512 { size } else { (size / 16) * 16 }
if size < 512 { size } else { (size / 100) * 100 }
}
let mut metas = engine.all_index_metas().await;
@@ -1125,5 +1151,5 @@ async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expe
.map(|entry| format!("\n{:?}", entry))
.collect::<String>();
assert_eq!(debug_format, expect_format);
assert_eq!(expect_format, debug_format);
}

View File

@@ -32,11 +32,6 @@ use crate::test_util::{
CreateRequestBuilder, TestEnv, build_rows, flush_region, put_rows, reopen_region, rows_schema,
};
// wait listener receives enough success count.
async fn wait_finish(listener: &IndexBuildListener, times: usize) {
listener.wait_finish(times).await;
}
fn async_build_mode_config(is_create_on_flush: bool) -> MitoConfig {
let mut config = MitoConfig::default();
config.index.build_mode = IndexBuildMode::Async;
@@ -84,7 +79,7 @@ fn assert_listener_counts(
expected_success_count: usize,
) {
assert_eq!(listener.begin_count(), expected_begin_count);
assert_eq!(listener.success_count(), expected_success_count);
assert_eq!(listener.finish_count(), expected_success_count);
}
#[tokio::test]
@@ -155,7 +150,7 @@ async fn test_index_build_type_flush() {
flush_region(&engine, region_id, None).await;
// After 2 index build task are finished, 2 index files should exist.
wait_finish(&listener, 2).await;
listener.wait_finish(2).await;
let scanner = engine
.scanner(region_id, ScanRequest::default())
.await
@@ -204,6 +199,8 @@ async fn test_index_build_type_compact() {
put_and_flush(&engine, region_id, &column_schemas, 15..25).await;
put_and_flush(&engine, region_id, &column_schemas, 40..50).await;
// All index build tasks beginning means that all flush tasks have finished.
listener.wait_begin(4).await;
// Before compaction is triggered, files should be 4, and not all index files are built.
let scanner = engine
.scanner(region_id, ScanRequest::default())
@@ -216,8 +213,8 @@ async fn test_index_build_type_compact() {
// This explicit compaction call serves to make the process deterministic for the test.
compact(&engine, region_id).await;
listener.wait_begin(5).await; // 4 flush + 1 compaction begin
// Before compaction is triggered, files should be 2, and not all index files are built.
listener.clear_success_count();
let scanner = engine
.scanner(region_id, ScanRequest::default())
.await
@@ -226,7 +223,7 @@ async fn test_index_build_type_compact() {
assert!(num_of_index_files(&engine, &scanner, region_id).await < 2);
// Wait a while to make sure index build tasks are finished.
wait_finish(&listener, 2).await;
listener.wait_stop(5).await; // 4 flush + 1 compaction = some abort + some finish
let scanner = engine
.scanner(region_id, ScanRequest::default())
.await
@@ -292,7 +289,7 @@ async fn test_index_build_type_schema_change() {
.handle_request(region_id, RegionRequest::Alter(set_index_request))
.await
.unwrap();
wait_finish(&listener, 1).await;
listener.wait_finish(1).await;
let scanner = engine
.scanner(region_id, ScanRequest::default())
.await

View File

@@ -75,10 +75,13 @@ pub trait EventListener: Send + Sync {
async fn on_notify_region_change_result_begin(&self, _region_id: RegionId) {}
/// Notifies the listener that the index build task is executed successfully.
async fn on_index_build_success(&self, _region_file_id: RegionFileId) {}
async fn on_index_build_finish(&self, _region_file_id: RegionFileId) {}
/// Notifies the listener that the index build task is started.
async fn on_index_build_begin(&self, _region_file_id: RegionFileId) {}
/// Notifies the listener that the index build task is aborted.
async fn on_index_build_abort(&self, _region_file_id: RegionFileId) {}
}
pub type EventListenerRef = Arc<dyn EventListener>;
@@ -309,45 +312,75 @@ impl EventListener for NotifyRegionChangeResultListener {
#[derive(Default)]
pub struct IndexBuildListener {
notify: Notify,
success_count: AtomicUsize,
start_count: AtomicUsize,
begin_count: AtomicUsize,
begin_notify: Notify,
finish_count: AtomicUsize,
finish_notify: Notify,
abort_count: AtomicUsize,
abort_notify: Notify,
// stop means finished or aborted
stop_notify: Notify,
}
impl IndexBuildListener {
/// Wait until index build is done for `times` times.
pub async fn wait_finish(&self, times: usize) {
while self.success_count.load(Ordering::Relaxed) < times {
self.notify.notified().await;
while self.finish_count.load(Ordering::Relaxed) < times {
self.finish_notify.notified().await;
}
}
/// Wait until index build is stopped for `times` times.
pub async fn wait_stop(&self, times: usize) {
while self.finish_count.load(Ordering::Relaxed) + self.abort_count.load(Ordering::Relaxed)
< times
{
self.stop_notify.notified().await;
}
}
/// Wait until index build is begun for `times` times.
pub async fn wait_begin(&self, times: usize) {
while self.begin_count.load(Ordering::Relaxed) < times {
self.begin_notify.notified().await;
}
}
/// Clears the finish count.
pub fn clear_success_count(&self) {
self.success_count.store(0, Ordering::Relaxed);
pub fn clear_finish_count(&self) {
self.finish_count.store(0, Ordering::Relaxed);
}
/// Returns the finish count.
pub fn success_count(&self) -> usize {
self.success_count.load(Ordering::Relaxed)
pub fn finish_count(&self) -> usize {
self.finish_count.load(Ordering::Relaxed)
}
/// Returns the begin count.
pub fn begin_count(&self) -> usize {
self.start_count.load(Ordering::Relaxed)
self.begin_count.load(Ordering::Relaxed)
}
}
#[async_trait]
impl EventListener for IndexBuildListener {
async fn on_index_build_success(&self, region_file_id: RegionFileId) {
async fn on_index_build_finish(&self, region_file_id: RegionFileId) {
info!("Region {} index build successfully", region_file_id);
self.success_count.fetch_add(1, Ordering::Relaxed);
self.notify.notify_one();
self.finish_count.fetch_add(1, Ordering::Relaxed);
self.finish_notify.notify_one();
self.stop_notify.notify_one();
}
async fn on_index_build_begin(&self, region_file_id: RegionFileId) {
info!("Region {} index build begin", region_file_id);
self.start_count.fetch_add(1, Ordering::Relaxed);
self.begin_count.fetch_add(1, Ordering::Relaxed);
self.begin_notify.notify_one();
}
async fn on_index_build_abort(&self, region_file_id: RegionFileId) {
info!("Region {} index build aborted", region_file_id);
self.abort_count.fetch_add(1, Ordering::Relaxed);
self.abort_notify.notify_one();
self.stop_notify.notify_one();
}
}

View File

@@ -641,6 +641,7 @@ impl RegionFlushTask {
num_row_groups: sst_info.num_row_groups,
sequence: NonZeroU64::new(max_sequence),
partition_expr,
num_series: sst_info.num_series,
}
}

View File

@@ -269,6 +269,7 @@ async fn checkpoint_with_different_compression_types() {
num_row_groups: 0,
sequence: None,
partition_expr: None,
num_series: 0,
};
let action = RegionMetaActionList::new(vec![RegionMetaAction::Edit(RegionEdit {
files_to_add: vec![file_meta],
@@ -334,6 +335,7 @@ fn generate_action_lists(num: usize) -> (Vec<FileId>, Vec<RegionMetaActionList>)
num_row_groups: 0,
sequence: None,
partition_expr: None,
num_series: 0,
};
let action = RegionMetaActionList::new(vec![RegionMetaAction::Edit(RegionEdit {
files_to_add: vec![file_meta],

View File

@@ -69,7 +69,7 @@ use crate::sst::parquet::flat_format::primary_key_column_index;
use crate::sst::parquet::format::{PrimaryKeyArray, PrimaryKeyArrayBuilder, ReadFormat};
use crate::sst::parquet::helper::parse_parquet_metadata;
use crate::sst::parquet::{PARQUET_METADATA_KEY, SstInfo};
use crate::sst::to_sst_arrow_schema;
use crate::sst::{SeriesEstimator, to_sst_arrow_schema};
const INIT_DICT_VALUE_CAPACITY: usize = 8;
@@ -563,6 +563,7 @@ impl EncodedBulkPart {
num_row_groups: self.metadata.parquet_metadata.num_row_groups() as u64,
file_metadata: Some(self.metadata.parquet_metadata.clone()),
index_metadata: IndexOutput::default(),
num_series: self.metadata.num_series,
}
}
@@ -602,6 +603,8 @@ pub struct BulkPartMeta {
pub parquet_metadata: Arc<ParquetMetaData>,
/// Part region schema.
pub region_metadata: RegionMetadataRef,
/// Number of series.
pub num_series: u64,
}
/// Metrics for encoding a part.
@@ -669,6 +672,7 @@ impl BulkPartEncoder {
let mut writer = ArrowWriter::try_new(&mut buf, arrow_schema, self.writer_props.clone())
.context(EncodeMemtableSnafu)?;
let mut total_rows = 0;
let mut series_estimator = SeriesEstimator::default();
// Process each batch from the iterator
let mut iter_start = Instant::now();
@@ -679,6 +683,7 @@ impl BulkPartEncoder {
continue;
}
series_estimator.update_flat(&batch);
metrics.raw_size += record_batch_estimated_size(&batch);
let write_start = Instant::now();
writer.write(&batch).context(EncodeMemtableSnafu)?;
@@ -701,6 +706,7 @@ impl BulkPartEncoder {
let buf = Bytes::from(buf);
let parquet_metadata = Arc::new(parse_parquet_metadata(file_metadata)?);
let num_series = series_estimator.finish();
Ok(Some(EncodedBulkPart {
data: buf,
@@ -710,6 +716,7 @@ impl BulkPartEncoder {
min_timestamp,
parquet_metadata,
region_metadata: self.metadata.clone(),
num_series,
},
}))
}
@@ -742,6 +749,7 @@ impl BulkPartEncoder {
min_timestamp: part.min_timestamp,
parquet_metadata,
region_metadata: self.metadata.clone(),
num_series: part.estimated_series_count() as u64,
},
}))
}

View File

@@ -13,12 +13,10 @@
// limitations under the License.
use std::collections::VecDeque;
use std::ops::BitAnd;
use std::sync::Arc;
use bytes::Bytes;
use datatypes::arrow::array::BooleanArray;
use datatypes::arrow::buffer::BooleanBuffer;
use datatypes::arrow::record_batch::RecordBatch;
use parquet::arrow::ProjectionMask;
use parquet::arrow::arrow_reader::ParquetRecordBatchReader;
@@ -30,7 +28,7 @@ use crate::error::{self, ComputeArrowSnafu, DecodeArrowRowGroupSnafu};
use crate::memtable::bulk::context::{BulkIterContext, BulkIterContextRef};
use crate::memtable::bulk::row_group_reader::MemtableRowGroupReaderBuilder;
use crate::sst::parquet::flat_format::sequence_column_index;
use crate::sst::parquet::reader::{MaybeFilter, RowGroupReaderContext};
use crate::sst::parquet::reader::RowGroupReaderContext;
/// Iterator for reading data inside a bulk part.
pub struct EncodedBulkPartIter {
@@ -191,38 +189,13 @@ fn apply_combined_filters(
let num_rows = record_batch.num_rows();
let mut combined_filter = None;
// First, apply predicate filters.
// First, apply predicate filters using the shared method.
if !context.base.filters.is_empty() {
let num_rows = record_batch.num_rows();
let mut mask = BooleanBuffer::new_set(num_rows);
// Run filter one by one and combine them result, similar to RangeBase::precise_filter
for filter_ctx in &context.base.filters {
let filter = match filter_ctx.filter() {
MaybeFilter::Filter(f) => f,
// Column matches.
MaybeFilter::Matched => continue,
// Column doesn't match, filter the entire batch.
MaybeFilter::Pruned => return Ok(None),
};
// Safety: We checked the format type in new().
let Some(column_index) = context
.read_format()
.as_flat()
.unwrap()
.projected_index_by_id(filter_ctx.column_id())
else {
continue;
};
let array = record_batch.column(column_index);
let result = filter
.evaluate_array(array)
.context(crate::error::RecordBatchSnafu)?;
mask = mask.bitand(&result);
}
// Convert the mask to BooleanArray
let predicate_mask = context.base.compute_filter_mask_flat(&record_batch)?;
// If predicate filters out the entire batch, return None early
let Some(mask) = predicate_mask else {
return Ok(None);
};
combined_filter = Some(BooleanArray::from(mask));
}

View File

@@ -386,7 +386,8 @@ impl FlatCompatBatch {
/// Repeats the vector value `to_len` times.
fn repeat_vector(vector: &VectorRef, to_len: usize, is_tag: bool) -> Result<ArrayRef> {
assert_eq!(1, vector.len());
if is_tag {
let data_type = vector.data_type();
if is_tag && data_type.is_string() {
let values = vector.to_arrow_array();
if values.is_null(0) {
// Creates a dictionary array with `to_len` null keys.

View File

@@ -48,6 +48,8 @@ pub struct FlatProjectionMapper {
/// Ids of columns to project. It keeps ids in the same order as the `projection`
/// indices to build the mapper.
/// The mapper won't deduplicate the column ids.
///
/// Note that this doesn't contain the `__table_id` and `__tsid`.
column_ids: Vec<ColumnId>,
/// Ids and DataTypes of columns of the expected batch.
/// We can use this to check if the batch is compatible with the expected schema.

View File

@@ -608,6 +608,7 @@ impl MitoRegion {
index_file_size,
num_rows: meta.num_rows,
num_row_groups: meta.num_row_groups,
num_series: Some(meta.num_series),
min_ts: meta.time_range.0,
max_ts: meta.time_range.1,
sequence: meta.sequence.map(|s| s.get()),

View File

@@ -431,6 +431,7 @@ mod tests {
num_row_groups: 1,
sequence: NonZeroU64::new(1),
partition_expr,
num_series: 1,
}
}

View File

@@ -21,7 +21,9 @@ use common_base::readable_size::ReadableSize;
use datatypes::arrow::datatypes::{
DataType as ArrowDataType, Field, FieldRef, Fields, Schema, SchemaRef,
};
use datatypes::arrow::record_batch::RecordBatch;
use datatypes::prelude::ConcreteDataType;
use datatypes::timestamp::timestamp_array_to_primitive;
use serde::{Deserialize, Serialize};
use store_api::codec::PrimaryKeyEncoding;
use store_api::metadata::RegionMetadata;
@@ -29,6 +31,9 @@ use store_api::storage::consts::{
OP_TYPE_COLUMN_NAME, PRIMARY_KEY_COLUMN_NAME, SEQUENCE_COLUMN_NAME,
};
use crate::read::Batch;
use crate::sst::parquet::flat_format::time_index_column_index;
pub mod file;
pub mod file_purger;
pub mod file_ref;
@@ -241,3 +246,426 @@ fn plain_internal_fields() -> [FieldRef; 2] {
Arc::new(Field::new(OP_TYPE_COLUMN_NAME, ArrowDataType::UInt8, false)),
]
}
/// Gets the estimated number of series from record batches.
///
/// This struct tracks the last timestamp value to detect series boundaries
/// by observing when timestamps decrease (indicating a new series).
#[derive(Default)]
pub(crate) struct SeriesEstimator {
/// The last timestamp value seen
last_timestamp: Option<i64>,
/// The estimated number of series
series_count: u64,
}
impl SeriesEstimator {
/// Updates the estimator with a new Batch.
///
/// Since each Batch contains only one series, this increments the series count
/// when a new series boundary is detected and updates the last timestamp.
pub(crate) fn update(&mut self, batch: &Batch) {
let Some(last_ts) = batch.last_timestamp() else {
return;
};
// Checks if there's a boundary between the last batch and this batch
if let Some(prev_last_ts) = self.last_timestamp {
// If the first timestamp of this batch is less than or equal to the last
// timestamp we've seen, it indicates a new series
if let Some(first_ts) = batch.first_timestamp()
&& first_ts.value() <= prev_last_ts
{
self.series_count += 1;
}
} else {
// First batch, counts as first series
self.series_count = 1;
}
// Updates the last timestamp
self.last_timestamp = Some(last_ts.value());
}
/// Updates the estimator with a new record batch in flat format.
///
/// This method examines the time index column to detect series boundaries.
pub(crate) fn update_flat(&mut self, record_batch: &RecordBatch) {
let batch_rows = record_batch.num_rows();
if batch_rows == 0 {
return;
}
let time_index_pos = time_index_column_index(record_batch.num_columns());
let timestamps = record_batch.column(time_index_pos);
let Some((ts_values, _unit)) = timestamp_array_to_primitive(timestamps) else {
return;
};
let values = ts_values.values();
// Checks if there's a boundary between the last batch and this batch
if let Some(last_ts) = self.last_timestamp {
if values[0] <= last_ts {
self.series_count += 1;
}
} else {
// First batch, counts as first series
self.series_count = 1;
}
// Counts series boundaries within this batch.
for i in 0..batch_rows - 1 {
// We treat an equal timestamp as the start of a new series, which is different from
// how we split batches.
if values[i] >= values[i + 1] {
self.series_count += 1;
}
}
// Updates the last timestamp
self.last_timestamp = Some(values[batch_rows - 1]);
}
/// Returns the estimated number of series.
pub(crate) fn finish(&mut self) -> u64 {
self.last_timestamp = None;
let count = self.series_count;
self.series_count = 0;
count
}
}
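For reference, a minimal, self-contained sketch of the boundary rule that `update_flat` applies: plain `i64` vectors stand in for the decoded time index column, and the `estimate_series` helper below is illustrative only, not part of the crate.
// Mirrors the SeriesEstimator rule: a timestamp that does not strictly
// increase (across or within batches) marks the start of a new series.
fn estimate_series(batches: &[Vec<i64>]) -> u64 {
    let mut last_ts: Option<i64> = None;
    let mut series = 0u64;
    for values in batches {
        if values.is_empty() {
            continue;
        }
        match last_ts {
            // A non-increasing timestamp across the batch boundary starts a new series.
            Some(prev) if values[0] <= prev => series += 1,
            Some(_) => {}
            // The first non-empty batch counts as the first series.
            None => series = 1,
        }
        // Non-increasing neighbors inside the batch also start new series.
        for w in values.windows(2) {
            if w[0] >= w[1] {
                series += 1;
            }
        }
        last_ts = Some(*values.last().unwrap());
    }
    series
}
fn main() {
    // [10, 20, 30] then [5, 15, 10, 25]: the second batch starts a new series
    // (5 <= 30) and contains one more boundary (15 >= 10), so 3 series in total.
    assert_eq!(3, estimate_series(&[vec![10, 20, 30], vec![5, 15, 10, 25]]));
    // A strictly increasing continuation stays in the same series.
    assert_eq!(1, estimate_series(&[vec![1, 2, 3], vec![4, 5, 6]]));
}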
#[cfg(test)]
mod tests {
use std::sync::Arc;
use api::v1::OpType;
use datatypes::arrow::array::{
BinaryArray, DictionaryArray, TimestampMillisecondArray, UInt8Array, UInt8Builder,
UInt32Array, UInt64Array,
};
use datatypes::arrow::datatypes::{DataType as ArrowDataType, Field, Schema, TimeUnit};
use datatypes::arrow::record_batch::RecordBatch;
use super::*;
use crate::read::{Batch, BatchBuilder};
fn new_batch(
primary_key: &[u8],
timestamps: &[i64],
sequences: &[u64],
op_types: &[OpType],
) -> Batch {
let timestamps = Arc::new(TimestampMillisecondArray::from(timestamps.to_vec()));
let sequences = Arc::new(UInt64Array::from(sequences.to_vec()));
let mut op_type_builder = UInt8Builder::with_capacity(op_types.len());
for op_type in op_types {
op_type_builder.append_value(*op_type as u8);
}
let op_types = Arc::new(UInt8Array::from(
op_types.iter().map(|op| *op as u8).collect::<Vec<_>>(),
));
let mut builder = BatchBuilder::new(primary_key.to_vec());
builder
.timestamps_array(timestamps)
.unwrap()
.sequences_array(sequences)
.unwrap()
.op_types_array(op_types)
.unwrap();
builder.build().unwrap()
}
fn new_flat_record_batch(timestamps: &[i64]) -> RecordBatch {
// Flat format has: [fields..., time_index, __primary_key, __sequence, __op_type]
let num_cols = 4; // time_index + 3 internal columns
let time_index_pos = time_index_column_index(num_cols);
assert_eq!(time_index_pos, 0); // For 4 columns, time index should be at position 0
let time_array = Arc::new(TimestampMillisecondArray::from(timestamps.to_vec()));
let pk_array = Arc::new(DictionaryArray::new(
UInt32Array::from(vec![0; timestamps.len()]),
Arc::new(BinaryArray::from(vec![b"test".as_slice()])),
));
let seq_array = Arc::new(UInt64Array::from(vec![1; timestamps.len()]));
let op_array = Arc::new(UInt8Array::from(vec![1; timestamps.len()]));
let schema = Arc::new(Schema::new(vec![
Field::new(
"time",
ArrowDataType::Timestamp(TimeUnit::Millisecond, None),
false,
),
Field::new_dictionary(
"__primary_key",
ArrowDataType::UInt32,
ArrowDataType::Binary,
false,
),
Field::new("__sequence", ArrowDataType::UInt64, false),
Field::new("__op_type", ArrowDataType::UInt8, false),
]));
RecordBatch::try_new(schema, vec![time_array, pk_array, seq_array, op_array]).unwrap()
}
#[test]
fn test_series_estimator_empty_batch() {
let mut estimator = SeriesEstimator::default();
let batch = new_batch(b"test", &[], &[], &[]);
estimator.update(&batch);
assert_eq!(0, estimator.finish());
}
#[test]
fn test_series_estimator_single_batch() {
let mut estimator = SeriesEstimator::default();
let batch = new_batch(
b"test",
&[1, 2, 3],
&[1, 2, 3],
&[OpType::Put, OpType::Put, OpType::Put],
);
estimator.update(&batch);
assert_eq!(1, estimator.finish());
}
#[test]
fn test_series_estimator_multiple_batches_same_series() {
let mut estimator = SeriesEstimator::default();
// First batch with timestamps 1, 2, 3
let batch1 = new_batch(
b"test",
&[1, 2, 3],
&[1, 2, 3],
&[OpType::Put, OpType::Put, OpType::Put],
);
estimator.update(&batch1);
// Second batch with timestamps 4, 5, 6 (continuation)
let batch2 = new_batch(
b"test",
&[4, 5, 6],
&[4, 5, 6],
&[OpType::Put, OpType::Put, OpType::Put],
);
estimator.update(&batch2);
assert_eq!(1, estimator.finish());
}
#[test]
fn test_series_estimator_new_series_detected() {
let mut estimator = SeriesEstimator::default();
// First batch with timestamps 1, 2, 3
let batch1 = new_batch(
b"pk0",
&[1, 2, 3],
&[1, 2, 3],
&[OpType::Put, OpType::Put, OpType::Put],
);
estimator.update(&batch1);
// Second batch with timestamps 2, 3, 4 (timestamp goes back, new series)
let batch2 = new_batch(
b"pk1",
&[2, 3, 4],
&[4, 5, 6],
&[OpType::Put, OpType::Put, OpType::Put],
);
estimator.update(&batch2);
assert_eq!(2, estimator.finish());
}
#[test]
fn test_series_estimator_equal_timestamp_boundary() {
let mut estimator = SeriesEstimator::default();
// First batch ending at timestamp 5
let batch1 = new_batch(
b"test",
&[1, 2, 5],
&[1, 2, 3],
&[OpType::Put, OpType::Put, OpType::Put],
);
estimator.update(&batch1);
// Second batch starting at timestamp 5 (equal, indicates new series)
let batch2 = new_batch(
b"test",
&[5, 6, 7],
&[4, 5, 6],
&[OpType::Put, OpType::Put, OpType::Put],
);
estimator.update(&batch2);
assert_eq!(2, estimator.finish());
}
#[test]
fn test_series_estimator_finish_resets_state() {
let mut estimator = SeriesEstimator::default();
let batch1 = new_batch(
b"test",
&[1, 2, 3],
&[1, 2, 3],
&[OpType::Put, OpType::Put, OpType::Put],
);
estimator.update(&batch1);
assert_eq!(1, estimator.finish());
// After finish, state should be reset
let batch2 = new_batch(
b"test",
&[4, 5, 6],
&[4, 5, 6],
&[OpType::Put, OpType::Put, OpType::Put],
);
estimator.update(&batch2);
assert_eq!(1, estimator.finish());
}
#[test]
fn test_series_estimator_flat_empty_batch() {
let mut estimator = SeriesEstimator::default();
let record_batch = new_flat_record_batch(&[]);
estimator.update_flat(&record_batch);
assert_eq!(0, estimator.finish());
}
#[test]
fn test_series_estimator_flat_single_batch() {
let mut estimator = SeriesEstimator::default();
let record_batch = new_flat_record_batch(&[1, 2, 3]);
estimator.update_flat(&record_batch);
assert_eq!(1, estimator.finish());
}
#[test]
fn test_series_estimator_flat_series_boundary_within_batch() {
let mut estimator = SeriesEstimator::default();
// Timestamps decrease from 3 to 2, indicating a series boundary
let record_batch = new_flat_record_batch(&[1, 2, 3, 2, 4, 5]);
estimator.update_flat(&record_batch);
// Should detect boundary at position 3 (3 >= 2)
assert_eq!(2, estimator.finish());
}
#[test]
fn test_series_estimator_flat_multiple_boundaries_within_batch() {
let mut estimator = SeriesEstimator::default();
// Multiple series boundaries: 5>=4, 6>=3
let record_batch = new_flat_record_batch(&[1, 2, 5, 4, 6, 3, 7]);
estimator.update_flat(&record_batch);
assert_eq!(3, estimator.finish());
}
#[test]
fn test_series_estimator_flat_equal_timestamps() {
let mut estimator = SeriesEstimator::default();
// Equal timestamps are considered as new series
let record_batch = new_flat_record_batch(&[1, 2, 2, 3, 3, 3, 4]);
estimator.update_flat(&record_batch);
// Boundaries at: 2>=2, 3>=3, 3>=3
assert_eq!(4, estimator.finish());
}
#[test]
fn test_series_estimator_flat_multiple_batches_continuation() {
let mut estimator = SeriesEstimator::default();
// First batch: timestamps 1, 2, 3
let batch1 = new_flat_record_batch(&[1, 2, 3]);
estimator.update_flat(&batch1);
// Second batch: timestamps 4, 5, 6 (continuation)
let batch2 = new_flat_record_batch(&[4, 5, 6]);
estimator.update_flat(&batch2);
assert_eq!(1, estimator.finish());
}
#[test]
fn test_series_estimator_flat_multiple_batches_new_series() {
let mut estimator = SeriesEstimator::default();
// First batch: timestamps 1, 2, 3
let batch1 = new_flat_record_batch(&[1, 2, 3]);
estimator.update_flat(&batch1);
// Second batch: timestamps 2, 3, 4 (goes back to 2, new series)
let batch2 = new_flat_record_batch(&[2, 3, 4]);
estimator.update_flat(&batch2);
assert_eq!(2, estimator.finish());
}
#[test]
fn test_series_estimator_flat_boundary_at_batch_edge_equal() {
let mut estimator = SeriesEstimator::default();
// First batch ending at 5
let batch1 = new_flat_record_batch(&[1, 2, 5]);
estimator.update_flat(&batch1);
// Second batch starting at 5 (equal timestamp, new series)
let batch2 = new_flat_record_batch(&[5, 6, 7]);
estimator.update_flat(&batch2);
assert_eq!(2, estimator.finish());
}
#[test]
fn test_series_estimator_flat_mixed_batches() {
let mut estimator = SeriesEstimator::default();
// Batch 1: single series [10, 20, 30]
let batch1 = new_flat_record_batch(&[10, 20, 30]);
estimator.update_flat(&batch1);
// Batch 2: starts a new series [5, 15], with another boundary within the batch at [10, 25]
let batch2 = new_flat_record_batch(&[5, 15, 10, 25]);
estimator.update_flat(&batch2);
// Batch 3: continues from 25 to [30, 35]
let batch3 = new_flat_record_batch(&[30, 35]);
estimator.update_flat(&batch3);
// Expected: 1 (batch1) + 1 (batch2 start) + 1 (within batch2) = 3
assert_eq!(3, estimator.finish());
}
#[test]
fn test_series_estimator_flat_descending_timestamps() {
let mut estimator = SeriesEstimator::default();
// Strictly descending timestamps - each pair creates a boundary
let record_batch = new_flat_record_batch(&[10, 9, 8, 7, 6]);
estimator.update_flat(&record_batch);
// Boundaries: 10>=9, 9>=8, 8>=7, 7>=6 = 4 boundaries + 1 initial = 5 series
assert_eq!(5, estimator.finish());
}
#[test]
fn test_series_estimator_flat_finish_resets_state() {
let mut estimator = SeriesEstimator::default();
let batch1 = new_flat_record_batch(&[1, 2, 3]);
estimator.update_flat(&batch1);
assert_eq!(1, estimator.finish());
// After finish, state should be reset
let batch2 = new_flat_record_batch(&[4, 5, 6]);
estimator.update_flat(&batch2);
assert_eq!(1, estimator.finish());
}
}

View File

@@ -175,6 +175,10 @@ pub struct FileMeta {
deserialize_with = "deserialize_partition_expr"
)]
pub partition_expr: Option<PartitionExpr>,
/// Number of series in the file.
///
/// The number is 0 if the series number is not available.
pub num_series: u64,
}
impl Debug for FileMeta {
@@ -210,6 +214,7 @@ impl Debug for FileMeta {
}
})
.field("partition_expr", &self.partition_expr)
.field("num_series", &self.num_series)
.finish()
}
}
@@ -458,6 +463,7 @@ mod tests {
num_row_groups: 0,
sequence: None,
partition_expr: None,
num_series: 0,
}
}
@@ -503,6 +509,7 @@ mod tests {
num_row_groups: 0,
sequence: None,
partition_expr: Some(partition_expr.clone()),
num_series: 0,
};
// Test serialization/deserialization

View File

@@ -236,6 +236,7 @@ mod tests {
num_row_groups: 0,
sequence: None,
partition_expr: None,
num_series: 0,
},
file_purger,
);
@@ -302,6 +303,7 @@ mod tests {
num_row_groups: 1,
sequence: NonZeroU64::new(4096),
partition_expr: None,
num_series: 0,
},
file_purger,
);

View File

@@ -259,6 +259,7 @@ mod tests {
num_row_groups: 1,
sequence: NonZeroU64::new(4096),
partition_expr: None,
num_series: 0,
};
file_ref_mgr.add_file(&file_meta);

View File

@@ -26,10 +26,13 @@ use std::sync::Arc;
use bloom_filter::creator::BloomFilterIndexer;
use common_telemetry::{debug, info, warn};
use datatypes::arrow::array::BinaryArray;
use datatypes::arrow::record_batch::RecordBatch;
use mito_codec::index::IndexValuesCodec;
use mito_codec::row_converter::CompositeValues;
use puffin_manager::SstPuffinManager;
use smallvec::{SmallVec, smallvec};
use snafu::ResultExt;
use snafu::{OptionExt, ResultExt};
use statistics::{ByteCount, RowCount};
use store_api::metadata::RegionMetadataRef;
use store_api::storage::{ColumnId, FileId, RegionId};
@@ -40,7 +43,7 @@ use crate::access_layer::{AccessLayerRef, FilePathProvider, OperationType, Regio
use crate::cache::file_cache::{FileType, IndexKey};
use crate::cache::write_cache::{UploadTracker, WriteCacheRef};
use crate::config::{BloomFilterConfig, FulltextIndexConfig, InvertedIndexConfig};
use crate::error::{BuildIndexAsyncSnafu, Error, Result};
use crate::error::{BuildIndexAsyncSnafu, DecodeSnafu, Error, InvalidRecordBatchSnafu, Result};
use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
use crate::metrics::INDEX_CREATE_MEMORY_USAGE;
use crate::read::{Batch, BatchReader};
@@ -57,6 +60,9 @@ use crate::sst::index::fulltext_index::creator::FulltextIndexer;
use crate::sst::index::intermediate::IntermediateManager;
use crate::sst::index::inverted_index::creator::InvertedIndexer;
use crate::sst::parquet::SstInfo;
use crate::sst::parquet::flat_format::primary_key_column_index;
use crate::sst::parquet::format::PrimaryKeyArray;
use crate::worker::WorkerListener;
pub(crate) const TYPE_INVERTED_INDEX: &str = "inverted_index";
pub(crate) const TYPE_FULLTEXT_INDEX: &str = "fulltext_index";
@@ -446,6 +452,7 @@ pub struct IndexBuildTask {
pub file_meta: FileMeta,
pub reason: IndexBuildType,
pub access_layer: AccessLayerRef,
pub(crate) listener: WorkerListener,
pub(crate) manifest_ctx: ManifestContextRef,
pub write_cache: Option<WriteCacheRef>,
pub file_purger: FilePurgerRef,
@@ -481,6 +488,12 @@ impl IndexBuildTask {
}
async fn do_index_build(&mut self, version_control: VersionControlRef) {
self.listener
.on_index_build_begin(RegionFileId::new(
self.file_meta.region_id,
self.file_meta.file_id,
))
.await;
match self.index_build(version_control).await {
Ok(outcome) => self.on_success(outcome).await,
Err(e) => {
@@ -535,6 +548,12 @@ impl IndexBuildTask {
if !self.check_sst_file_exists(&version_control).await {
// Calls abort to clean up index files.
indexer.abort().await;
self.listener
.on_index_build_abort(RegionFileId::new(
self.file_meta.region_id,
self.file_meta.file_id,
))
.await;
return Ok(IndexBuildOutcome::Aborted(format!(
"SST file not found during index build, region: {}, file_id: {}",
self.file_meta.region_id, self.file_meta.file_id
@@ -570,6 +589,12 @@ impl IndexBuildTask {
if !self.check_sst_file_exists(&version_control).await {
// Calls abort to clean up index files.
indexer.abort().await;
self.listener
.on_index_build_abort(RegionFileId::new(
self.file_meta.region_id,
self.file_meta.file_id,
))
.await;
return Ok(IndexBuildOutcome::Aborted(format!(
"SST file not found during index build, region: {}, file_id: {}",
self.file_meta.region_id, self.file_meta.file_id
@@ -698,6 +723,56 @@ impl IndexBuildScheduler {
}
}
/// Decodes primary keys from a flat format RecordBatch.
/// Returns a list of (decoded_pk_value, count) tuples where count is the number of consecutive occurrences.
pub(crate) fn decode_primary_keys_with_counts(
batch: &RecordBatch,
codec: &IndexValuesCodec,
) -> Result<Vec<(CompositeValues, usize)>> {
let primary_key_index = primary_key_column_index(batch.num_columns());
let pk_dict_array = batch
.column(primary_key_index)
.as_any()
.downcast_ref::<PrimaryKeyArray>()
.context(InvalidRecordBatchSnafu {
reason: "Primary key column is not a dictionary array",
})?;
let pk_values_array = pk_dict_array
.values()
.as_any()
.downcast_ref::<BinaryArray>()
.context(InvalidRecordBatchSnafu {
reason: "Primary key values are not binary array",
})?;
let keys = pk_dict_array.keys();
// Decodes primary keys and count consecutive occurrences
let mut result: Vec<(CompositeValues, usize)> = Vec::new();
let mut prev_key: Option<u32> = None;
for i in 0..keys.len() {
let current_key = keys.value(i);
// Checks if current key is the same as previous key
if let Some(prev) = prev_key
&& prev == current_key
{
// Safety: We already have a key in the result vector.
result.last_mut().unwrap().1 += 1;
continue;
}
// New key, decodes it.
let pk_bytes = pk_values_array.value(current_key as usize);
let decoded_value = codec.decoder().decode(pk_bytes).context(DecodeSnafu)?;
result.push((decoded_value, 1));
prev_key = Some(current_key);
}
Ok(result)
}
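For reference, a simplified, self-contained illustration of the run-length grouping above: consecutive equal dictionary keys share one decoded value, so a decode only happens when the key changes. Plain `u32` keys and `&str` values stand in for `PrimaryKeyArray` and the decoded `CompositeValues`; the helper below is hypothetical and not part of the crate.
// Groups consecutive equal dictionary keys, "decoding" (here, looking up) the
// value only once per run.
fn group_consecutive_keys<'a>(keys: &[u32], values: &[&'a str]) -> Vec<(&'a str, usize)> {
    let mut result: Vec<(&'a str, usize)> = Vec::new();
    let mut prev_key: Option<u32> = None;
    for &key in keys {
        if prev_key == Some(key) {
            // Same key as the previous row: just bump the count of the current run.
            result.last_mut().unwrap().1 += 1;
        } else {
            // New run: decode the value once and start a new (value, count) entry.
            result.push((values[key as usize], 1));
            prev_key = Some(key);
        }
    }
    result
}
fn main() {
    let values = ["host=a", "host=b"];
    // Rows sorted by primary key: three rows of "host=a", two of "host=b",
    // then "host=a" again (a new run, so it is decoded again).
    let keys = [0, 0, 0, 1, 1, 0];
    assert_eq!(
        vec![("host=a", 3), ("host=b", 2), ("host=a", 1)],
        group_consecutive_keys(&keys, &values)
    );
}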
#[cfg(test)]
mod tests {
use std::sync::Arc;
@@ -1137,6 +1212,7 @@ mod tests {
},
reason: IndexBuildType::Flush,
access_layer: env.access_layer.clone(),
listener: WorkerListener::default(),
manifest_ctx,
write_cache: None,
file_purger,
@@ -1187,6 +1263,7 @@ mod tests {
file_meta: file_meta.clone(),
reason: IndexBuildType::Flush,
access_layer: env.access_layer.clone(),
listener: WorkerListener::default(),
manifest_ctx,
write_cache: None,
file_purger,
@@ -1254,6 +1331,7 @@ mod tests {
file_meta: file_meta.clone(),
reason: IndexBuildType::Flush,
access_layer: env.access_layer.clone(),
listener: WorkerListener::default(),
manifest_ctx,
write_cache: None,
file_purger,
@@ -1350,6 +1428,7 @@ mod tests {
file_meta: file_meta.clone(),
reason: IndexBuildType::Flush,
access_layer: env.access_layer.clone(),
listener: WorkerListener::default(),
manifest_ctx,
write_cache: None,
file_purger,
@@ -1430,6 +1509,7 @@ mod tests {
file_meta: file_meta.clone(),
reason: IndexBuildType::Flush,
access_layer: env.access_layer.clone(),
listener: WorkerListener::default(),
manifest_ctx,
write_cache: Some(write_cache.clone()),
file_purger,

View File

@@ -16,6 +16,7 @@ use std::collections::HashMap;
use std::sync::Arc;
use std::sync::atomic::AtomicUsize;
use api::v1::SemanticType;
use common_telemetry::{debug, warn};
use datatypes::arrow::record_batch::RecordBatch;
use datatypes::schema::SkippingIndexType;
@@ -23,9 +24,10 @@ use datatypes::vectors::Helper;
use index::bloom_filter::creator::BloomFilterCreator;
use index::target::IndexTarget;
use mito_codec::index::{IndexValueCodec, IndexValuesCodec};
use mito_codec::row_converter::SortField;
use mito_codec::row_converter::{CompositeValues, SortField};
use puffin::puffin_manager::{PuffinWriter, PutOptions};
use snafu::{ResultExt, ensure};
use store_api::codec::PrimaryKeyEncoding;
use store_api::metadata::RegionMetadataRef;
use store_api::storage::{ColumnId, FileId};
use tokio_util::compat::{TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt};
@@ -35,13 +37,13 @@ use crate::error::{
OperateAbortedIndexSnafu, PuffinAddBlobSnafu, PushBloomFilterValueSnafu, Result,
};
use crate::read::Batch;
use crate::sst::index::TYPE_BLOOM_FILTER_INDEX;
use crate::sst::index::bloom_filter::INDEX_BLOB_TYPE;
use crate::sst::index::intermediate::{
IntermediateLocation, IntermediateManager, TempFileProvider,
};
use crate::sst::index::puffin_manager::SstPuffinWriter;
use crate::sst::index::statistics::{ByteCount, RowCount, Statistics};
use crate::sst::index::{TYPE_BLOOM_FILTER_INDEX, decode_primary_keys_with_counts};
/// The buffer size for the pipe used to send index data to the puffin blob.
const PIPE_BUFFER_SIZE_FOR_SENDING_BLOB: usize = 8192;
@@ -289,47 +291,81 @@ impl BloomFilterIndexer {
let n = batch.num_rows();
guard.inc_row_count(n);
let is_sparse = self.metadata.primary_key_encoding == PrimaryKeyEncoding::Sparse;
let mut decoded_pks: Option<Vec<(CompositeValues, usize)>> = None;
for (col_id, creator) in &mut self.creators {
// Get the column name from metadata
if let Some(column_meta) = self.metadata.column_by_id(*col_id) {
let column_name = &column_meta.column_schema.name;
// Safety: `creators` are created from the metadata so it won't be None.
let column_meta = self.metadata.column_by_id(*col_id).unwrap();
let column_name = &column_meta.column_schema.name;
if let Some(column_array) = batch.column_by_name(column_name) {
// Convert Arrow array to VectorRef
let vector = Helper::try_into_vector(column_array.clone())
.context(crate::error::ConvertVectorSnafu)?;
let sort_field = SortField::new(vector.data_type());
// Find the column in the RecordBatch by name
if let Some(column_array) = batch.column_by_name(column_name) {
// Convert Arrow array to VectorRef
let vector = Helper::try_into_vector(column_array.clone())
.context(crate::error::ConvertVectorSnafu)?;
let sort_field = SortField::new(vector.data_type());
for i in 0..n {
let value = vector.get_ref(i);
let elems = (!value.is_null())
.then(|| {
let mut buf = vec![];
IndexValueCodec::encode_nonnull_value(value, &sort_field, &mut buf)
.context(EncodeSnafu)?;
Ok(buf)
})
.transpose()?;
for i in 0..n {
let value = vector.get_ref(i);
let elems = (!value.is_null())
.then(|| {
let mut buf = vec![];
IndexValueCodec::encode_nonnull_value(value, &sort_field, &mut buf)
.context(EncodeSnafu)?;
Ok(buf)
})
.transpose()?;
creator
.push_row_elems(elems)
.await
.context(PushBloomFilterValueSnafu)?;
}
} else if is_sparse && column_meta.semantic_type == SemanticType::Tag {
// Column not found in batch, tries to decode from primary keys for sparse encoding.
if decoded_pks.is_none() {
decoded_pks = Some(decode_primary_keys_with_counts(batch, &self.codec)?);
}
creator
.push_row_elems(elems)
.await
.context(PushBloomFilterValueSnafu)?;
}
} else {
let pk_values_with_counts = decoded_pks.as_ref().unwrap();
let Some(col_info) = self.codec.pk_col_info(*col_id) else {
debug!(
"Column {} not found in the batch during building bloom filter index",
"Column {} not found in primary key during building bloom filter index",
column_name
);
// Push empty elements to maintain alignment
for _ in 0..n {
creator
.push_row_elems(None)
.await
.context(PushBloomFilterValueSnafu)?;
}
continue;
};
let pk_index = col_info.idx;
let field = &col_info.field;
for (decoded, count) in pk_values_with_counts {
let value = match decoded {
CompositeValues::Dense(dense) => dense.get(pk_index).map(|v| &v.1),
CompositeValues::Sparse(sparse) => sparse.get(col_id),
};
let elems = value
.filter(|v| !v.is_null())
.map(|v| {
let mut buf = vec![];
IndexValueCodec::encode_nonnull_value(
v.as_value_ref(),
field,
&mut buf,
)
.context(EncodeSnafu)?;
Ok(buf)
})
.transpose()?;
creator
.push_n_row_elems(*count, elems)
.await
.context(PushBloomFilterValueSnafu)?;
}
} else {
debug!(
"Column {} not found in the batch during building bloom filter index",
column_name
);
}
}

View File

@@ -16,6 +16,7 @@ use std::collections::HashMap;
use std::sync::Arc;
use std::sync::atomic::AtomicUsize;
use api::v1::SemanticType;
use common_telemetry::warn;
use datatypes::arrow::array::{Array, LargeStringArray, StringArray};
use datatypes::arrow::datatypes::DataType;
@@ -69,6 +70,17 @@ impl FulltextIndexer {
let mut creators = HashMap::new();
for column in &metadata.column_metadatas {
// Tag columns don't support fulltext index now.
// If we need to support fulltext index for tag columns, we also need to parse
// the codec and handle sparse encoding for flat format specially.
if column.semantic_type == SemanticType::Tag {
common_telemetry::debug!(
"Skip creating fulltext index for tag column {}",
column.column_schema.name
);
continue;
}
let options = column
.column_schema
.fulltext_options()

View File

@@ -17,6 +17,7 @@ use std::num::NonZeroUsize;
use std::sync::Arc;
use std::sync::atomic::AtomicUsize;
use api::v1::SemanticType;
use common_telemetry::{debug, warn};
use datatypes::arrow::record_batch::RecordBatch;
use datatypes::vectors::Helper;
@@ -26,9 +27,10 @@ use index::inverted_index::create::sort_create::SortIndexCreator;
use index::inverted_index::format::writer::InvertedIndexBlobWriter;
use index::target::IndexTarget;
use mito_codec::index::{IndexValueCodec, IndexValuesCodec};
use mito_codec::row_converter::SortField;
use mito_codec::row_converter::{CompositeValues, SortField};
use puffin::puffin_manager::{PuffinWriter, PutOptions};
use snafu::{ResultExt, ensure};
use store_api::codec::PrimaryKeyEncoding;
use store_api::metadata::RegionMetadataRef;
use store_api::storage::{ColumnId, FileId};
use tokio::io::duplex;
@@ -39,13 +41,13 @@ use crate::error::{
PushIndexValueSnafu, Result,
};
use crate::read::Batch;
use crate::sst::index::TYPE_INVERTED_INDEX;
use crate::sst::index::intermediate::{
IntermediateLocation, IntermediateManager, TempFileProvider,
};
use crate::sst::index::inverted_index::INDEX_BLOB_TYPE;
use crate::sst::index::puffin_manager::SstPuffinWriter;
use crate::sst::index::statistics::{ByteCount, RowCount, Statistics};
use crate::sst::index::{TYPE_INVERTED_INDEX, decode_primary_keys_with_counts};
/// The minimum memory usage threshold for one column.
const MIN_MEMORY_USAGE_THRESHOLD_PER_COLUMN: usize = 1024 * 1024; // 1MB
@@ -78,9 +80,6 @@ pub struct InvertedIndexer {
/// Region metadata for column lookups.
metadata: RegionMetadataRef,
/// Cache for mapping indexed column positions to their indices in the RecordBatch.
/// Aligns with indexed_column_ids. Initialized lazily when first batch is processed.
column_index_cache: Option<Vec<Option<usize>>>,
}
impl InvertedIndexer {
@@ -130,7 +129,6 @@ impl InvertedIndexer {
memory_usage,
indexed_column_ids,
metadata: metadata.clone(),
column_index_cache: None,
}
}
@@ -170,29 +168,29 @@ impl InvertedIndexer {
}
async fn do_update_flat(&mut self, batch: &RecordBatch) -> Result<()> {
// Initialize column index cache if not already done
if self.column_index_cache.is_none() {
self.initialize_column_index_cache(batch);
}
let mut guard = self.stats.record_update();
let n = batch.num_rows();
guard.inc_row_count(n);
guard.inc_row_count(batch.num_rows());
let column_indices = self.column_index_cache.as_ref().unwrap();
let is_sparse = self.metadata.primary_key_encoding == PrimaryKeyEncoding::Sparse;
let mut decoded_pks: Option<Vec<(CompositeValues, usize)>> = None;
for ((col_id, target_key), &column_index) in
self.indexed_column_ids.iter().zip(column_indices.iter())
{
if let Some(index) = column_index {
let column_array = batch.column(index);
for (col_id, target_key) in &self.indexed_column_ids {
let Some(column_meta) = self.metadata.column_by_id(*col_id) else {
debug!(
"Column {} not found in the metadata during building inverted index",
col_id
);
continue;
};
let column_name = &column_meta.column_schema.name;
if let Some(column_array) = batch.column_by_name(column_name) {
// Convert Arrow array to VectorRef using Helper
let vector = Helper::try_into_vector(column_array.clone())
.context(crate::error::ConvertVectorSnafu)?;
let sort_field = SortField::new(vector.data_type());
for row in 0..n {
for row in 0..batch.num_rows() {
self.value_buf.clear();
let value_ref = vector.get_ref(row);
@@ -214,6 +212,47 @@ impl InvertedIndexer {
.context(PushIndexValueSnafu)?;
}
}
} else if is_sparse && column_meta.semantic_type == SemanticType::Tag {
// Column not found in batch, tries to decode from primary keys for sparse encoding.
if decoded_pks.is_none() {
decoded_pks = Some(decode_primary_keys_with_counts(batch, &self.codec)?);
}
let pk_values_with_counts = decoded_pks.as_ref().unwrap();
let Some(col_info) = self.codec.pk_col_info(*col_id) else {
debug!(
"Column {} not found in primary key during building bloom filter index",
column_name
);
continue;
};
let pk_index = col_info.idx;
let field = &col_info.field;
for (decoded, count) in pk_values_with_counts {
let value = match decoded {
CompositeValues::Dense(dense) => dense.get(pk_index).map(|v| &v.1),
CompositeValues::Sparse(sparse) => sparse.get(col_id),
};
let elem = value
.filter(|v| !v.is_null())
.map(|v| {
self.value_buf.clear();
IndexValueCodec::encode_nonnull_value(
v.as_value_ref(),
field,
&mut self.value_buf,
)
.context(EncodeSnafu)?;
Ok(self.value_buf.as_slice())
})
.transpose()?;
self.index_creator
.push_with_name_n(target_key, elem, *count)
.await
.context(PushIndexValueSnafu)?;
}
} else {
debug!(
"Column {} not found in the batch during building inverted index",
@@ -225,26 +264,6 @@ impl InvertedIndexer {
Ok(())
}
/// Initializes the column index cache by mapping indexed column ids to their positions in the RecordBatch.
fn initialize_column_index_cache(&mut self, batch: &RecordBatch) {
let mut column_indices = Vec::with_capacity(self.indexed_column_ids.len());
for (col_id, _) in &self.indexed_column_ids {
let column_index = if let Some(column_meta) = self.metadata.column_by_id(*col_id) {
let column_name = &column_meta.column_schema.name;
batch
.schema()
.column_with_name(column_name)
.map(|(index, _)| index)
} else {
None
};
column_indices.push(column_index);
}
self.column_index_cache = Some(column_indices);
}
/// Finishes index creation and cleans up garbage.
/// Returns the number of rows and bytes written.
pub async fn finish(

View File

@@ -84,6 +84,8 @@ pub struct SstInfo {
pub file_metadata: Option<Arc<ParquetMetaData>>,
/// Index Meta Data
pub index_metadata: IndexOutput,
/// Number of series
pub num_series: u64,
}
#[cfg(test)]
@@ -766,6 +768,7 @@ mod tests {
.expect("partition expression should be valid JSON"),
None => None,
},
num_series: 0,
},
Arc::new(NoopFilePurger),
);

View File

@@ -15,18 +15,20 @@
//! Structs and functions for reading ranges from a parquet file. A file range
//! is usually a row group in a parquet file.
use std::collections::HashMap;
use std::ops::BitAnd;
use std::sync::Arc;
use api::v1::{OpType, SemanticType};
use common_telemetry::error;
use datatypes::arrow::array::BooleanArray;
use datatypes::arrow::array::{ArrayRef, BooleanArray};
use datatypes::arrow::buffer::BooleanBuffer;
use datatypes::arrow::record_batch::RecordBatch;
use mito_codec::row_converter::{CompositeValues, PrimaryKeyCodec};
use parquet::arrow::arrow_reader::RowSelection;
use snafu::{OptionExt, ResultExt};
use store_api::storage::TimeSeriesRowSelector;
use store_api::codec::PrimaryKeyEncoding;
use store_api::storage::{ColumnId, TimeSeriesRowSelector};
use crate::error::{
ComputeArrowSnafu, DataTypeMismatchSnafu, DecodeSnafu, DecodeStatsSnafu, RecordBatchSnafu,
@@ -37,11 +39,11 @@ use crate::read::compat::CompatBatch;
use crate::read::last_row::RowGroupLastRowCachedReader;
use crate::read::prune::{FlatPruneReader, PruneReader};
use crate::sst::file::FileHandle;
use crate::sst::parquet::flat_format::{DecodedPrimaryKeys, decode_primary_keys};
use crate::sst::parquet::format::ReadFormat;
use crate::sst::parquet::reader::{
FlatRowGroupReader, MaybeFilter, RowGroupReader, RowGroupReaderBuilder, SimpleFilterContext,
};
/// A range of a parquet SST. Now it is a row group.
/// We can read different file ranges in parallel.
#[derive(Clone)]
@@ -357,7 +359,34 @@ impl RangeBase {
}
/// Filters the input RecordBatch by the pushed down predicate and returns RecordBatch.
///
/// It assumes all necessary tags are already decoded from the primary key.
pub(crate) fn precise_filter_flat(&self, input: RecordBatch) -> Result<Option<RecordBatch>> {
let mask = self.compute_filter_mask_flat(&input)?;
// If mask is None, the entire batch is filtered out
let Some(mask) = mask else {
return Ok(None);
};
let filtered_batch =
datatypes::arrow::compute::filter_record_batch(&input, &BooleanArray::from(mask))
.context(ComputeArrowSnafu)?;
if filtered_batch.num_rows() > 0 {
Ok(Some(filtered_batch))
} else {
Ok(None)
}
}
/// Computes the filter mask for the input RecordBatch based on pushed down predicates.
///
/// Returns `None` if the entire batch is filtered out, otherwise returns the boolean mask.
pub(crate) fn compute_filter_mask_flat(
&self,
input: &RecordBatch,
) -> Result<Option<BooleanBuffer>> {
let mut mask = BooleanBuffer::new_set(input.num_rows());
let flat_format = self
@@ -367,6 +396,11 @@ impl RangeBase {
reason: "Expected flat format for precise_filter_flat",
})?;
// Decodes primary keys once if we have any tag filters not in projection
let mut decoded_pks: Option<DecodedPrimaryKeys> = None;
// Cache decoded tag arrays by column id to avoid redundant decoding
let mut decoded_tag_cache: HashMap<ColumnId, ArrayRef> = HashMap::new();
// Run filters one by one and combine their results
for filter_ctx in &self.filters {
let filter = match filter_ctx.filter() {
@@ -383,20 +417,53 @@ impl RangeBase {
let column = &input.columns()[idx];
let result = filter.evaluate_array(column).context(RecordBatchSnafu)?;
mask = mask.bitand(&result);
} else {
// Column not found in projection, continue
continue;
} else if filter_ctx.semantic_type() == SemanticType::Tag {
// Column not found in projection, it may be a tag column.
// Decodes primary keys if not already decoded.
if decoded_pks.is_none() {
decoded_pks = Some(decode_primary_keys(self.codec.as_ref(), input)?);
}
let metadata = flat_format.metadata();
let column_id = filter_ctx.column_id();
// Check cache first
let tag_column = if let Some(cached_column) = decoded_tag_cache.get(&column_id) {
cached_column.clone()
} else {
// For dense encoding, we need pk_index. For sparse encoding, pk_index is None.
let pk_index = if self.codec.encoding() == PrimaryKeyEncoding::Sparse {
None
} else {
metadata.primary_key_index(column_id)
};
let column_index = metadata.column_index_by_id(column_id);
if let (Some(column_index), Some(decoded)) =
(column_index, decoded_pks.as_ref())
{
let column_metadata = &metadata.column_metadatas[column_index];
let tag_column = decoded.get_tag_column(
column_id,
pk_index,
&column_metadata.column_schema.data_type,
)?;
// Cache the decoded tag column
decoded_tag_cache.insert(column_id, tag_column.clone());
tag_column
} else {
continue;
}
};
let result = filter
.evaluate_array(&tag_column)
.context(RecordBatchSnafu)?;
mask = mask.bitand(&result);
}
// Non-tag column not found in projection.
}
let filtered_batch =
datatypes::arrow::compute::filter_record_batch(&input, &BooleanArray::from(mask))
.context(ComputeArrowSnafu)?;
if filtered_batch.num_rows() > 0 {
Ok(Some(filtered_batch))
} else {
Ok(None)
}
Ok(Some(mask))
}
}

View File

@@ -127,7 +127,9 @@ pub(crate) fn op_type_column_index(num_columns: usize) -> usize {
num_columns - 1
}
// TODO(yingwen): Add an option to skip reading internal columns.
// TODO(yingwen): Add an option to skip reading internal columns if the region is
// append-only and doesn't use sparse encoding (we need to check the table id under
// sparse encoding).
/// Helper for reading the flat SST format with projection.
///
/// It only supports the flat format that additionally stores primary keys.
@@ -528,6 +530,125 @@ pub(crate) fn sst_column_id_indices(metadata: &RegionMetadata) -> HashMap<Column
id_to_index
}
/// Decodes primary keys from a batch and returns decoded primary key information.
///
/// The batch must contain a primary key column at the expected index.
pub(crate) fn decode_primary_keys(
codec: &dyn PrimaryKeyCodec,
batch: &RecordBatch,
) -> Result<DecodedPrimaryKeys> {
let primary_key_index = primary_key_column_index(batch.num_columns());
let pk_dict_array = batch
.column(primary_key_index)
.as_any()
.downcast_ref::<PrimaryKeyArray>()
.with_context(|| InvalidRecordBatchSnafu {
reason: "Primary key column is not a dictionary array".to_string(),
})?;
let pk_values_array = pk_dict_array
.values()
.as_any()
.downcast_ref::<BinaryArray>()
.with_context(|| InvalidRecordBatchSnafu {
reason: "Primary key values are not binary array".to_string(),
})?;
let keys = pk_dict_array.keys();
// Decodes primary key values by iterating through keys, reusing decoded values for duplicate keys.
// Maps original key index -> new decoded value index
let mut key_to_decoded_index = Vec::with_capacity(keys.len());
let mut decoded_pk_values = Vec::new();
let mut prev_key: Option<u32> = None;
// The parquet reader may load the whole dictionary page into the dictionary values, so decoding
// the values array directly could decode many primary keys that don't appear in this batch.
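// Worked example with hypothetical keys: dictionary keys [3, 3, 7, 7, 7] decode only two
// primary keys and yield key_to_decoded_index [0, 0, 1, 1, 1], even if the dictionary values
// array holds many more entries. The reuse only applies to consecutive duplicates, which is
// sufficient because rows in a batch are expected to be sorted by primary key.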
for i in 0..keys.len() {
let current_key = keys.value(i);
// Check if current key is the same as previous key
if let Some(prev) = prev_key
&& prev == current_key
{
// Reuse the last decoded index
key_to_decoded_index.push((decoded_pk_values.len() - 1) as u32);
continue;
}
// A new key; decodes its value
let pk_bytes = pk_values_array.value(current_key as usize);
let decoded_value = codec.decode(pk_bytes).context(DecodeSnafu)?;
decoded_pk_values.push(decoded_value);
key_to_decoded_index.push((decoded_pk_values.len() - 1) as u32);
prev_key = Some(current_key);
}
// Create the keys array from key_to_decoded_index
let keys_array = UInt32Array::from(key_to_decoded_index);
Ok(DecodedPrimaryKeys {
decoded_pk_values,
keys_array,
})
}
/// Holds decoded primary key values and their indices.
pub(crate) struct DecodedPrimaryKeys {
/// Decoded primary key values for unique keys in the dictionary.
decoded_pk_values: Vec<CompositeValues>,
/// Prebuilt keys array for creating dictionary arrays.
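/// Each key maps a row of the batch to an index in `decoded_pk_values`.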
keys_array: UInt32Array,
}
impl DecodedPrimaryKeys {
/// Gets a tag column array by column id and data type.
///
/// For sparse encoding, uses `column_id` to look up values.
/// For dense encoding, uses `pk_index` to get values.
pub(crate) fn get_tag_column(
&self,
column_id: ColumnId,
pk_index: Option<usize>,
column_type: &ConcreteDataType,
) -> Result<ArrayRef> {
// Gets values from the primary key.
let mut builder = column_type.create_mutable_vector(self.decoded_pk_values.len());
for decoded in &self.decoded_pk_values {
match decoded {
CompositeValues::Dense(dense) => {
let pk_idx = pk_index.expect("pk_index required for dense encoding");
if pk_idx < dense.len() {
builder.push_value_ref(&dense[pk_idx].1.as_value_ref());
} else {
builder.push_null();
}
}
CompositeValues::Sparse(sparse) => {
let value = sparse.get_or_null(column_id);
builder.push_value_ref(&value.as_value_ref());
}
};
}
let values_vector = builder.to_vector();
let values_array = values_vector.to_arrow_array();
// Only creates a dictionary array for string types; otherwise takes values by keys
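// Illustration with hypothetical data: keys [0, 0, 1] over decoded string values
// ["host-a", "host-b"] stay deduplicated as a dictionary array, while a numeric tag is simply
// gathered into a plain array of `keys_array.len()` values via `take`.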
if column_type.is_string() {
// Creates dictionary array using the same keys for string types
// Note that the dictionary values may have nulls.
let dict_array = DictionaryArray::new(self.keys_array.clone(), values_array);
Ok(Arc::new(dict_array))
} else {
// For non-string types, takes values by key indices to create a regular array
let taken_array =
take(&values_array, &self.keys_array, None).context(ComputeArrowSnafu)?;
Ok(taken_array)
}
}
}
/// Converts a batch that doesn't have decoded primary key columns into a batch that has decoded
/// primary key columns in flat format.
pub(crate) struct FlatConvertFormat {
@@ -577,53 +698,22 @@ impl FlatConvertFormat {
/// Converts a batch to have decoded primary key columns in flat format.
///
/// The primary key array in the batch is a dictionary array. We decode each value which is a
/// primary key and reuse the keys array to build a dictionary array for each tag column.
/// The decoded columns are inserted in front of other columns.
/// The primary key array in the batch is a dictionary array.
pub(crate) fn convert(&self, batch: RecordBatch) -> Result<RecordBatch> {
if self.projected_primary_keys.is_empty() {
return Ok(batch);
}
let primary_key_index = primary_key_column_index(batch.num_columns());
let pk_dict_array = batch
.column(primary_key_index)
.as_any()
.downcast_ref::<PrimaryKeyArray>()
.with_context(|| InvalidRecordBatchSnafu {
reason: "Primary key column is not a dictionary array".to_string(),
})?;
let pk_values_array = pk_dict_array
.values()
.as_any()
.downcast_ref::<BinaryArray>()
.with_context(|| InvalidRecordBatchSnafu {
reason: "Primary key values are not binary array".to_string(),
})?;
// Decodes all primary key values
let mut decoded_pk_values = Vec::with_capacity(pk_values_array.len());
for i in 0..pk_values_array.len() {
if pk_values_array.is_null(i) {
decoded_pk_values.push(None);
} else {
let pk_bytes = pk_values_array.value(i);
let decoded = self.codec.decode(pk_bytes).context(DecodeSnafu)?;
decoded_pk_values.push(Some(decoded));
}
}
let decoded_pks = decode_primary_keys(self.codec.as_ref(), &batch)?;
// Builds decoded tag column arrays.
let mut decoded_columns = Vec::new();
for (column_id, pk_index, column_index) in &self.projected_primary_keys {
let column_metadata = &self.metadata.column_metadatas[*column_index];
let tag_column = self.build_primary_key_column(
let tag_column = decoded_pks.get_tag_column(
*column_id,
*pk_index,
Some(*pk_index),
&column_metadata.column_schema.data_type,
pk_dict_array.keys(),
&decoded_pk_values,
)?;
decoded_columns.push(tag_column);
}
@@ -648,57 +738,6 @@ impl FlatConvertFormat {
let new_schema = Arc::new(Schema::new(new_fields));
RecordBatch::try_new(new_schema, new_columns).context(NewRecordBatchSnafu)
}
/// Builds an array for a specific tag column.
///
/// It may build a dictionary array if the type is string. Note that the dictionary
/// array may have null values, although keys are not null.
fn build_primary_key_column(
&self,
column_id: ColumnId,
pk_index: usize,
column_type: &ConcreteDataType,
keys: &UInt32Array,
decoded_pk_values: &[Option<CompositeValues>],
) -> Result<ArrayRef> {
// Gets values from the primary key.
let mut builder = column_type.create_mutable_vector(decoded_pk_values.len());
for decoded_opt in decoded_pk_values {
match decoded_opt {
Some(decoded) => {
match decoded {
CompositeValues::Dense(dense) => {
if pk_index < dense.len() {
builder.push_value_ref(&dense[pk_index].1.as_value_ref());
} else {
builder.push_null();
}
}
CompositeValues::Sparse(sparse) => {
let value = sparse.get_or_null(column_id);
builder.push_value_ref(&value.as_value_ref());
}
};
}
None => builder.push_null(),
}
}
let values_vector = builder.to_vector();
let values_array = values_vector.to_arrow_array();
// Only creates dictionary array for string types, otherwise take values by keys
if column_type.is_string() {
// Creates dictionary array using the same keys for string types
// Note that the dictionary values may have nulls.
let dict_array = DictionaryArray::new(keys.clone(), values_array);
Ok(Arc::new(dict_array))
} else {
// For non-string types, takes values by keys indices to create a regular array
let taken_array = take(&values_array, keys, None).context(ComputeArrowSnafu)?;
Ok(taken_array)
}
}
}
#[cfg(test)]

View File

@@ -1397,6 +1397,7 @@ impl FlatRowGroupReader {
let record_batch = batch_result.context(ArrowReaderSnafu {
path: self.context.file_path(),
})?;
// Safety: only the flat format uses FlatRowGroupReader.
let flat_format = self.context.read_format().as_flat().unwrap();
let record_batch =

View File

@@ -57,7 +57,9 @@ use crate::sst::parquet::flat_format::{FlatWriteFormat, time_index_column_index}
use crate::sst::parquet::format::PrimaryKeyWriteFormat;
use crate::sst::parquet::helper::parse_parquet_metadata;
use crate::sst::parquet::{PARQUET_METADATA_KEY, SstInfo, WriteOptions};
use crate::sst::{DEFAULT_WRITE_BUFFER_SIZE, DEFAULT_WRITE_CONCURRENCY, FlatSchemaOptions};
use crate::sst::{
DEFAULT_WRITE_BUFFER_SIZE, DEFAULT_WRITE_CONCURRENCY, FlatSchemaOptions, SeriesEstimator,
};
/// Parquet SST writer.
pub struct ParquetWriter<F: WriterFactory, I: IndexerBuilder, P: FilePathProvider> {
@@ -176,7 +178,7 @@ where
) -> Result<()> {
// maybe_init_writer will create a new file.
if let Some(mut current_writer) = mem::take(&mut self.writer) {
let stats = mem::take(stats);
let mut stats = mem::take(stats);
// At least one row has been written.
assert!(stats.num_rows > 0);
@@ -211,6 +213,7 @@ where
// convert FileMetaData to ParquetMetaData
let parquet_metadata = parse_parquet_metadata(file_meta)?;
let num_series = stats.series_estimator.finish();
ssts.push(SstInfo {
file_id: self.current_file,
time_range,
@@ -219,6 +222,7 @@ where
num_row_groups: parquet_metadata.num_row_groups() as u64,
file_metadata: Some(Arc::new(parquet_metadata)),
index_metadata: index_output,
num_series,
});
self.current_file = FileId::random();
self.bytes_written.store(0, Ordering::Relaxed)
@@ -496,6 +500,8 @@ struct SourceStats {
num_rows: usize,
/// Time range of fetched batches.
time_range: Option<(Timestamp, Timestamp)>,
/// Series estimator for computing num_series.
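/// (Assumption: the estimator approximates the number of distinct series, i.e. distinct
/// primary keys, seen across the written batches; its result fills `num_series` in `SstInfo`.)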
series_estimator: SeriesEstimator,
}
impl SourceStats {
@@ -505,6 +511,7 @@ impl SourceStats {
}
self.num_rows += batch.num_rows();
self.series_estimator.update(batch);
// Safety: batch is not empty.
let (min_in_batch, max_in_batch) = (
batch.first_timestamp().unwrap(),
@@ -524,6 +531,7 @@ impl SourceStats {
}
self.num_rows += record_batch.num_rows();
self.series_estimator.update_flat(record_batch);
// Get the timestamp column by index
let time_index_col_idx = time_index_column_index(record_batch.num_columns());

View File

@@ -127,6 +127,7 @@ pub fn sst_file_handle_with_file_id(file_id: FileId, start_ms: i64, end_ms: i64)
index_file_size: 0,
num_rows: 0,
num_row_groups: 0,
num_series: 0,
sequence: None,
partition_expr: None,
},

View File

@@ -105,6 +105,7 @@ impl VersionControlBuilder {
index_file_size: 0,
num_rows: 0,
num_row_groups: 0,
num_series: 0,
sequence: NonZeroU64::new(start_ms as u64),
partition_expr: match &self.metadata.partition_expr {
Some(json_str) => partition::expr::PartitionExpr::from_json_str(json_str)
@@ -193,6 +194,7 @@ pub(crate) fn apply_edit(
index_file_size: 0,
num_rows: 0,
num_row_groups: 0,
num_series: 0,
sequence: NonZeroU64::new(*start_ms as u64),
partition_expr: match &version_control.current().version.metadata.partition_expr {
Some(json_str) => partition::expr::PartitionExpr::from_json_str(json_str)

View File

@@ -1220,10 +1220,10 @@ impl WorkerListener {
}
}
pub(crate) async fn on_index_build_success(&self, _region_file_id: RegionFileId) {
pub(crate) async fn on_index_build_finish(&self, _region_file_id: RegionFileId) {
#[cfg(any(test, feature = "test"))]
if let Some(listener) = &self.listener {
listener.on_index_build_success(_region_file_id).await;
listener.on_index_build_finish(_region_file_id).await;
}
}
@@ -1233,6 +1233,13 @@ impl WorkerListener {
listener.on_index_build_begin(_region_file_id).await;
}
}
pub(crate) async fn on_index_build_abort(&self, _region_file_id: RegionFileId) {
#[cfg(any(test, feature = "test"))]
if let Some(listener) = &self.listener {
listener.on_index_build_abort(_region_file_id).await;
}
}
}
#[cfg(test)]

Some files were not shown because too many files have changed in this diff.