From a9a3e0b121fff1a6d15baaa26c62cb82dd6e5f4e Mon Sep 17 00:00:00 2001 From: discord9 <55937128+discord9@users.noreply.github.com> Date: Wed, 22 Oct 2025 17:04:09 +0800 Subject: [PATCH 01/14] fix: prom ql logical plan use column index not name (#7109) * feat: use index not col name Signed-off-by: discord9 * fix: use name without qualifier&output schema fix Signed-off-by: discord9 * proto Signed-off-by: discord9 * refactor: resolve column name/index Signed-off-by: discord9 * pcr Signed-off-by: discord9 * chore: update proto Signed-off-by: discord9 * chore: update proto Signed-off-by: discord9 --------- Signed-off-by: discord9 --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/promql/src/extension_plan.rs | 43 ++++++ src/promql/src/extension_plan/absent.rs | 94 +++++++++---- .../src/extension_plan/instant_manipulate.rs | 93 ++++++++++--- src/promql/src/extension_plan/normalize.rs | 90 ++++++++++--- .../src/extension_plan/range_manipulate.rs | 95 +++++++++++--- .../src/extension_plan/scalar_calculate.rs | 124 ++++++++++++++---- .../src/extension_plan/series_divide.rs | 72 ++++++++-- .../common/promql/encode_substrait.result | 49 +++++++ .../common/promql/encode_substrait.sql | 22 ++++ 11 files changed, 576 insertions(+), 110 deletions(-) create mode 100644 tests/cases/standalone/common/promql/encode_substrait.result create mode 100644 tests/cases/standalone/common/promql/encode_substrait.sql diff --git a/Cargo.lock b/Cargo.lock index d15f70b631..14f8089da7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5328,7 +5328,7 @@ dependencies = [ [[package]] name = "greptime-proto" version = "0.1.0" -source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=69a6089933daa573c96808ec4bbc48f447ec6e8c#69a6089933daa573c96808ec4bbc48f447ec6e8c" +source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=72a0d22e0f5f716b2ee21bca091f87a88c36e5ca#72a0d22e0f5f716b2ee21bca091f87a88c36e5ca" dependencies = [ "prost 0.13.5", "prost-types 0.13.5", diff --git a/Cargo.toml b/Cargo.toml index f500f70b0e..50e286195c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -147,7 +147,7 @@ etcd-client = { git = "https://github.com/GreptimeTeam/etcd-client", rev = "f62d fst = "0.4.7" futures = "0.3" futures-util = "0.3" -greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "69a6089933daa573c96808ec4bbc48f447ec6e8c" } +greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "72a0d22e0f5f716b2ee21bca091f87a88c36e5ca" } hex = "0.4" http = "1" humantime = "2.1" diff --git a/src/promql/src/extension_plan.rs b/src/promql/src/extension_plan.rs index 7aaeb83484..a72ddbeaaf 100644 --- a/src/promql/src/extension_plan.rs +++ b/src/promql/src/extension_plan.rs @@ -27,6 +27,8 @@ mod union_distinct_on; pub use absent::{Absent, AbsentExec, AbsentStream}; use datafusion::arrow::datatypes::{ArrowPrimitiveType, TimestampMillisecondType}; +use datafusion::common::DFSchemaRef; +use datafusion::error::{DataFusionError, Result as DataFusionResult}; pub use empty_metric::{EmptyMetric, EmptyMetricExec, EmptyMetricStream, build_special_time_expr}; pub use histogram_fold::{HistogramFold, HistogramFoldExec, HistogramFoldStream}; pub use instant_manipulate::{InstantManipulate, InstantManipulateExec, InstantManipulateStream}; @@ -40,3 +42,44 @@ pub use union_distinct_on::{UnionDistinctOn, UnionDistinctOnExec, UnionDistinctO pub type Millisecond = ::Native; const METRIC_NUM_SERIES: &str = "num_series"; + +/// Utilities for handling unfix logic in extension plans +/// Convert column name to 
index for serialization +pub fn serialize_column_index(schema: &DFSchemaRef, column_name: &str) -> u64 { + schema + .index_of_column_by_name(None, column_name) + .map(|idx| idx as u64) + .unwrap_or(u64::MAX) // make sure if not found, it will report error in deserialization +} + +/// Convert index back to column name for deserialization +pub fn resolve_column_name( + index: u64, + schema: &DFSchemaRef, + context: &str, + column_type: &str, +) -> DataFusionResult { + let columns = schema.columns(); + columns + .get(index as usize) + .ok_or_else(|| { + DataFusionError::Internal(format!( + "Failed to get {} column at idx {} during unfixing {} with columns:{:?}", + column_type, index, context, columns + )) + }) + .map(|field| field.name().to_string()) +} + +/// Batch process multiple column indices +pub fn resolve_column_names( + indices: &[u64], + schema: &DFSchemaRef, + context: &str, + column_type: &str, +) -> DataFusionResult> { + indices + .iter() + .map(|idx| resolve_column_name(*idx, schema, context, column_type)) + .collect() +} diff --git a/src/promql/src/extension_plan/absent.rs b/src/promql/src/extension_plan/absent.rs index 843f9a468f..2c01a6f570 100644 --- a/src/promql/src/extension_plan/absent.rs +++ b/src/promql/src/extension_plan/absent.rs @@ -47,7 +47,7 @@ use prost::Message; use snafu::ResultExt; use crate::error::DeserializeSnafu; -use crate::extension_plan::Millisecond; +use crate::extension_plan::{Millisecond, resolve_column_name, serialize_column_index}; /// Maximum number of rows per output batch const ABSENT_BATCH_SIZE: usize = 8192; @@ -62,6 +62,13 @@ pub struct Absent { fake_labels: Vec<(String, String)>, input: LogicalPlan, output_schema: DFSchemaRef, + unfix: Option, +} + +#[derive(Debug, PartialEq, Eq, Hash, PartialOrd)] +struct UnfixIndices { + pub time_index_column_idx: u64, + pub value_column_idx: u64, } impl PartialOrd for Absent { @@ -122,16 +129,44 @@ impl UserDefinedLogicalNodeCore for Absent { )); } - Ok(Self { - start: self.start, - end: self.end, - step: self.step, - time_index_column: self.time_index_column.clone(), - value_column: self.value_column.clone(), - fake_labels: self.fake_labels.clone(), - input: inputs[0].clone(), - output_schema: self.output_schema.clone(), - }) + let input: LogicalPlan = inputs[0].clone(); + let input_schema = input.schema(); + + if let Some(unfix) = &self.unfix { + // transform indices to names + let time_index_column = resolve_column_name( + unfix.time_index_column_idx, + input_schema, + "Absent", + "time index", + )?; + + let value_column = + resolve_column_name(unfix.value_column_idx, input_schema, "Absent", "value")?; + + // Recreate output schema with actual field names + Self::try_new( + self.start, + self.end, + self.step, + time_index_column, + value_column, + self.fake_labels.clone(), + input, + ) + } else { + Ok(Self { + start: self.start, + end: self.end, + step: self.step, + time_index_column: self.time_index_column.clone(), + value_column: self.value_column.clone(), + fake_labels: self.fake_labels.clone(), + input, + output_schema: self.output_schema.clone(), + unfix: None, + }) + } } } @@ -179,6 +214,7 @@ impl Absent { fake_labels, input, output_schema, + unfix: None, }) } @@ -209,12 +245,17 @@ impl Absent { } pub fn serialize(&self) -> Vec { + let time_index_column_idx = + serialize_column_index(self.input.schema(), &self.time_index_column); + + let value_column_idx = serialize_column_index(self.input.schema(), &self.value_column); + pb::Absent { start: self.start, end: self.end, step: self.step, - 
time_index_column: self.time_index_column.clone(), - value_column: self.value_column.clone(), + time_index_column_idx, + value_column_idx, fake_labels: self .fake_labels .iter() @@ -223,6 +264,7 @@ impl Absent { value: value.clone(), }) .collect(), + ..Default::default() } .encode_to_vec() } @@ -233,19 +275,27 @@ impl Absent { produce_one_row: false, schema: Arc::new(DFSchema::empty()), }); - Self::try_new( - pb_absent.start, - pb_absent.end, - pb_absent.step, - pb_absent.time_index_column, - pb_absent.value_column, - pb_absent + + let unfix = UnfixIndices { + time_index_column_idx: pb_absent.time_index_column_idx, + value_column_idx: pb_absent.value_column_idx, + }; + + Ok(Self { + start: pb_absent.start, + end: pb_absent.end, + step: pb_absent.step, + time_index_column: String::new(), + value_column: String::new(), + fake_labels: pb_absent .fake_labels .iter() .map(|label| (label.key.clone(), label.value.clone())) .collect(), - placeholder_plan, - ) + input: placeholder_plan, + output_schema: Arc::new(DFSchema::empty()), + unfix: Some(unfix), + }) } } diff --git a/src/promql/src/extension_plan/instant_manipulate.rs b/src/promql/src/extension_plan/instant_manipulate.rs index 110a926399..aa4cd6d184 100644 --- a/src/promql/src/extension_plan/instant_manipulate.rs +++ b/src/promql/src/extension_plan/instant_manipulate.rs @@ -41,7 +41,9 @@ use prost::Message; use snafu::ResultExt; use crate::error::{DeserializeSnafu, Result}; -use crate::extension_plan::{METRIC_NUM_SERIES, Millisecond}; +use crate::extension_plan::{ + METRIC_NUM_SERIES, Millisecond, resolve_column_name, serialize_column_index, +}; use crate::metrics::PROMQL_SERIES_COUNT; /// Manipulate the input record batch to make it suitable for Instant Operator. @@ -59,6 +61,13 @@ pub struct InstantManipulate { /// A optional column for validating staleness field_column: Option, input: LogicalPlan, + unfix: Option, +} + +#[derive(Debug, PartialEq, Eq, Hash, PartialOrd)] +struct UnfixIndices { + pub time_index_idx: u64, + pub field_index_idx: u64, } impl UserDefinedLogicalNodeCore for InstantManipulate { @@ -97,15 +106,51 @@ impl UserDefinedLogicalNodeCore for InstantManipulate { )); } - Ok(Self { - start: self.start, - end: self.end, - lookback_delta: self.lookback_delta, - interval: self.interval, - time_index_column: self.time_index_column.clone(), - field_column: self.field_column.clone(), - input: inputs.into_iter().next().unwrap(), - }) + let input: LogicalPlan = inputs.into_iter().next().unwrap(); + let input_schema = input.schema(); + + if let Some(unfix) = &self.unfix { + // transform indices to names + let time_index_column = resolve_column_name( + unfix.time_index_idx, + input_schema, + "InstantManipulate", + "time index", + )?; + + let field_column = if unfix.field_index_idx == u64::MAX { + None + } else { + Some(resolve_column_name( + unfix.field_index_idx, + input_schema, + "InstantManipulate", + "field", + )?) 
+ }; + + Ok(Self { + start: self.start, + end: self.end, + lookback_delta: self.lookback_delta, + interval: self.interval, + time_index_column, + field_column, + input, + unfix: None, + }) + } else { + Ok(Self { + start: self.start, + end: self.end, + lookback_delta: self.lookback_delta, + interval: self.interval, + time_index_column: self.time_index_column.clone(), + field_column: self.field_column.clone(), + input, + unfix: None, + }) + } } } @@ -127,6 +172,7 @@ impl InstantManipulate { time_index_column, field_column, input, + unfix: None, } } @@ -148,13 +194,22 @@ impl InstantManipulate { } pub fn serialize(&self) -> Vec { + let time_index_idx = serialize_column_index(self.input.schema(), &self.time_index_column); + + let field_index_idx = self + .field_column + .as_ref() + .map(|name| serialize_column_index(self.input.schema(), name)) + .unwrap_or(u64::MAX); + pb::InstantManipulate { start: self.start, end: self.end, interval: self.interval, lookback_delta: self.lookback_delta, - time_index: self.time_index_column.clone(), - field_index: self.field_column.clone().unwrap_or_default(), + time_index_idx, + field_index_idx, + ..Default::default() } .encode_to_vec() } @@ -166,19 +221,21 @@ impl InstantManipulate { produce_one_row: false, schema: Arc::new(DFSchema::empty()), }); - let field_column = if pb_instant_manipulate.field_index.is_empty() { - None - } else { - Some(pb_instant_manipulate.field_index) + + let unfix = UnfixIndices { + time_index_idx: pb_instant_manipulate.time_index_idx, + field_index_idx: pb_instant_manipulate.field_index_idx, }; + Ok(Self { start: pb_instant_manipulate.start, end: pb_instant_manipulate.end, lookback_delta: pb_instant_manipulate.lookback_delta, interval: pb_instant_manipulate.interval, - time_index_column: pb_instant_manipulate.time_index, - field_column, + time_index_column: String::new(), + field_column: None, input: placeholder_plan, + unfix: Some(unfix), }) } } diff --git a/src/promql/src/extension_plan/normalize.rs b/src/promql/src/extension_plan/normalize.rs index eddb60f000..ccd21a9cd7 100644 --- a/src/promql/src/extension_plan/normalize.rs +++ b/src/promql/src/extension_plan/normalize.rs @@ -40,7 +40,9 @@ use prost::Message; use snafu::ResultExt; use crate::error::{DeserializeSnafu, Result}; -use crate::extension_plan::{METRIC_NUM_SERIES, Millisecond}; +use crate::extension_plan::{ + METRIC_NUM_SERIES, Millisecond, resolve_column_name, serialize_column_index, +}; use crate::metrics::PROMQL_SERIES_COUNT; /// Normalize the input record batch. 
Notice that for simplicity, this method assumes @@ -58,6 +60,13 @@ pub struct SeriesNormalize { tag_columns: Vec, input: LogicalPlan, + unfix: Option, +} + +#[derive(Debug, PartialEq, Eq, Hash, PartialOrd)] +struct UnfixIndices { + pub time_index_idx: u64, + pub tag_column_indices: Vec, } impl UserDefinedLogicalNodeCore for SeriesNormalize { @@ -96,13 +105,42 @@ impl UserDefinedLogicalNodeCore for SeriesNormalize { )); } - Ok(Self { - offset: self.offset, - time_index_column_name: self.time_index_column_name.clone(), - need_filter_out_nan: self.need_filter_out_nan, - input: inputs.into_iter().next().unwrap(), - tag_columns: self.tag_columns.clone(), - }) + let input: LogicalPlan = inputs.into_iter().next().unwrap(); + let input_schema = input.schema(); + + if let Some(unfix) = &self.unfix { + // transform indices to names + let time_index_column_name = resolve_column_name( + unfix.time_index_idx, + input_schema, + "SeriesNormalize", + "time index", + )?; + + let tag_columns = unfix + .tag_column_indices + .iter() + .map(|idx| resolve_column_name(*idx, input_schema, "SeriesNormalize", "tag")) + .collect::>>()?; + + Ok(Self { + offset: self.offset, + time_index_column_name, + need_filter_out_nan: self.need_filter_out_nan, + tag_columns, + input, + unfix: None, + }) + } else { + Ok(Self { + offset: self.offset, + time_index_column_name: self.time_index_column_name.clone(), + need_filter_out_nan: self.need_filter_out_nan, + tag_columns: self.tag_columns.clone(), + input, + unfix: None, + }) + } } } @@ -120,6 +158,7 @@ impl SeriesNormalize { need_filter_out_nan, tag_columns, input, + unfix: None, } } @@ -139,11 +178,21 @@ impl SeriesNormalize { } pub fn serialize(&self) -> Vec { + let time_index_idx = + serialize_column_index(self.input.schema(), &self.time_index_column_name); + + let tag_column_indices = self + .tag_columns + .iter() + .map(|name| serialize_column_index(self.input.schema(), name)) + .collect::>(); + pb::SeriesNormalize { offset: self.offset, - time_index: self.time_index_column_name.clone(), + time_index_idx, filter_nan: self.need_filter_out_nan, - tag_columns: self.tag_columns.clone(), + tag_column_indices, + ..Default::default() } .encode_to_vec() } @@ -154,13 +203,20 @@ impl SeriesNormalize { produce_one_row: false, schema: Arc::new(DFSchema::empty()), }); - Ok(Self::new( - pb_normalize.offset, - pb_normalize.time_index, - pb_normalize.filter_nan, - pb_normalize.tag_columns, - placeholder_plan, - )) + + let unfix = UnfixIndices { + time_index_idx: pb_normalize.time_index_idx, + tag_column_indices: pb_normalize.tag_column_indices.clone(), + }; + + Ok(Self { + offset: pb_normalize.offset, + time_index_column_name: String::new(), + need_filter_out_nan: pb_normalize.filter_nan, + tag_columns: Vec::new(), + input: placeholder_plan, + unfix: Some(unfix), + }) } } diff --git a/src/promql/src/extension_plan/range_manipulate.rs b/src/promql/src/extension_plan/range_manipulate.rs index 1e18e34cd1..540fa4c174 100644 --- a/src/promql/src/extension_plan/range_manipulate.rs +++ b/src/promql/src/extension_plan/range_manipulate.rs @@ -18,6 +18,7 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; +use common_telemetry::debug; use datafusion::arrow::array::{Array, ArrayRef, Int64Array, TimestampMillisecondArray}; use datafusion::arrow::compute; use datafusion::arrow::datatypes::{Field, SchemaRef}; @@ -43,7 +44,9 @@ use prost::Message; use snafu::ResultExt; use crate::error::{DeserializeSnafu, Result}; -use crate::extension_plan::{METRIC_NUM_SERIES, Millisecond}; +use 
crate::extension_plan::{ + METRIC_NUM_SERIES, Millisecond, resolve_column_name, serialize_column_index, +}; use crate::metrics::PROMQL_SERIES_COUNT; use crate::range_array::RangeArray; @@ -62,11 +65,17 @@ pub struct RangeManipulate { end: Millisecond, interval: Millisecond, range: Millisecond, - time_index: String, field_columns: Vec, input: LogicalPlan, output_schema: DFSchemaRef, + unfix: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +struct UnfixIndices { + pub time_index_idx: u64, + pub tag_column_indices: Vec, } impl RangeManipulate { @@ -90,6 +99,7 @@ impl RangeManipulate { field_columns, input, output_schema, + unfix: None, }) } @@ -181,13 +191,22 @@ impl RangeManipulate { } pub fn serialize(&self) -> Vec { + let time_index_idx = serialize_column_index(self.input.schema(), &self.time_index); + + let tag_column_indices = self + .field_columns + .iter() + .map(|name| serialize_column_index(self.input.schema(), name)) + .collect::>(); + pb::RangeManipulate { start: self.start, end: self.end, interval: self.interval, range: self.range, - time_index: self.time_index.clone(), - tag_columns: self.field_columns.clone(), + time_index_idx, + tag_column_indices, + ..Default::default() } .encode_to_vec() } @@ -200,6 +219,12 @@ impl RangeManipulate { schema: empty_schema.clone(), }); + let unfix = UnfixIndices { + time_index_idx: pb_range_manipulate.time_index_idx, + tag_column_indices: pb_range_manipulate.tag_column_indices.clone(), + }; + debug!("RangeManipulate deserialize unfix: {:?}", unfix); + // Unlike `Self::new()`, this method doesn't check the input schema as it will fail // because the input schema is empty. // But this is Ok since datafusion guarantees to call `with_exprs_and_inputs` for the @@ -209,10 +234,11 @@ impl RangeManipulate { end: pb_range_manipulate.end, interval: pb_range_manipulate.interval, range: pb_range_manipulate.range, - time_index: pb_range_manipulate.time_index, - field_columns: pb_range_manipulate.tag_columns, + time_index: String::new(), + field_columns: Vec::new(), input: placeholder_plan, output_schema: empty_schema, + unfix: Some(unfix), }) } } @@ -286,19 +312,52 @@ impl UserDefinedLogicalNodeCore for RangeManipulate { let input: LogicalPlan = inputs.pop().unwrap(); let input_schema = input.schema(); - let output_schema = - Self::calculate_output_schema(input_schema, &self.time_index, &self.field_columns)?; - Ok(Self { - start: self.start, - end: self.end, - interval: self.interval, - range: self.range, - time_index: self.time_index.clone(), - field_columns: self.field_columns.clone(), - input, - output_schema, - }) + if let Some(unfix) = &self.unfix { + // transform indices to names + let time_index = resolve_column_name( + unfix.time_index_idx, + input_schema, + "RangeManipulate", + "time index", + )?; + + let field_columns = unfix + .tag_column_indices + .iter() + .map(|idx| resolve_column_name(*idx, input_schema, "RangeManipulate", "tag")) + .collect::>>()?; + + let output_schema = + Self::calculate_output_schema(input_schema, &time_index, &field_columns)?; + + Ok(Self { + start: self.start, + end: self.end, + interval: self.interval, + range: self.range, + time_index, + field_columns, + input, + output_schema, + unfix: None, + }) + } else { + let output_schema = + Self::calculate_output_schema(input_schema, &self.time_index, &self.field_columns)?; + + Ok(Self { + start: self.start, + end: self.end, + interval: self.interval, + range: self.range, + time_index: self.time_index.clone(), + field_columns: self.field_columns.clone(), + input, + 
output_schema, + unfix: None, + }) + } } } diff --git a/src/promql/src/extension_plan/scalar_calculate.rs b/src/promql/src/extension_plan/scalar_calculate.rs index 04768053fb..8619e79387 100644 --- a/src/promql/src/extension_plan/scalar_calculate.rs +++ b/src/promql/src/extension_plan/scalar_calculate.rs @@ -41,7 +41,7 @@ use prost::Message; use snafu::ResultExt; use crate::error::{ColumnNotFoundSnafu, DataFusionPlanningSnafu, DeserializeSnafu, Result}; -use crate::extension_plan::Millisecond; +use crate::extension_plan::{Millisecond, resolve_column_name, serialize_column_index}; /// `ScalarCalculate` is the custom logical plan to calculate /// [`scalar`](https://prometheus.io/docs/prometheus/latest/querying/functions/#scalar) @@ -59,6 +59,14 @@ pub struct ScalarCalculate { field_column: String, input: LogicalPlan, output_schema: DFSchemaRef, + unfix: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +struct UnfixIndices { + pub time_index_idx: u64, + pub tag_column_indices: Vec, + pub field_column_idx: u64, } impl ScalarCalculate { @@ -101,6 +109,7 @@ impl ScalarCalculate { field_column: field_column.to_string(), input, output_schema: Arc::new(schema), + unfix: None, }) } @@ -149,13 +158,24 @@ impl ScalarCalculate { } pub fn serialize(&self) -> Vec { + let time_index_idx = serialize_column_index(self.input.schema(), &self.time_index); + + let tag_column_indices = self + .tag_columns + .iter() + .map(|name| serialize_column_index(self.input.schema(), name)) + .collect::>(); + + let field_column_idx = serialize_column_index(self.input.schema(), &self.field_column); + pb::ScalarCalculate { start: self.start, end: self.end, interval: self.interval, - time_index: self.time_index.clone(), - tag_columns: self.tag_columns.clone(), - field_column: self.field_column.clone(), + time_index_idx, + tag_column_indices, + field_column_idx, + ..Default::default() } .encode_to_vec() } @@ -166,17 +186,20 @@ impl ScalarCalculate { produce_one_row: false, schema: Arc::new(DFSchema::empty()), }); + + let unfix = UnfixIndices { + time_index_idx: pb_scalar_calculate.time_index_idx, + tag_column_indices: pb_scalar_calculate.tag_column_indices.clone(), + field_column_idx: pb_scalar_calculate.field_column_idx, + }; + // TODO(Taylor-lagrange): Supports timestamps of different precisions let ts_field = Field::new( - &pb_scalar_calculate.time_index, + "placeholder_time_index", DataType::Timestamp(TimeUnit::Millisecond, None), true, ); - let val_field = Field::new( - format!("scalar({})", pb_scalar_calculate.field_column), - DataType::Float64, - true, - ); + let val_field = Field::new("placeholder_field", DataType::Float64, true); // TODO(Taylor-lagrange): missing tablename in pb let schema = DFSchema::new_with_metadata( vec![(None, Arc::new(ts_field)), (None, Arc::new(val_field))], @@ -188,11 +211,12 @@ impl ScalarCalculate { start: pb_scalar_calculate.start, end: pb_scalar_calculate.end, interval: pb_scalar_calculate.interval, - time_index: pb_scalar_calculate.time_index, - tag_columns: pb_scalar_calculate.tag_columns, - field_column: pb_scalar_calculate.field_column, + time_index: String::new(), + tag_columns: Vec::new(), + field_column: String::new(), output_schema: Arc::new(schema), input: placeholder_plan, + unfix: Some(unfix), }) } } @@ -259,16 +283,70 @@ impl UserDefinedLogicalNodeCore for ScalarCalculate { "ScalarCalculate should not have any expressions".to_string(), )); } - Ok(ScalarCalculate { - start: self.start, - end: self.end, - interval: self.interval, - time_index: 
self.time_index.clone(), - tag_columns: self.tag_columns.clone(), - field_column: self.field_column.clone(), - input: inputs.into_iter().next().unwrap(), - output_schema: self.output_schema.clone(), - }) + + let input: LogicalPlan = inputs.into_iter().next().unwrap(); + let input_schema = input.schema(); + + if let Some(unfix) = &self.unfix { + // transform indices to names + let time_index = resolve_column_name( + unfix.time_index_idx, + input_schema, + "ScalarCalculate", + "time index", + )?; + + let tag_columns = unfix + .tag_column_indices + .iter() + .map(|idx| resolve_column_name(*idx, input_schema, "ScalarCalculate", "tag")) + .collect::>>()?; + + let field_column = resolve_column_name( + unfix.field_column_idx, + input_schema, + "ScalarCalculate", + "field", + )?; + + // Recreate output schema with actual field names + let ts_field = Field::new( + &time_index, + DataType::Timestamp(TimeUnit::Millisecond, None), + true, + ); + let val_field = + Field::new(format!("scalar({})", field_column), DataType::Float64, true); + let schema = DFSchema::new_with_metadata( + vec![(None, Arc::new(ts_field)), (None, Arc::new(val_field))], + HashMap::new(), + ) + .context(DataFusionPlanningSnafu)?; + + Ok(ScalarCalculate { + start: self.start, + end: self.end, + interval: self.interval, + time_index, + tag_columns, + field_column, + input, + output_schema: Arc::new(schema), + unfix: None, + }) + } else { + Ok(ScalarCalculate { + start: self.start, + end: self.end, + interval: self.interval, + time_index: self.time_index.clone(), + tag_columns: self.tag_columns.clone(), + field_column: self.field_column.clone(), + input, + output_schema: self.output_schema.clone(), + unfix: None, + }) + } } } diff --git a/src/promql/src/extension_plan/series_divide.rs b/src/promql/src/extension_plan/series_divide.rs index ece5263741..8e50da113b 100644 --- a/src/promql/src/extension_plan/series_divide.rs +++ b/src/promql/src/extension_plan/series_divide.rs @@ -41,7 +41,7 @@ use prost::Message; use snafu::ResultExt; use crate::error::{DeserializeSnafu, Result}; -use crate::extension_plan::METRIC_NUM_SERIES; +use crate::extension_plan::{METRIC_NUM_SERIES, resolve_column_name, serialize_column_index}; use crate::metrics::PROMQL_SERIES_COUNT; #[derive(Debug, PartialEq, Eq, Hash, PartialOrd)] @@ -53,6 +53,13 @@ pub struct SeriesDivide { /// here can avoid unnecessary sort in follow on plans. 
time_index_column: String, input: LogicalPlan, + unfix: Option, +} + +#[derive(Debug, PartialEq, Eq, Hash, PartialOrd)] +struct UnfixIndices { + pub tag_column_indices: Vec, + pub time_index_column_idx: u64, } impl UserDefinedLogicalNodeCore for SeriesDivide { @@ -87,11 +94,38 @@ impl UserDefinedLogicalNodeCore for SeriesDivide { )); } - Ok(Self { - tag_columns: self.tag_columns.clone(), - time_index_column: self.time_index_column.clone(), - input: inputs[0].clone(), - }) + let input: LogicalPlan = inputs[0].clone(); + let input_schema = input.schema(); + + if let Some(unfix) = &self.unfix { + // transform indices to names + let tag_columns = unfix + .tag_column_indices + .iter() + .map(|idx| resolve_column_name(*idx, input_schema, "SeriesDivide", "tag")) + .collect::>>()?; + + let time_index_column = resolve_column_name( + unfix.time_index_column_idx, + input_schema, + "SeriesDivide", + "time index", + )?; + + Ok(Self { + tag_columns, + time_index_column, + input, + unfix: None, + }) + } else { + Ok(Self { + tag_columns: self.tag_columns.clone(), + time_index_column: self.time_index_column.clone(), + input, + unfix: None, + }) + } } } @@ -101,6 +135,7 @@ impl SeriesDivide { tag_columns, time_index_column, input, + unfix: None, } } @@ -122,9 +157,19 @@ impl SeriesDivide { } pub fn serialize(&self) -> Vec { + let tag_column_indices = self + .tag_columns + .iter() + .map(|name| serialize_column_index(self.input.schema(), name)) + .collect::>(); + + let time_index_column_idx = + serialize_column_index(self.input.schema(), &self.time_index_column); + pb::SeriesDivide { - tag_columns: self.tag_columns.clone(), - time_index_column: self.time_index_column.clone(), + tag_column_indices, + time_index_column_idx, + ..Default::default() } .encode_to_vec() } @@ -135,10 +180,17 @@ impl SeriesDivide { produce_one_row: false, schema: Arc::new(DFSchema::empty()), }); + + let unfix = UnfixIndices { + tag_column_indices: pb_series_divide.tag_column_indices.clone(), + time_index_column_idx: pb_series_divide.time_index_column_idx, + }; + Ok(Self { - tag_columns: pb_series_divide.tag_columns, - time_index_column: pb_series_divide.time_index_column, + tag_columns: Vec::new(), + time_index_column: String::new(), input: placeholder_plan, + unfix: Some(unfix), }) } } diff --git a/tests/cases/standalone/common/promql/encode_substrait.result b/tests/cases/standalone/common/promql/encode_substrait.result new file mode 100644 index 0000000000..802a2308bb --- /dev/null +++ b/tests/cases/standalone/common/promql/encode_substrait.result @@ -0,0 +1,49 @@ +create table count_total ( + ts timestamp time index, + tag_a string, + tag_b string, + val double, + primary key (tag_a, tag_b), +); + +Affected Rows: 0 + +-- if `RangeManipulate` can be encoded/decoded correctly in substrait, the following queries should pass +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE (partitioning.*) REDACTED +tql explain (0, 100, '1s') + increase(count_total{ + tag_a="ffa", + }[1h])[12h:1h]; + ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | MergeScan [is_placeholder=false, remote_input=[ | +| | PromRangeManipulate: req range=[0..0], 
interval=[300000], eval range=[43200000], time index=[ts], values=["prom_increase(ts_range,val,ts,Int64(3600000))"] | +| | Filter: prom_increase(ts_range,val,ts,Int64(3600000)) IS NOT NULL | +| | Projection: count_total.ts, prom_increase(ts_range, val, count_total.ts, Int64(3600000)) AS prom_increase(ts_range,val,ts,Int64(3600000)), count_total.tag_a, count_total.tag_b | +| | PromRangeManipulate: req range=[-39600000..0], interval=[3600000], eval range=[3600000], time index=[ts], values=["val"] | +| | PromSeriesNormalize: offset=[0], time index=[ts], filter NaN: [true] | +| | PromSeriesDivide: tags=["tag_a", "tag_b"] | +| | Sort: count_total.tag_a ASC NULLS FIRST, count_total.tag_b ASC NULLS FIRST, count_total.ts ASC NULLS FIRST | +| | Filter: count_total.tag_a = Utf8("ffa") AND count_total.ts >= TimestampMillisecond(-43500000, None) AND count_total.ts <= TimestampMillisecond(300000, None) | +| | TableScan: count_total | +| | ]] | +| physical_plan | CooperativeExec | +| | MergeScanExec: REDACTED +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +tql eval (0, 100, '1s') + increase(count_total{ + tag_a="ffa", + }[1h])[12h:1h]; + +++ +++ + +drop table count_total; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/promql/encode_substrait.sql b/tests/cases/standalone/common/promql/encode_substrait.sql new file mode 100644 index 0000000000..195c383ebd --- /dev/null +++ b/tests/cases/standalone/common/promql/encode_substrait.sql @@ -0,0 +1,22 @@ +create table count_total ( + ts timestamp time index, + tag_a string, + tag_b string, + val double, + primary key (tag_a, tag_b), +); + +-- if `RangeManipulate` can be encoded/decoded correctly in substrait, the following queries should pass +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE (partitioning.*) REDACTED +tql explain (0, 100, '1s') + increase(count_total{ + tag_a="ffa", + }[1h])[12h:1h]; + +tql eval (0, 100, '1s') + increase(count_total{ + tag_a="ffa", + }[1h])[12h:1h]; + +drop table count_total; From 62b51c673635ad597c2a1a5200512beeccf52fb0 Mon Sep 17 00:00:00 2001 From: jeremyhi Date: Wed, 22 Oct 2025 17:30:36 +0800 Subject: [PATCH 02/14] feat: writer mem limiter for http and grpc service (#7092) * feat: writer mem limiter for http and grpc service Signed-off-by: jeremyhi * fix: docs Signed-off-by: jeremyhi * feat: add metrics for limiter Signed-off-by: jeremyhi * Apply suggestion from @MichaelScofield Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com> * chore: refactor try_acquire Signed-off-by: jeremyhi * chore: make size human readable Signed-off-by: jeremyhi --------- Signed-off-by: jeremyhi Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com> --- config/config.md | 4 + config/frontend.example.toml | 8 + config/standalone.example.toml | 8 + src/flow/src/server.rs | 1 + src/servers/src/error.rs | 15 ++ src/servers/src/grpc.rs | 16 ++ src/servers/src/grpc/builder.rs | 21 ++- src/servers/src/grpc/database.rs | 46 +++++ src/servers/src/grpc/flight.rs | 51 +++++- src/servers/src/grpc/greptime_handler.rs | 1 + src/servers/src/grpc/memory_limit.rs | 72 ++++++++ src/servers/src/http.rs | 15 ++ src/servers/src/http/memory_limit.rs | 52 ++++++ src/servers/src/lib.rs | 4 + src/servers/src/metrics.rs | 20 +++ src/servers/src/request_limiter.rs | 215 +++++++++++++++++++++++ tests-integration/tests/grpc.rs | 161 ++++++++++++++++- 
tests-integration/tests/http.rs | 2 + 18 files changed, 704 insertions(+), 8 deletions(-) create mode 100644 src/servers/src/grpc/memory_limit.rs create mode 100644 src/servers/src/http/memory_limit.rs create mode 100644 src/servers/src/request_limiter.rs diff --git a/config/config.md b/config/config.md index 46a0aee1a7..72d48b5bcb 100644 --- a/config/config.md +++ b/config/config.md @@ -25,12 +25,14 @@ | `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. | | `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. | | `http.body_limit` | String | `64MB` | HTTP request body limit.
The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
Set to 0 to disable limit. | +| `http.max_total_body_memory` | String | Unset | Maximum total memory for all concurrent HTTP request bodies.
Set to 0 to disable the limit. Default: "0" (unlimited) | | `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default
This allows browser to access http APIs without CORS restrictions | | `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. | | `http.prom_validation_mode` | String | `strict` | Whether to enable validation for Prometheus remote write requests.
Available options:
- strict: deny invalid UTF-8 strings (default).
- lossy: allow invalid UTF-8 strings, replace invalid characters with REPLACEMENT_CHARACTER(U+FFFD).
- unchecked: do not validate strings. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
+| `grpc.max_total_message_memory` | String | Unset | Maximum total memory for all concurrent gRPC request messages.
Set to 0 to disable the limit. Default: "0" (unlimited) | | `grpc.max_connection_age` | String | Unset | The maximum connection age for gRPC connection.
The value can be a human-readable time string. For example: `10m` for ten minutes or `1h` for one hour.
Refer to https://grpc.io/docs/guides/keepalive/ for more details. | | `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. | | `grpc.tls.mode` | String | `disable` | TLS mode. | @@ -235,6 +237,7 @@ | `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. | | `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. | | `http.body_limit` | String | `64MB` | HTTP request body limit.
The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
Set to 0 to disable limit. | +| `http.max_total_body_memory` | String | Unset | Maximum total memory for all concurrent HTTP request bodies.
Set to 0 to disable the limit. Default: "0" (unlimited) | | `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default
This allows browser to access http APIs without CORS restrictions | | `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. | | `http.prom_validation_mode` | String | `strict` | Whether to enable validation for Prometheus remote write requests.
Available options:
- strict: deny invalid UTF-8 strings (default).
- lossy: allow invalid UTF-8 strings, replace invalid characters with REPLACEMENT_CHARACTER(U+FFFD).
- unchecked: do not validate strings. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.server_addr` | String | `127.0.0.1:4001` | The address advertised to the metasrv, and used for connections from outside the host.
If left empty or unset, the server will automatically use the IP address of the first network interface
on the host, with the same port number as the one specified in `grpc.bind_addr`. | | `grpc.runtime_size` | Integer | `8` | The number of server worker threads. | +| `grpc.max_total_message_memory` | String | Unset | Maximum total memory for all concurrent gRPC request messages.
Set to 0 to disable the limit. Default: "0" (unlimited) | | `grpc.flight_compression` | String | `arrow_ipc` | Compression mode for frontend side Arrow IPC service. Available options:
- `none`: disable all compression
- `transport`: only enable gRPC transport compression (zstd)
- `arrow_ipc`: only enable Arrow IPC compression (lz4)
- `all`: enable all compression.
Default to `none` | | `grpc.max_connection_age` | String | Unset | The maximum connection age for gRPC connection.
The value can be a human-readable time string. For example: `10m` for ten minutes or `1h` for one hour.
Refer to https://grpc.io/docs/guides/keepalive/ for more details. | | `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. | diff --git a/config/frontend.example.toml b/config/frontend.example.toml index b26d88323e..9ffcdad540 100644 --- a/config/frontend.example.toml +++ b/config/frontend.example.toml @@ -31,6 +31,10 @@ timeout = "0s" ## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`. ## Set to 0 to disable limit. body_limit = "64MB" +## Maximum total memory for all concurrent HTTP request bodies. +## Set to 0 to disable the limit. Default: "0" (unlimited) +## @toml2docs:none-default +#+ max_total_body_memory = "1GB" ## HTTP CORS support, it's turned on by default ## This allows browser to access http APIs without CORS restrictions enable_cors = true @@ -54,6 +58,10 @@ bind_addr = "127.0.0.1:4001" server_addr = "127.0.0.1:4001" ## The number of server worker threads. runtime_size = 8 +## Maximum total memory for all concurrent gRPC request messages. +## Set to 0 to disable the limit. Default: "0" (unlimited) +## @toml2docs:none-default +#+ max_total_message_memory = "1GB" ## Compression mode for frontend side Arrow IPC service. Available options: ## - `none`: disable all compression ## - `transport`: only enable gRPC transport compression (zstd) diff --git a/config/standalone.example.toml b/config/standalone.example.toml index 5fae0f444f..744dbbe751 100644 --- a/config/standalone.example.toml +++ b/config/standalone.example.toml @@ -36,6 +36,10 @@ timeout = "0s" ## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`. ## Set to 0 to disable limit. body_limit = "64MB" +## Maximum total memory for all concurrent HTTP request bodies. +## Set to 0 to disable the limit. Default: "0" (unlimited) +## @toml2docs:none-default +#+ max_total_body_memory = "1GB" ## HTTP CORS support, it's turned on by default ## This allows browser to access http APIs without CORS restrictions enable_cors = true @@ -56,6 +60,10 @@ prom_validation_mode = "strict" bind_addr = "127.0.0.1:4001" ## The number of server worker threads. runtime_size = 8 +## Maximum total memory for all concurrent gRPC request messages. +## Set to 0 to disable the limit. Default: "0" (unlimited) +## @toml2docs:none-default +#+ max_total_message_memory = "1GB" ## The maximum connection age for gRPC connection. ## The value can be a human-readable time string. For example: `10m` for ten minutes or `1h` for one hour. ## Refer to https://grpc.io/docs/guides/keepalive/ for more details. 
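Both limiters are disabled by default. A minimal sketch of enabling them in a node's TOML configuration, assuming the commented-out examples above (the "1GB" values are illustrative, not recommendations):

[http]
max_total_body_memory = "1GB"

[grpc]
max_total_message_memory = "1GB"

Setting either option to "0", or leaving it unset, keeps the corresponding limiter disabled.
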
diff --git a/src/flow/src/server.rs b/src/flow/src/server.rs index 3f46203ba0..eae97756a5 100644 --- a/src/flow/src/server.rs +++ b/src/flow/src/server.rs @@ -490,6 +490,7 @@ impl<'a> FlownodeServiceBuilder<'a> { let config = GrpcServerConfig { max_recv_message_size: opts.grpc.max_recv_message_size.as_bytes() as usize, max_send_message_size: opts.grpc.max_send_message_size.as_bytes() as usize, + max_total_message_memory: opts.grpc.max_total_message_memory.as_bytes() as usize, tls: opts.grpc.tls.clone(), max_connection_age: opts.grpc.max_connection_age, }; diff --git a/src/servers/src/error.rs b/src/servers/src/error.rs index d36bdd1494..c7e5c5d07a 100644 --- a/src/servers/src/error.rs +++ b/src/servers/src/error.rs @@ -20,6 +20,7 @@ use axum::http::StatusCode as HttpStatusCode; use axum::response::{IntoResponse, Response}; use axum::{Json, http}; use base64::DecodeError; +use common_base::readable_size::ReadableSize; use common_error::define_into_tonic_status; use common_error::ext::{BoxedError, ErrorExt}; use common_error::status_code::StatusCode; @@ -164,6 +165,18 @@ pub enum Error { location: Location, }, + #[snafu(display( + "Too many concurrent large requests, limit: {}, request size: {}", + ReadableSize(*limit as u64), + ReadableSize(*request_size as u64) + ))] + TooManyConcurrentRequests { + limit: usize, + request_size: usize, + #[snafu(implicit)] + location: Location, + }, + #[snafu(display("Invalid query: {}", reason))] InvalidQuery { reason: String, @@ -729,6 +742,8 @@ impl ErrorExt for Error { InvalidUtf8Value { .. } | InvalidHeaderValue { .. } => StatusCode::InvalidArguments, + TooManyConcurrentRequests { .. } => StatusCode::RuntimeResourcesExhausted, + ParsePromQL { source, .. } => source.status_code(), Other { source, .. } => source.status_code(), diff --git a/src/servers/src/grpc.rs b/src/servers/src/grpc.rs index 2f759db2a0..1c479a04de 100644 --- a/src/servers/src/grpc.rs +++ b/src/servers/src/grpc.rs @@ -19,6 +19,7 @@ mod database; pub mod flight; pub mod frontend_grpc_handler; pub mod greptime_handler; +pub mod memory_limit; pub mod prom_query_gateway; pub mod region_server; @@ -51,6 +52,7 @@ use crate::error::{AlreadyStartedSnafu, InternalSnafu, Result, StartGrpcSnafu, T use crate::metrics::MetricsMiddlewareLayer; use crate::otel_arrow::{HeaderInterceptor, OtelArrowServiceHandler}; use crate::query_handler::OpenTelemetryProtocolHandlerRef; +use crate::request_limiter::RequestMemoryLimiter; use crate::server::Server; use crate::tls::TlsOption; @@ -67,6 +69,8 @@ pub struct GrpcOptions { pub max_recv_message_size: ReadableSize, /// Max gRPC sending(encoding) message size pub max_send_message_size: ReadableSize, + /// Maximum total memory for all concurrent gRPC request messages. 0 disables the limit. + pub max_total_message_memory: ReadableSize, /// Compression mode in Arrow Flight service. 
pub flight_compression: FlightCompression, pub runtime_size: usize, @@ -116,6 +120,7 @@ impl GrpcOptions { GrpcServerConfig { max_recv_message_size: self.max_recv_message_size.as_bytes() as usize, max_send_message_size: self.max_send_message_size.as_bytes() as usize, + max_total_message_memory: self.max_total_message_memory.as_bytes() as usize, tls: self.tls.clone(), max_connection_age: self.max_connection_age, } @@ -134,6 +139,7 @@ impl Default for GrpcOptions { server_addr: String::new(), max_recv_message_size: DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE, max_send_message_size: DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE, + max_total_message_memory: ReadableSize(0), flight_compression: FlightCompression::ArrowIpc, runtime_size: 8, tls: TlsOption::default(), @@ -153,6 +159,7 @@ impl GrpcOptions { server_addr: format!("127.0.0.1:{}", DEFAULT_INTERNAL_GRPC_ADDR_PORT), max_recv_message_size: DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE, max_send_message_size: DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE, + max_total_message_memory: ReadableSize(0), flight_compression: FlightCompression::ArrowIpc, runtime_size: 8, tls: TlsOption::default(), @@ -217,6 +224,7 @@ pub struct GrpcServer { bind_addr: Option, name: Option, config: GrpcServerConfig, + memory_limiter: RequestMemoryLimiter, } /// Grpc Server configuration @@ -226,6 +234,8 @@ pub struct GrpcServerConfig { pub max_recv_message_size: usize, // Max gRPC sending(encoding) message size pub max_send_message_size: usize, + /// Maximum total memory for all concurrent gRPC request messages. 0 disables the limit. + pub max_total_message_memory: usize, pub tls: TlsOption, /// Maximum time that a channel may exist. /// Useful when the server wants to control the reconnection of its clients. @@ -238,6 +248,7 @@ impl Default for GrpcServerConfig { Self { max_recv_message_size: DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE.as_bytes() as usize, max_send_message_size: DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE.as_bytes() as usize, + max_total_message_memory: 0, tls: TlsOption::default(), max_connection_age: None, } @@ -277,6 +288,11 @@ impl GrpcServer { } Ok(()) } + + /// Get the memory limiter for monitoring current memory usage + pub fn memory_limiter(&self) -> &RequestMemoryLimiter { + &self.memory_limiter + } } pub struct HealthCheckHandler; diff --git a/src/servers/src/grpc/builder.rs b/src/servers/src/grpc/builder.rs index 75a0bb13c3..ae5c226138 100644 --- a/src/servers/src/grpc/builder.rs +++ b/src/servers/src/grpc/builder.rs @@ -38,6 +38,7 @@ use crate::grpc::{GrpcServer, GrpcServerConfig}; use crate::otel_arrow::{HeaderInterceptor, OtelArrowServiceHandler}; use crate::prometheus_handler::PrometheusHandlerRef; use crate::query_handler::OpenTelemetryProtocolHandlerRef; +use crate::request_limiter::RequestMemoryLimiter; use crate::tls::TlsOption; /// Add a gRPC service (`service`) to a `builder`([RoutesBuilder]). @@ -57,7 +58,17 @@ macro_rules! 
add_service { .send_compressed(CompressionEncoding::Gzip) .send_compressed(CompressionEncoding::Zstd); - $builder.routes_builder_mut().add_service(service_builder); + // Apply memory limiter layer + use $crate::grpc::memory_limit::MemoryLimiterExtensionLayer; + let service_with_limiter = $crate::tower::ServiceBuilder::new() + .layer(MemoryLimiterExtensionLayer::new( + $builder.memory_limiter().clone(), + )) + .service(service_builder); + + $builder + .routes_builder_mut() + .add_service(service_with_limiter); }; } @@ -73,10 +84,12 @@ pub struct GrpcServerBuilder { HeaderInterceptor, >, >, + memory_limiter: RequestMemoryLimiter, } impl GrpcServerBuilder { pub fn new(config: GrpcServerConfig, runtime: Runtime) -> Self { + let memory_limiter = RequestMemoryLimiter::new(config.max_total_message_memory); Self { name: None, config, @@ -84,6 +97,7 @@ impl GrpcServerBuilder { routes_builder: RoutesBuilder::default(), tls_config: None, otel_arrow_service: None, + memory_limiter, } } @@ -95,6 +109,10 @@ impl GrpcServerBuilder { &self.runtime } + pub fn memory_limiter(&self) -> &RequestMemoryLimiter { + &self.memory_limiter + } + pub fn name(self, name: Option) -> Self { Self { name, ..self } } @@ -198,6 +216,7 @@ impl GrpcServerBuilder { bind_addr: None, name: self.name, config: self.config, + memory_limiter: self.memory_limiter, } } } diff --git a/src/servers/src/grpc/database.rs b/src/servers/src/grpc/database.rs index 13c328399d..5d132c434e 100644 --- a/src/servers/src/grpc/database.rs +++ b/src/servers/src/grpc/database.rs @@ -20,11 +20,14 @@ use common_error::status_code::StatusCode; use common_query::OutputData; use common_telemetry::{debug, warn}; use futures::StreamExt; +use prost::Message; use tonic::{Request, Response, Status, Streaming}; use crate::grpc::greptime_handler::GreptimeRequestHandler; use crate::grpc::{TonicResult, cancellation}; use crate::hint_headers; +use crate::metrics::{METRIC_GRPC_MEMORY_USAGE_BYTES, METRIC_GRPC_REQUESTS_REJECTED_TOTAL}; +use crate::request_limiter::RequestMemoryLimiter; pub(crate) struct DatabaseService { handler: GreptimeRequestHandler, @@ -48,6 +51,27 @@ impl GreptimeDatabase for DatabaseService { "GreptimeDatabase::Handle: request from {:?} with hints: {:?}", remote_addr, hints ); + + let _guard = request + .extensions() + .get::() + .filter(|limiter| limiter.is_enabled()) + .and_then(|limiter| { + let message_size = request.get_ref().encoded_len(); + limiter + .try_acquire(message_size) + .map(|guard| { + guard.inspect(|g| { + METRIC_GRPC_MEMORY_USAGE_BYTES.set(g.current_usage() as i64); + }) + }) + .inspect_err(|_| { + METRIC_GRPC_REQUESTS_REJECTED_TOTAL.inc(); + }) + .transpose() + }) + .transpose()?; + let handler = self.handler.clone(); let request_future = async move { let request = request.into_inner(); @@ -94,6 +118,9 @@ impl GreptimeDatabase for DatabaseService { "GreptimeDatabase::HandleRequests: request from {:?} with hints: {:?}", remote_addr, hints ); + + let limiter = request.extensions().get::().cloned(); + let handler = self.handler.clone(); let request_future = async move { let mut affected_rows = 0; @@ -101,6 +128,25 @@ impl GreptimeDatabase for DatabaseService { let mut stream = request.into_inner(); while let Some(request) = stream.next().await { let request = request?; + + let _guard = limiter + .as_ref() + .filter(|limiter| limiter.is_enabled()) + .and_then(|limiter| { + let message_size = request.encoded_len(); + limiter + .try_acquire(message_size) + .map(|guard| { + guard.inspect(|g| { + 
METRIC_GRPC_MEMORY_USAGE_BYTES.set(g.current_usage() as i64); + }) + }) + .inspect_err(|_| { + METRIC_GRPC_REQUESTS_REJECTED_TOTAL.inc(); + }) + .transpose() + }) + .transpose()?; let output = handler.handle_request(request, hints.clone()).await?; match output.data { OutputData::AffectedRows(rows) => affected_rows += rows, diff --git a/src/servers/src/grpc/flight.rs b/src/servers/src/grpc/flight.rs index bb431bfdae..44b307fe71 100644 --- a/src/servers/src/grpc/flight.rs +++ b/src/servers/src/grpc/flight.rs @@ -45,6 +45,8 @@ use crate::error::{InvalidParameterSnafu, ParseJsonSnafu, Result, ToJsonSnafu}; pub use crate::grpc::flight::stream::FlightRecordBatchStream; use crate::grpc::greptime_handler::{GreptimeRequestHandler, get_request_type}; use crate::grpc::{FlightCompression, TonicResult, context_auth}; +use crate::metrics::{METRIC_GRPC_MEMORY_USAGE_BYTES, METRIC_GRPC_REQUESTS_REJECTED_TOTAL}; +use crate::request_limiter::{RequestMemoryGuard, RequestMemoryLimiter}; use crate::{error, hint_headers}; pub type TonicStream = Pin> + Send + 'static>>; @@ -211,7 +213,9 @@ impl FlightCraft for GreptimeRequestHandler { &self, request: Request>, ) -> TonicResult>> { - let (headers, _, stream) = request.into_parts(); + let (headers, extensions, stream) = request.into_parts(); + + let limiter = extensions.get::().cloned(); let query_ctx = context_auth::create_query_context_from_grpc_metadata(&headers)?; context_auth::check_auth(self.user_provider.clone(), &headers, query_ctx.clone()).await?; @@ -225,6 +229,7 @@ impl FlightCraft for GreptimeRequestHandler { query_ctx.current_catalog().to_string(), query_ctx.current_schema(), ), + limiter, }; self.put_record_batches(stream, tx, query_ctx).await; @@ -248,10 +253,15 @@ pub(crate) struct PutRecordBatchRequest { pub(crate) table_name: TableName, pub(crate) request_id: i64, pub(crate) data: FlightData, + pub(crate) _guard: Option, } impl PutRecordBatchRequest { - fn try_new(table_name: TableName, flight_data: FlightData) -> Result { + fn try_new( + table_name: TableName, + flight_data: FlightData, + limiter: Option<&RequestMemoryLimiter>, + ) -> Result { let request_id = if !flight_data.app_metadata.is_empty() { let metadata: DoPutMetadata = serde_json::from_slice(&flight_data.app_metadata).context(ParseJsonSnafu)?; @@ -259,10 +269,30 @@ impl PutRecordBatchRequest { } else { 0 }; + + let _guard = limiter + .filter(|limiter| limiter.is_enabled()) + .map(|limiter| { + let message_size = flight_data.encoded_len(); + limiter + .try_acquire(message_size) + .map(|guard| { + guard.inspect(|g| { + METRIC_GRPC_MEMORY_USAGE_BYTES.set(g.current_usage() as i64); + }) + }) + .inspect_err(|_| { + METRIC_GRPC_REQUESTS_REJECTED_TOTAL.inc(); + }) + }) + .transpose()? 
+ .flatten(); + Ok(Self { table_name, request_id, data: flight_data, + _guard, }) } } @@ -270,6 +300,7 @@ impl PutRecordBatchRequest { pub(crate) struct PutRecordBatchRequestStream { flight_data_stream: Streaming, state: PutRecordBatchRequestStreamState, + limiter: Option, } enum PutRecordBatchRequestStreamState { @@ -298,6 +329,7 @@ impl Stream for PutRecordBatchRequestStream { } let poll = ready!(self.flight_data_stream.poll_next_unpin(cx)); + let limiter = self.limiter.clone(); let result = match &mut self.state { PutRecordBatchRequestStreamState::Init(catalog, schema) => match poll { @@ -311,8 +343,11 @@ impl Stream for PutRecordBatchRequestStream { Err(e) => return Poll::Ready(Some(Err(e.into()))), }; - let request = - PutRecordBatchRequest::try_new(table_name.clone(), flight_data); + let request = PutRecordBatchRequest::try_new( + table_name.clone(), + flight_data, + limiter.as_ref(), + ); let request = match request { Ok(request) => request, Err(e) => return Poll::Ready(Some(Err(e.into()))), @@ -333,8 +368,12 @@ impl Stream for PutRecordBatchRequestStream { }, PutRecordBatchRequestStreamState::Started(table_name) => poll.map(|x| { x.and_then(|flight_data| { - PutRecordBatchRequest::try_new(table_name.clone(), flight_data) - .map_err(Into::into) + PutRecordBatchRequest::try_new( + table_name.clone(), + flight_data, + limiter.as_ref(), + ) + .map_err(Into::into) }) }), }; diff --git a/src/servers/src/grpc/greptime_handler.rs b/src/servers/src/grpc/greptime_handler.rs index e19fc4352b..095c36abb1 100644 --- a/src/servers/src/grpc/greptime_handler.rs +++ b/src/servers/src/grpc/greptime_handler.rs @@ -160,6 +160,7 @@ impl GreptimeRequestHandler { table_name, request_id, data, + _guard, } = request; let timer = metrics::GRPC_BULK_INSERT_ELAPSED.start_timer(); diff --git a/src/servers/src/grpc/memory_limit.rs b/src/servers/src/grpc/memory_limit.rs new file mode 100644 index 0000000000..a3dee9da57 --- /dev/null +++ b/src/servers/src/grpc/memory_limit.rs @@ -0,0 +1,72 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::task::{Context, Poll}; + +use futures::future::BoxFuture; +use tonic::server::NamedService; +use tower::{Layer, Service}; + +use crate::request_limiter::RequestMemoryLimiter; + +#[derive(Clone)] +pub struct MemoryLimiterExtensionLayer { + limiter: RequestMemoryLimiter, +} + +impl MemoryLimiterExtensionLayer { + pub fn new(limiter: RequestMemoryLimiter) -> Self { + Self { limiter } + } +} + +impl Layer for MemoryLimiterExtensionLayer { + type Service = MemoryLimiterExtensionService; + + fn layer(&self, service: S) -> Self::Service { + MemoryLimiterExtensionService { + inner: service, + limiter: self.limiter.clone(), + } + } +} + +#[derive(Clone)] +pub struct MemoryLimiterExtensionService { + inner: S, + limiter: RequestMemoryLimiter, +} + +impl NamedService for MemoryLimiterExtensionService { + const NAME: &'static str = S::NAME; +} + +impl Service> for MemoryLimiterExtensionService +where + S: Service>, + S::Future: Send + 'static, +{ + type Response = S::Response; + type Error = S::Error; + type Future = BoxFuture<'static, Result>; + + fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + self.inner.poll_ready(cx) + } + + fn call(&mut self, mut req: http::Request) -> Self::Future { + req.extensions_mut().insert(self.limiter.clone()); + Box::pin(self.inner.call(req)) + } +} diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index 946e22ba5b..404b087535 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -82,6 +82,7 @@ use crate::query_handler::{ OpenTelemetryProtocolHandlerRef, OpentsdbProtocolHandlerRef, PipelineHandlerRef, PromStoreProtocolHandlerRef, }; +use crate::request_limiter::RequestMemoryLimiter; use crate::server::Server; pub mod authorize; @@ -97,6 +98,7 @@ pub mod jaeger; pub mod logs; pub mod loki; pub mod mem_prof; +mod memory_limit; pub mod opentsdb; pub mod otlp; pub mod pprof; @@ -129,6 +131,7 @@ pub struct HttpServer { router: StdMutex, shutdown_tx: Mutex>>, user_provider: Option, + memory_limiter: RequestMemoryLimiter, // plugins plugins: Plugins, @@ -151,6 +154,9 @@ pub struct HttpOptions { pub body_limit: ReadableSize, + /// Maximum total memory for all concurrent HTTP request bodies. 0 disables the limit. + pub max_total_body_memory: ReadableSize, + /// Validation mode while decoding Prometheus remote write requests. 
pub prom_validation_mode: PromValidationMode, @@ -195,6 +201,7 @@ impl Default for HttpOptions { timeout: Duration::from_secs(0), disable_dashboard: false, body_limit: DEFAULT_BODY_LIMIT, + max_total_body_memory: ReadableSize(0), cors_allowed_origins: Vec::new(), enable_cors: true, prom_validation_mode: PromValidationMode::Strict, @@ -746,6 +753,8 @@ impl HttpServerBuilder { } pub fn build(self) -> HttpServer { + let memory_limiter = + RequestMemoryLimiter::new(self.options.max_total_body_memory.as_bytes() as usize); HttpServer { options: self.options, user_provider: self.user_provider, @@ -753,6 +762,7 @@ impl HttpServerBuilder { plugins: self.plugins, router: StdMutex::new(self.router), bind_addr: None, + memory_limiter, } } } @@ -877,6 +887,11 @@ impl HttpServer { .option_layer(cors_layer) .option_layer(timeout_layer) .option_layer(body_limit_layer) + // memory limit layer - must be before body is consumed + .layer(middleware::from_fn_with_state( + self.memory_limiter.clone(), + memory_limit::memory_limit_middleware, + )) // auth layer .layer(middleware::from_fn_with_state( AuthState::new(self.user_provider.clone()), diff --git a/src/servers/src/http/memory_limit.rs b/src/servers/src/http/memory_limit.rs new file mode 100644 index 0000000000..346b5d3409 --- /dev/null +++ b/src/servers/src/http/memory_limit.rs @@ -0,0 +1,52 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Middleware for limiting total memory usage of concurrent HTTP request bodies. + +use axum::extract::{Request, State}; +use axum::middleware::Next; +use axum::response::{IntoResponse, Response}; +use http::StatusCode; + +use crate::metrics::{METRIC_HTTP_MEMORY_USAGE_BYTES, METRIC_HTTP_REQUESTS_REJECTED_TOTAL}; +use crate::request_limiter::RequestMemoryLimiter; + +pub async fn memory_limit_middleware( + State(limiter): State, + req: Request, + next: Next, +) -> Response { + let content_length = req + .headers() + .get(http::header::CONTENT_LENGTH) + .and_then(|v| v.to_str().ok()) + .and_then(|v| v.parse::().ok()) + .unwrap_or(0); + + let _guard = match limiter.try_acquire(content_length) { + Ok(guard) => guard.inspect(|g| { + METRIC_HTTP_MEMORY_USAGE_BYTES.set(g.current_usage() as i64); + }), + Err(e) => { + METRIC_HTTP_REQUESTS_REJECTED_TOTAL.inc(); + return ( + StatusCode::TOO_MANY_REQUESTS, + format!("Request body memory limit exceeded: {}", e), + ) + .into_response(); + } + }; + + next.run(req).await +} diff --git a/src/servers/src/lib.rs b/src/servers/src/lib.rs index 7172934e66..c73883f0da 100644 --- a/src/servers/src/lib.rs +++ b/src/servers/src/lib.rs @@ -20,6 +20,9 @@ use datafusion_expr::LogicalPlan; use datatypes::schema::Schema; use sql::statements::statement::Statement; +// Re-export for use in add_service! 
macro +#[doc(hidden)] +pub use tower; pub mod addrs; pub mod configurator; @@ -47,6 +50,7 @@ pub mod prometheus_handler; pub mod proto; pub mod query_handler; pub mod repeated_field; +pub mod request_limiter; mod row_writer; pub mod server; pub mod tls; diff --git a/src/servers/src/metrics.rs b/src/servers/src/metrics.rs index af44e697db..8662465f94 100644 --- a/src/servers/src/metrics.rs +++ b/src/servers/src/metrics.rs @@ -298,6 +298,26 @@ lazy_static! { "greptime_servers_bulk_insert_elapsed", "servers handle bulk insert elapsed", ).unwrap(); + + pub static ref METRIC_HTTP_MEMORY_USAGE_BYTES: IntGauge = register_int_gauge!( + "greptime_servers_http_memory_usage_bytes", + "current http request memory usage in bytes" + ).unwrap(); + + pub static ref METRIC_HTTP_REQUESTS_REJECTED_TOTAL: IntCounter = register_int_counter!( + "greptime_servers_http_requests_rejected_total", + "total number of http requests rejected due to memory limit" + ).unwrap(); + + pub static ref METRIC_GRPC_MEMORY_USAGE_BYTES: IntGauge = register_int_gauge!( + "greptime_servers_grpc_memory_usage_bytes", + "current grpc request memory usage in bytes" + ).unwrap(); + + pub static ref METRIC_GRPC_REQUESTS_REJECTED_TOTAL: IntCounter = register_int_counter!( + "greptime_servers_grpc_requests_rejected_total", + "total number of grpc requests rejected due to memory limit" + ).unwrap(); } // Based on https://github.com/hyperium/tonic/blob/master/examples/src/tower/server.rs diff --git a/src/servers/src/request_limiter.rs b/src/servers/src/request_limiter.rs new file mode 100644 index 0000000000..62fb4cf216 --- /dev/null +++ b/src/servers/src/request_limiter.rs @@ -0,0 +1,215 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Request memory limiter for controlling total memory usage of concurrent requests. + +use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; + +use crate::error::{Result, TooManyConcurrentRequestsSnafu}; + +/// Limiter for total memory usage of concurrent request bodies. +/// +/// Tracks the total memory used by all concurrent request bodies +/// and rejects new requests when the limit is reached. +#[derive(Clone, Default)] +pub struct RequestMemoryLimiter { + inner: Option>, +} + +struct LimiterInner { + current_usage: AtomicUsize, + max_memory: usize, +} + +impl RequestMemoryLimiter { + /// Create a new memory limiter. + /// + /// # Arguments + /// * `max_memory` - Maximum total memory for all concurrent request bodies in bytes (0 = unlimited) + pub fn new(max_memory: usize) -> Self { + if max_memory == 0 { + return Self { inner: None }; + } + + Self { + inner: Some(Arc::new(LimiterInner { + current_usage: AtomicUsize::new(0), + max_memory, + })), + } + } + + /// Try to acquire memory for a request of given size. + /// + /// Returns `Ok(RequestMemoryGuard)` if memory was acquired successfully. + /// Returns `Err` if the memory limit would be exceeded. 
+    pub fn try_acquire(&self, request_size: usize) -> Result<Option<RequestMemoryGuard>> {
+        let Some(inner) = self.inner.as_ref() else {
+            return Ok(None);
+        };
+
+        let mut new_usage = 0;
+        let result =
+            inner
+                .current_usage
+                .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |current| {
+                    new_usage = current.saturating_add(request_size);
+                    if new_usage <= inner.max_memory {
+                        Some(new_usage)
+                    } else {
+                        None
+                    }
+                });
+
+        match result {
+            Ok(_) => Ok(Some(RequestMemoryGuard {
+                size: request_size,
+                limiter: Arc::clone(inner),
+                usage_snapshot: new_usage,
+            })),
+            Err(_current) => TooManyConcurrentRequestsSnafu {
+                limit: inner.max_memory,
+                request_size,
+            }
+            .fail(),
+        }
+    }
+
+    /// Check if limiter is enabled
+    pub fn is_enabled(&self) -> bool {
+        self.inner.is_some()
+    }
+
+    /// Get current memory usage
+    pub fn current_usage(&self) -> usize {
+        self.inner
+            .as_ref()
+            .map(|inner| inner.current_usage.load(Ordering::Relaxed))
+            .unwrap_or(0)
+    }
+
+    /// Get max memory limit
+    pub fn max_memory(&self) -> usize {
+        self.inner
+            .as_ref()
+            .map(|inner| inner.max_memory)
+            .unwrap_or(0)
+    }
+}
+
+/// RAII guard that releases memory when dropped
+pub struct RequestMemoryGuard {
+    size: usize,
+    limiter: Arc<LimiterInner>,
+    usage_snapshot: usize,
+}
+
+impl RequestMemoryGuard {
+    /// Returns the total memory usage snapshot at the time this guard was acquired.
+    pub fn current_usage(&self) -> usize {
+        self.usage_snapshot
+    }
+}
+
+impl Drop for RequestMemoryGuard {
+    fn drop(&mut self) {
+        self.limiter
+            .current_usage
+            .fetch_sub(self.size, Ordering::Release);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_limiter_disabled() {
+        let limiter = RequestMemoryLimiter::new(0);
+        assert!(!limiter.is_enabled());
+        assert!(limiter.try_acquire(1000000).unwrap().is_none());
+        assert_eq!(limiter.current_usage(), 0);
+    }
+
+    #[test]
+    fn test_limiter_basic() {
+        let limiter = RequestMemoryLimiter::new(1000);
+        assert!(limiter.is_enabled());
+        assert_eq!(limiter.max_memory(), 1000);
+        assert_eq!(limiter.current_usage(), 0);
+
+        // Acquire 400 bytes
+        let _guard1 = limiter.try_acquire(400).unwrap();
+        assert_eq!(limiter.current_usage(), 400);
+
+        // Acquire another 500 bytes
+        let _guard2 = limiter.try_acquire(500).unwrap();
+        assert_eq!(limiter.current_usage(), 900);
+
+        // Try to acquire 200 bytes - should fail (900 + 200 > 1000)
+        let result = limiter.try_acquire(200);
+        assert!(result.is_err());
+        assert_eq!(limiter.current_usage(), 900);
+
+        // Drop first guard
+        drop(_guard1);
+        assert_eq!(limiter.current_usage(), 500);
+
+        // Now we can acquire 200 bytes
+        let _guard3 = limiter.try_acquire(200).unwrap();
+        assert_eq!(limiter.current_usage(), 700);
+    }
+
+    #[test]
+    fn test_limiter_exact_limit() {
+        let limiter = RequestMemoryLimiter::new(1000);
+
+        // Acquire exactly the limit
+        let _guard = limiter.try_acquire(1000).unwrap();
+        assert_eq!(limiter.current_usage(), 1000);
+
+        // Try to acquire 1 more byte - should fail
+        let result = limiter.try_acquire(1);
+        assert!(result.is_err());
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn test_limiter_concurrent() {
+        let limiter = RequestMemoryLimiter::new(1000);
+        let mut handles = vec![];
+
+        // Spawn 10 tasks each trying to acquire 200 bytes
+        for _ in 0..10 {
+            let limiter_clone = limiter.clone();
+            let handle = tokio::spawn(async move { limiter_clone.try_acquire(200) });
+            handles.push(handle);
+        }
+
+        let mut success_count = 0;
+        let mut fail_count = 0;
+
+        for handle in handles {
+            match handle.await.unwrap() {
Ok(Some(_)) => success_count += 1, + Err(_) => fail_count += 1, + Ok(None) => unreachable!(), + } + } + + // Only 5 tasks should succeed (5 * 200 = 1000) + assert_eq!(success_count, 5); + assert_eq!(fail_count, 5); + } +} diff --git a/tests-integration/tests/grpc.rs b/tests-integration/tests/grpc.rs index b9e56564a5..6f82d4fc55 100644 --- a/tests-integration/tests/grpc.rs +++ b/tests-integration/tests/grpc.rs @@ -14,10 +14,12 @@ use api::v1::alter_table_expr::Kind; use api::v1::promql_request::Promql; +use api::v1::value::ValueData; use api::v1::{ AddColumn, AddColumns, AlterTableExpr, Basic, Column, ColumnDataType, ColumnDef, CreateTableExpr, InsertRequest, InsertRequests, PromInstantQuery, PromRangeQuery, - PromqlRequest, RequestHeader, SemanticType, column, + PromqlRequest, RequestHeader, Row, RowInsertRequest, RowInsertRequests, SemanticType, Value, + column, }; use auth::user_provider_from_option; use client::{Client, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, Database, OutputData}; @@ -89,6 +91,7 @@ macro_rules! grpc_tests { test_prom_gateway_query, test_grpc_timezone, test_grpc_tls_config, + test_grpc_memory_limit, ); )* }; @@ -954,6 +957,7 @@ pub async fn test_grpc_tls_config(store_type: StorageType) { let config = GrpcServerConfig { max_recv_message_size: 1024, max_send_message_size: 1024, + max_total_message_memory: 1024 * 1024 * 1024, tls, max_connection_age: None, }; @@ -996,6 +1000,7 @@ pub async fn test_grpc_tls_config(store_type: StorageType) { let config = GrpcServerConfig { max_recv_message_size: 1024, max_send_message_size: 1024, + max_total_message_memory: 1024 * 1024 * 1024, tls, max_connection_age: None, }; @@ -1007,3 +1012,157 @@ pub async fn test_grpc_tls_config(store_type: StorageType) { let _ = fe_grpc_server.shutdown().await; } + +pub async fn test_grpc_memory_limit(store_type: StorageType) { + let config = GrpcServerConfig { + max_recv_message_size: 1024 * 1024, + max_send_message_size: 1024 * 1024, + max_total_message_memory: 200, + tls: Default::default(), + max_connection_age: None, + }; + let (_db, fe_grpc_server) = + setup_grpc_server_with(store_type, "test_grpc_memory_limit", None, Some(config)).await; + let addr = fe_grpc_server.bind_addr().unwrap().to_string(); + + let grpc_client = Client::with_urls([&addr]); + let db = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, grpc_client); + + let table_name = "demo"; + + let column_schemas = vec![ + ColumnDef { + name: "host".to_string(), + data_type: ColumnDataType::String as i32, + is_nullable: false, + default_constraint: vec![], + semantic_type: SemanticType::Tag as i32, + comment: String::new(), + datatype_extension: None, + options: None, + }, + ColumnDef { + name: "ts".to_string(), + data_type: ColumnDataType::TimestampMillisecond as i32, + is_nullable: false, + default_constraint: vec![], + semantic_type: SemanticType::Timestamp as i32, + comment: String::new(), + datatype_extension: None, + options: None, + }, + ColumnDef { + name: "cpu".to_string(), + data_type: ColumnDataType::Float64 as i32, + is_nullable: true, + default_constraint: vec![], + semantic_type: SemanticType::Field as i32, + comment: String::new(), + datatype_extension: None, + options: None, + }, + ]; + + let expr = CreateTableExpr { + catalog_name: DEFAULT_CATALOG_NAME.to_string(), + schema_name: DEFAULT_SCHEMA_NAME.to_string(), + table_name: table_name.to_string(), + desc: String::new(), + column_defs: column_schemas.clone(), + time_index: "ts".to_string(), + primary_keys: vec!["host".to_string()], + create_if_not_exists: 
true, + table_options: Default::default(), + table_id: None, + engine: MITO_ENGINE.to_string(), + }; + + db.create(expr).await.unwrap(); + + // Test that small request succeeds + let small_row_insert = RowInsertRequest { + table_name: table_name.to_owned(), + rows: Some(api::v1::Rows { + schema: column_schemas + .iter() + .map(|c| api::v1::ColumnSchema { + column_name: c.name.clone(), + datatype: c.data_type, + semantic_type: c.semantic_type, + datatype_extension: None, + options: None, + }) + .collect(), + rows: vec![Row { + values: vec![ + Value { + value_data: Some(ValueData::StringValue("host1".to_string())), + }, + Value { + value_data: Some(ValueData::TimestampMillisecondValue(1000)), + }, + Value { + value_data: Some(ValueData::F64Value(1.2)), + }, + ], + }], + }), + }; + + let result = db + .row_inserts(RowInsertRequests { + inserts: vec![small_row_insert], + }) + .await; + assert!(result.is_ok()); + + // Test that large request exceeds limit + let large_rows: Vec = (0..100) + .map(|i| Row { + values: vec![ + Value { + value_data: Some(ValueData::StringValue(format!("host{}", i))), + }, + Value { + value_data: Some(ValueData::TimestampMillisecondValue(1000 + i)), + }, + Value { + value_data: Some(ValueData::F64Value(i as f64 * 1.2)), + }, + ], + }) + .collect(); + + let large_row_insert = RowInsertRequest { + table_name: table_name.to_owned(), + rows: Some(api::v1::Rows { + schema: column_schemas + .iter() + .map(|c| api::v1::ColumnSchema { + column_name: c.name.clone(), + datatype: c.data_type, + semantic_type: c.semantic_type, + datatype_extension: None, + options: None, + }) + .collect(), + rows: large_rows, + }), + }; + + let result = db + .row_inserts(RowInsertRequests { + inserts: vec![large_row_insert], + }) + .await; + assert!(result.is_err()); + let err = result.unwrap_err(); + let err_msg = err.to_string(); + assert!( + err_msg.contains("Too many concurrent"), + "Expected memory limit error, got: {}", + err_msg + ); + + let _ = fe_grpc_server.shutdown().await; +} diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 538392e437..d5ed2ed4e6 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -1597,6 +1597,8 @@ fn drop_lines_with_inconsistent_results(input: String) -> String { "max_background_compactions =", "max_background_purges =", "enable_read_cache =", + "max_total_body_memory =", + "max_total_message_memory =", ]; input From e8f39cbc4f9a0886593f318e2e1e51c8ef931f6a Mon Sep 17 00:00:00 2001 From: fys <40801205+fengys1996@users.noreply.github.com> Date: Thu, 23 Oct 2025 11:47:25 +0800 Subject: [PATCH 03/14] fix: unit test about trigger parser (#7132) * fix: unit test about trigger parser * fix: cargo clippy --- src/sql/src/parsers/alter_parser/trigger.rs | 32 +++++++++++++-------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/sql/src/parsers/alter_parser/trigger.rs b/src/sql/src/parsers/alter_parser/trigger.rs index 73e9275acd..ceec60fa24 100644 --- a/src/sql/src/parsers/alter_parser/trigger.rs +++ b/src/sql/src/parsers/alter_parser/trigger.rs @@ -640,18 +640,26 @@ mod tests { panic!("Expected PartialChanges label operations"); }; assert_eq!(changes.len(), 3); - let expected_changes = vec![ - LabelChange::Add(OptionMap::from([( - "key1".to_string(), - "value1".to_string(), - )])), - LabelChange::Modify(OptionMap::from([( - "key2".to_string(), - "value2".to_string(), - )])), - LabelChange::Drop(vec!["key3".to_string()]), - ]; - assert_eq!(changes, expected_changes); + let change0 = 
changes.first().unwrap(); + let LabelChange::Add(labels) = change0 else { + panic!("Expected Add label change"); + }; + assert_eq!(labels.len(), 1); + assert_eq!(labels.get("key1"), Some("value1")); + + let change1 = changes.get(1).unwrap(); + let LabelChange::Modify(labels) = change1 else { + panic!("Expected Modify label change"); + }; + assert_eq!(labels.len(), 1); + assert_eq!(labels.get("key2"), Some("value2")); + + let change2 = changes.get(2).unwrap(); + let LabelChange::Drop(names) = change2 else { + panic!("Expected Drop label change"); + }; + assert_eq!(names.len(), 1); + assert_eq!(names.first().unwrap(), "key3"); // Failed case: Duplicate SET LABELS. let sql = From 136b9eef7ae43ef8ff2e69de8e29cbf2ab6080b3 Mon Sep 17 00:00:00 2001 From: jeremyhi Date: Thu, 23 Oct 2025 14:22:02 +0800 Subject: [PATCH 04/14] feat: pr review reminder frequency (#7129) * feat: run at 9:00 am on monday, wednesday, friday Signed-off-by: jeremyhi * chore: remove unused method Signed-off-by: jeremyhi --------- Signed-off-by: jeremyhi --- .github/scripts/pr-review-reminder.js | 8 -------- .github/workflows/pr-review-reminder.yml | 4 ++-- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/scripts/pr-review-reminder.js b/.github/scripts/pr-review-reminder.js index 9b0ce4f490..3853d11e60 100644 --- a/.github/scripts/pr-review-reminder.js +++ b/.github/scripts/pr-review-reminder.js @@ -57,14 +57,6 @@ return days; } - // Get urgency emoji based on PR age - function getAgeEmoji(days) { - if (days >= 14) return "🔴"; // 14+ days - critical - if (days >= 7) return "🟠"; // 7+ days - urgent - if (days >= 3) return "🟡"; // 3+ days - needs attention - return "🟢"; // < 3 days - fresh - } - // Build Slack notification message from PR list function buildSlackMessage(prs) { if (prs.length === 0) { diff --git a/.github/workflows/pr-review-reminder.yml b/.github/workflows/pr-review-reminder.yml index d49e928fec..0ba25fdaca 100644 --- a/.github/workflows/pr-review-reminder.yml +++ b/.github/workflows/pr-review-reminder.yml @@ -2,8 +2,8 @@ name: PR Review Reminder on: schedule: - # Run at 9:00 AM UTC+8 (01:00 AM UTC) every day - - cron: '0 1 * * *' + # Run at 9:00 AM UTC+8 (01:00 AM UTC) on Monday, Wednesday, Friday + - cron: '0 1 * * 1,3,5' workflow_dispatch: jobs: From f388dbdbb8d5708b3e597443f33bf6bb44f6bb7f Mon Sep 17 00:00:00 2001 From: Yingwen Date: Thu, 23 Oct 2025 14:43:46 +0800 Subject: [PATCH 05/14] fix: fix index and tag filtering for flat format (#7121) * perf: only decode primary keys in the batch Signed-off-by: evenyag * fix: don't push none to creator Signed-off-by: evenyag * chore: implement method to filter __table_id for sparse encoding Signed-off-by: evenyag * feat: filter table id for sparse encoding separately The __table_id doesn't present in projection so we have to filter it manually Signed-off-by: evenyag * fix: decode tags for sparse encoding when building bloom filter Signed-off-by: evenyag * feat: support inverted index for tags under sparse encoding Signed-off-by: evenyag * feat: skip tag columns in fulltext index Signed-off-by: evenyag * chore: fix warnings Signed-off-by: evenyag * style: fix clippy Signed-off-by: evenyag * test: fix list index metadata test Signed-off-by: evenyag * fix: decode primary key columns to filter When primary key columns are not in projection but in filters, we need to decode them in compute_filter_mask_flat Signed-off-by: evenyag * refactor: reuse filter method Signed-off-by: evenyag * fix: only use dictionary for string type in compat 
Signed-off-by: evenyag * refactor: safe to get column by creator's column id Signed-off-by: evenyag --------- Signed-off-by: evenyag --- src/mito2/src/engine/basic_test.rs | 100 +++++--- src/mito2/src/memtable/bulk/part_reader.rs | 41 +--- src/mito2/src/read/compat.rs | 3 +- src/mito2/src/read/flat_projection.rs | 2 + src/mito2/src/sst/index.rs | 59 ++++- .../src/sst/index/bloom_filter/creator.rs | 106 ++++++--- .../src/sst/index/fulltext_index/creator.rs | 12 + .../src/sst/index/inverted_index/creator.rs | 99 ++++---- src/mito2/src/sst/parquet/file_range.rs | 97 ++++++-- src/mito2/src/sst/parquet/flat_format.rs | 213 +++++++++++------- src/mito2/src/sst/parquet/reader.rs | 1 + 11 files changed, 482 insertions(+), 251 deletions(-) diff --git a/src/mito2/src/engine/basic_test.rs b/src/mito2/src/engine/basic_test.rs index 38b4df7a9e..39f2366659 100644 --- a/src/mito2/src/engine/basic_test.rs +++ b/src/mito2/src/engine/basic_test.rs @@ -972,17 +972,17 @@ async fn test_list_ssts_with_format( #[tokio::test] async fn test_all_index_metas_list_all_types() { test_all_index_metas_list_all_types_with_format(false, r#" -PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6032), index_type: "bloom_filter", target_type: "column", target_key: "3", target_json: "{\"column\":3}", blob_size: 751, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":640,\"row_count\":20,\"rows_per_segment\":2,\"segment_count\":10}}"), node_id: None } -PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6032), index_type: "fulltext_bloom", target_type: "column", target_key: "1", target_json: "{\"column\":1}", blob_size: 87, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":64,\"row_count\":20,\"rows_per_segment\":4,\"segment_count\":5},\"fulltext\":{\"analyzer\":\"English\",\"case_sensitive\":false}}"), node_id: None } -PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6032), index_type: "fulltext_tantivy", target_type: "column", target_key: "2", target_json: "{\"column\":2}", blob_size: 1104, meta_json: Some("{\"fulltext\":{\"analyzer\":\"Chinese\",\"case_sensitive\":true}}"), node_id: None } -PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6032), index_type: "inverted", target_type: "column", target_key: "0", target_json: "{\"column\":0}", blob_size: 70, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":44,\"inverted_index_size\":70,\"null_bitmap_size\":8,\"relative_fst_offset\":26,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None } -PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6032), index_type: "inverted", target_type: "column", 
target_key: "4", target_json: "{\"column\":4}", blob_size: 515, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":147,\"inverted_index_size\":515,\"null_bitmap_size\":8,\"relative_fst_offset\":368,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None }"#).await; +PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6500), index_type: "bloom_filter", target_type: "column", target_key: "1", target_json: "{\"column\":1}", blob_size: 751, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":640,\"row_count\":20,\"rows_per_segment\":2,\"segment_count\":10}}"), node_id: None } +PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6500), index_type: "fulltext_bloom", target_type: "column", target_key: "4", target_json: "{\"column\":4}", blob_size: 89, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":64,\"row_count\":20,\"rows_per_segment\":4,\"segment_count\":5},\"fulltext\":{\"analyzer\":\"English\",\"case_sensitive\":false}}"), node_id: None } +PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6500), index_type: "fulltext_tantivy", target_type: "column", target_key: "5", target_json: "{\"column\":5}", blob_size: 1100, meta_json: Some("{\"fulltext\":{\"analyzer\":\"Chinese\",\"case_sensitive\":true}}"), node_id: None } +PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6500), index_type: "inverted", target_type: "column", target_key: "1", target_json: "{\"column\":1}", blob_size: 518, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":150,\"inverted_index_size\":518,\"null_bitmap_size\":8,\"relative_fst_offset\":368,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None } +PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6500), index_type: "inverted", target_type: "column", target_key: "2", target_json: "{\"column\":2}", blob_size: 515, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":147,\"inverted_index_size\":515,\"null_bitmap_size\":8,\"relative_fst_offset\":368,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None }"#).await; test_all_index_metas_list_all_types_with_format(true, r#" -PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6144), index_type: "bloom_filter", target_type: "column", target_key: "3", target_json: "{\"column\":3}", blob_size: 751, meta_json: 
Some("{\"bloom\":{\"bloom_filter_size\":640,\"row_count\":20,\"rows_per_segment\":2,\"segment_count\":10}}"), node_id: None } -PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6144), index_type: "fulltext_bloom", target_type: "column", target_key: "1", target_json: "{\"column\":1}", blob_size: 89, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":64,\"row_count\":20,\"rows_per_segment\":4,\"segment_count\":5},\"fulltext\":{\"analyzer\":\"English\",\"case_sensitive\":false}}"), node_id: None } -PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6144), index_type: "fulltext_tantivy", target_type: "column", target_key: "2", target_json: "{\"column\":2}", blob_size: 1104, meta_json: Some("{\"fulltext\":{\"analyzer\":\"Chinese\",\"case_sensitive\":true}}"), node_id: None } -PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6144), index_type: "inverted", target_type: "column", target_key: "0", target_json: "{\"column\":0}", blob_size: 92, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":66,\"inverted_index_size\":92,\"null_bitmap_size\":8,\"relative_fst_offset\":26,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None } -PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6144), index_type: "inverted", target_type: "column", target_key: "4", target_json: "{\"column\":4}", blob_size: 515, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":147,\"inverted_index_size\":515,\"null_bitmap_size\":8,\"relative_fst_offset\":368,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None }"#).await; +PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6500), index_type: "bloom_filter", target_type: "column", target_key: "1", target_json: "{\"column\":1}", blob_size: 751, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":640,\"row_count\":20,\"rows_per_segment\":2,\"segment_count\":10}}"), node_id: None } +PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6500), index_type: "fulltext_bloom", target_type: "column", target_key: "4", target_json: "{\"column\":4}", blob_size: 89, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":64,\"row_count\":20,\"rows_per_segment\":4,\"segment_count\":5},\"fulltext\":{\"analyzer\":\"English\",\"case_sensitive\":false}}"), node_id: None } +PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", 
region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6500), index_type: "fulltext_tantivy", target_type: "column", target_key: "5", target_json: "{\"column\":5}", blob_size: 1100, meta_json: Some("{\"fulltext\":{\"analyzer\":\"Chinese\",\"case_sensitive\":true}}"), node_id: None } +PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6500), index_type: "inverted", target_type: "column", target_key: "1", target_json: "{\"column\":1}", blob_size: 518, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":150,\"inverted_index_size\":518,\"null_bitmap_size\":8,\"relative_fst_offset\":368,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None } +PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6500), index_type: "inverted", target_type: "column", target_key: "2", target_json: "{\"column\":2}", blob_size: 515, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":147,\"inverted_index_size\":515,\"null_bitmap_size\":8,\"relative_fst_offset\":368,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None }"#).await; } async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expect_format: &str) { @@ -1001,12 +1001,33 @@ async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expe // One region with both fulltext backends and inverted index enabled, plus bloom skipping index let region_id = RegionId::new(11, 1); - let mut request = CreateRequestBuilder::new().tag_num(3).field_num(2).build(); - // inverted index on tag_0 - request.column_metadatas[0] + let mut request = CreateRequestBuilder::new().tag_num(1).field_num(2).build(); + // bloom filter skipping index on field_1 + let skipping = SkippingIndexOptions::new_unchecked(2, 0.01, SkippingIndexType::BloomFilter); + request.column_metadatas[1] + .column_schema + .set_skipping_options(&skipping) + .unwrap(); + + // inverted index on field_1 + request.column_metadatas[2] .column_schema .set_inverted_index(true); - // fulltext bloom on tag_1 + // inverted index on tag_0 + request.column_metadatas[1] + .column_schema + .set_inverted_index(true); + + request.column_metadatas.push(ColumnMetadata { + column_schema: ColumnSchema::new( + "field_2".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + semantic_type: SemanticType::Field, + column_id: 4, + }); + // fulltext bloom on field_2 let ft_bloom = FulltextOptions::new_unchecked( true, FulltextAnalyzer::English, @@ -1015,11 +1036,24 @@ async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expe 4, 0.001, ); - request.column_metadatas[1] + request + .column_metadatas + .last_mut() + .unwrap() .column_schema .set_fulltext_options(&ft_bloom) .unwrap(); - // fulltext tantivy on tag_2 + + request.column_metadatas.push(ColumnMetadata { + column_schema: ColumnSchema::new( + "field_3".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + semantic_type: SemanticType::Field, + column_id: 5, + }); + // fulltext tantivy on field_3 
let ft_tantivy = FulltextOptions::new_unchecked( true, FulltextAnalyzer::Chinese, @@ -1028,28 +1062,20 @@ async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expe 2, 0.01, ); - request.column_metadatas[2] + request + .column_metadatas + .last_mut() + .unwrap() .column_schema .set_fulltext_options(&ft_tantivy) .unwrap(); - // bloom filter skipping index on field_1 (which is at index 3) - let skipping = SkippingIndexOptions::new_unchecked(2, 0.01, SkippingIndexType::BloomFilter); - request.column_metadatas[3] - .column_schema - .set_skipping_options(&skipping) - .unwrap(); - - // inverted index on field_1 - request.column_metadatas[4] - .column_schema - .set_inverted_index(true); engine .handle_request(region_id, RegionRequest::Create(request.clone())) .await .unwrap(); - // write some rows (schema: tag_0, tag_1, tag_2, field_0, field_1, ts) + // write some rows (schema: tag_0, field_0, field_1, field_2, field_3, ts) let column_schemas = rows_schema(&request); let rows_vec: Vec = (0..20) .map(|ts| api::v1::Row { @@ -1057,12 +1083,6 @@ async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expe api::v1::Value { value_data: Some(api::v1::value::ValueData::StringValue("x".to_string())), }, - api::v1::Value { - value_data: Some(api::v1::value::ValueData::StringValue("y".to_string())), - }, - api::v1::Value { - value_data: Some(api::v1::value::ValueData::StringValue("z".to_string())), - }, api::v1::Value { value_data: Some(api::v1::value::ValueData::F64Value(ts as f64)), }, @@ -1074,6 +1094,12 @@ async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expe ts as i64 * 1000, )), }, + api::v1::Value { + value_data: Some(api::v1::value::ValueData::StringValue("y".to_string())), + }, + api::v1::Value { + value_data: Some(api::v1::value::ValueData::StringValue("z".to_string())), + }, ], }) .collect(); @@ -1095,7 +1121,7 @@ async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expe .unwrap(); fn bucket_size(size: u64) -> u64 { - if size < 512 { size } else { (size / 16) * 16 } + if size < 512 { size } else { (size / 100) * 100 } } let mut metas = engine.all_index_metas().await; @@ -1125,5 +1151,5 @@ async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expe .map(|entry| format!("\n{:?}", entry)) .collect::(); - assert_eq!(debug_format, expect_format); + assert_eq!(expect_format, debug_format); } diff --git a/src/mito2/src/memtable/bulk/part_reader.rs b/src/mito2/src/memtable/bulk/part_reader.rs index b14ff05dfe..5578018a8d 100644 --- a/src/mito2/src/memtable/bulk/part_reader.rs +++ b/src/mito2/src/memtable/bulk/part_reader.rs @@ -13,12 +13,10 @@ // limitations under the License. use std::collections::VecDeque; -use std::ops::BitAnd; use std::sync::Arc; use bytes::Bytes; use datatypes::arrow::array::BooleanArray; -use datatypes::arrow::buffer::BooleanBuffer; use datatypes::arrow::record_batch::RecordBatch; use parquet::arrow::ProjectionMask; use parquet::arrow::arrow_reader::ParquetRecordBatchReader; @@ -30,7 +28,7 @@ use crate::error::{self, ComputeArrowSnafu, DecodeArrowRowGroupSnafu}; use crate::memtable::bulk::context::{BulkIterContext, BulkIterContextRef}; use crate::memtable::bulk::row_group_reader::MemtableRowGroupReaderBuilder; use crate::sst::parquet::flat_format::sequence_column_index; -use crate::sst::parquet::reader::{MaybeFilter, RowGroupReaderContext}; +use crate::sst::parquet::reader::RowGroupReaderContext; /// Iterator for reading data inside a bulk part. 
pub struct EncodedBulkPartIter { @@ -191,38 +189,13 @@ fn apply_combined_filters( let num_rows = record_batch.num_rows(); let mut combined_filter = None; - // First, apply predicate filters. + // First, apply predicate filters using the shared method. if !context.base.filters.is_empty() { - let num_rows = record_batch.num_rows(); - let mut mask = BooleanBuffer::new_set(num_rows); - - // Run filter one by one and combine them result, similar to RangeBase::precise_filter - for filter_ctx in &context.base.filters { - let filter = match filter_ctx.filter() { - MaybeFilter::Filter(f) => f, - // Column matches. - MaybeFilter::Matched => continue, - // Column doesn't match, filter the entire batch. - MaybeFilter::Pruned => return Ok(None), - }; - - // Safety: We checked the format type in new(). - let Some(column_index) = context - .read_format() - .as_flat() - .unwrap() - .projected_index_by_id(filter_ctx.column_id()) - else { - continue; - }; - let array = record_batch.column(column_index); - let result = filter - .evaluate_array(array) - .context(crate::error::RecordBatchSnafu)?; - - mask = mask.bitand(&result); - } - // Convert the mask to BooleanArray + let predicate_mask = context.base.compute_filter_mask_flat(&record_batch)?; + // If predicate filters out the entire batch, return None early + let Some(mask) = predicate_mask else { + return Ok(None); + }; combined_filter = Some(BooleanArray::from(mask)); } diff --git a/src/mito2/src/read/compat.rs b/src/mito2/src/read/compat.rs index f3fb2f6c96..8bc24a4953 100644 --- a/src/mito2/src/read/compat.rs +++ b/src/mito2/src/read/compat.rs @@ -386,7 +386,8 @@ impl FlatCompatBatch { /// Repeats the vector value `to_len` times. fn repeat_vector(vector: &VectorRef, to_len: usize, is_tag: bool) -> Result { assert_eq!(1, vector.len()); - if is_tag { + let data_type = vector.data_type(); + if is_tag && data_type.is_string() { let values = vector.to_arrow_array(); if values.is_null(0) { // Creates a dictionary array with `to_len` null keys. diff --git a/src/mito2/src/read/flat_projection.rs b/src/mito2/src/read/flat_projection.rs index ddad8e772f..23257ef649 100644 --- a/src/mito2/src/read/flat_projection.rs +++ b/src/mito2/src/read/flat_projection.rs @@ -48,6 +48,8 @@ pub struct FlatProjectionMapper { /// Ids of columns to project. It keeps ids in the same order as the `projection` /// indices to build the mapper. /// The mapper won't deduplicate the column ids. + /// + /// Note that this doesn't contain the `__table_id` and `__tsid`. column_ids: Vec, /// Ids and DataTypes of columns of the expected batch. /// We can use this to check if the batch is compatible with the expected schema. 
diff --git a/src/mito2/src/sst/index.rs b/src/mito2/src/sst/index.rs
index 20e8cb66d3..8ad7f6ef01 100644
--- a/src/mito2/src/sst/index.rs
+++ b/src/mito2/src/sst/index.rs
@@ -26,10 +26,13 @@ use std::sync::Arc;
 
 use bloom_filter::creator::BloomFilterIndexer;
 use common_telemetry::{debug, info, warn};
+use datatypes::arrow::array::BinaryArray;
 use datatypes::arrow::record_batch::RecordBatch;
+use mito_codec::index::IndexValuesCodec;
+use mito_codec::row_converter::CompositeValues;
 use puffin_manager::SstPuffinManager;
 use smallvec::{SmallVec, smallvec};
-use snafu::ResultExt;
+use snafu::{OptionExt, ResultExt};
 use statistics::{ByteCount, RowCount};
 use store_api::metadata::RegionMetadataRef;
 use store_api::storage::{ColumnId, FileId, RegionId};
@@ -40,7 +43,7 @@ use crate::access_layer::{AccessLayerRef, FilePathProvider, OperationType, Regio
 use crate::cache::file_cache::{FileType, IndexKey};
 use crate::cache::write_cache::{UploadTracker, WriteCacheRef};
 use crate::config::{BloomFilterConfig, FulltextIndexConfig, InvertedIndexConfig};
-use crate::error::{BuildIndexAsyncSnafu, Error, Result};
+use crate::error::{BuildIndexAsyncSnafu, DecodeSnafu, Error, InvalidRecordBatchSnafu, Result};
 use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
 use crate::metrics::INDEX_CREATE_MEMORY_USAGE;
 use crate::read::{Batch, BatchReader};
@@ -57,6 +60,8 @@ use crate::sst::index::fulltext_index::creator::FulltextIndexer;
 use crate::sst::index::intermediate::IntermediateManager;
 use crate::sst::index::inverted_index::creator::InvertedIndexer;
 use crate::sst::parquet::SstInfo;
+use crate::sst::parquet::flat_format::primary_key_column_index;
+use crate::sst::parquet::format::PrimaryKeyArray;
 
 pub(crate) const TYPE_INVERTED_INDEX: &str = "inverted_index";
 pub(crate) const TYPE_FULLTEXT_INDEX: &str = "fulltext_index";
@@ -698,6 +703,56 @@ impl IndexBuildScheduler {
     }
 }
 
+/// Decodes primary keys from a flat format RecordBatch.
+/// Returns a list of (decoded_pk_value, count) tuples where count is the number of occurrences.
+pub(crate) fn decode_primary_keys_with_counts(
+    batch: &RecordBatch,
+    codec: &IndexValuesCodec,
+) -> Result<Vec<(CompositeValues, usize)>> {
+    let primary_key_index = primary_key_column_index(batch.num_columns());
+    let pk_dict_array = batch
+        .column(primary_key_index)
+        .as_any()
+        .downcast_ref::<PrimaryKeyArray>()
+        .context(InvalidRecordBatchSnafu {
+            reason: "Primary key column is not a dictionary array",
+        })?;
+    let pk_values_array = pk_dict_array
+        .values()
+        .as_any()
+        .downcast_ref::<BinaryArray>()
+        .context(InvalidRecordBatchSnafu {
+            reason: "Primary key values are not binary array",
+        })?;
+    let keys = pk_dict_array.keys();
+
+    // Decodes primary keys and count consecutive occurrences
+    let mut result: Vec<(CompositeValues, usize)> = Vec::new();
+    let mut prev_key: Option<u32> = None;
+
+    for i in 0..keys.len() {
+        let current_key = keys.value(i);
+
+        // Checks if current key is the same as previous key
+        if let Some(prev) = prev_key
+            && prev == current_key
+        {
+            // Safety: We already have a key in the result vector.
+            result.last_mut().unwrap().1 += 1;
+            continue;
+        }
+
+        // New key, decodes it.
+ let pk_bytes = pk_values_array.value(current_key as usize); + let decoded_value = codec.decoder().decode(pk_bytes).context(DecodeSnafu)?; + + result.push((decoded_value, 1)); + prev_key = Some(current_key); + } + + Ok(result) +} + #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/src/mito2/src/sst/index/bloom_filter/creator.rs b/src/mito2/src/sst/index/bloom_filter/creator.rs index a48898902f..0d16a21d7c 100644 --- a/src/mito2/src/sst/index/bloom_filter/creator.rs +++ b/src/mito2/src/sst/index/bloom_filter/creator.rs @@ -16,6 +16,7 @@ use std::collections::HashMap; use std::sync::Arc; use std::sync::atomic::AtomicUsize; +use api::v1::SemanticType; use common_telemetry::{debug, warn}; use datatypes::arrow::record_batch::RecordBatch; use datatypes::schema::SkippingIndexType; @@ -23,9 +24,10 @@ use datatypes::vectors::Helper; use index::bloom_filter::creator::BloomFilterCreator; use index::target::IndexTarget; use mito_codec::index::{IndexValueCodec, IndexValuesCodec}; -use mito_codec::row_converter::SortField; +use mito_codec::row_converter::{CompositeValues, SortField}; use puffin::puffin_manager::{PuffinWriter, PutOptions}; use snafu::{ResultExt, ensure}; +use store_api::codec::PrimaryKeyEncoding; use store_api::metadata::RegionMetadataRef; use store_api::storage::{ColumnId, FileId}; use tokio_util::compat::{TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt}; @@ -35,13 +37,13 @@ use crate::error::{ OperateAbortedIndexSnafu, PuffinAddBlobSnafu, PushBloomFilterValueSnafu, Result, }; use crate::read::Batch; -use crate::sst::index::TYPE_BLOOM_FILTER_INDEX; use crate::sst::index::bloom_filter::INDEX_BLOB_TYPE; use crate::sst::index::intermediate::{ IntermediateLocation, IntermediateManager, TempFileProvider, }; use crate::sst::index::puffin_manager::SstPuffinWriter; use crate::sst::index::statistics::{ByteCount, RowCount, Statistics}; +use crate::sst::index::{TYPE_BLOOM_FILTER_INDEX, decode_primary_keys_with_counts}; /// The buffer size for the pipe used to send index data to the puffin blob. const PIPE_BUFFER_SIZE_FOR_SENDING_BLOB: usize = 8192; @@ -289,47 +291,81 @@ impl BloomFilterIndexer { let n = batch.num_rows(); guard.inc_row_count(n); + let is_sparse = self.metadata.primary_key_encoding == PrimaryKeyEncoding::Sparse; + let mut decoded_pks: Option> = None; + for (col_id, creator) in &mut self.creators { - // Get the column name from metadata - if let Some(column_meta) = self.metadata.column_by_id(*col_id) { - let column_name = &column_meta.column_schema.name; + // Safety: `creators` are created from the metadata so it won't be None. 
+ let column_meta = self.metadata.column_by_id(*col_id).unwrap(); + let column_name = &column_meta.column_schema.name; + if let Some(column_array) = batch.column_by_name(column_name) { + // Convert Arrow array to VectorRef + let vector = Helper::try_into_vector(column_array.clone()) + .context(crate::error::ConvertVectorSnafu)?; + let sort_field = SortField::new(vector.data_type()); - // Find the column in the RecordBatch by name - if let Some(column_array) = batch.column_by_name(column_name) { - // Convert Arrow array to VectorRef - let vector = Helper::try_into_vector(column_array.clone()) - .context(crate::error::ConvertVectorSnafu)?; - let sort_field = SortField::new(vector.data_type()); + for i in 0..n { + let value = vector.get_ref(i); + let elems = (!value.is_null()) + .then(|| { + let mut buf = vec![]; + IndexValueCodec::encode_nonnull_value(value, &sort_field, &mut buf) + .context(EncodeSnafu)?; + Ok(buf) + }) + .transpose()?; - for i in 0..n { - let value = vector.get_ref(i); - let elems = (!value.is_null()) - .then(|| { - let mut buf = vec![]; - IndexValueCodec::encode_nonnull_value(value, &sort_field, &mut buf) - .context(EncodeSnafu)?; - Ok(buf) - }) - .transpose()?; + creator + .push_row_elems(elems) + .await + .context(PushBloomFilterValueSnafu)?; + } + } else if is_sparse && column_meta.semantic_type == SemanticType::Tag { + // Column not found in batch, tries to decode from primary keys for sparse encoding. + if decoded_pks.is_none() { + decoded_pks = Some(decode_primary_keys_with_counts(batch, &self.codec)?); + } - creator - .push_row_elems(elems) - .await - .context(PushBloomFilterValueSnafu)?; - } - } else { + let pk_values_with_counts = decoded_pks.as_ref().unwrap(); + let Some(col_info) = self.codec.pk_col_info(*col_id) else { debug!( - "Column {} not found in the batch during building bloom filter index", + "Column {} not found in primary key during building bloom filter index", column_name ); - // Push empty elements to maintain alignment - for _ in 0..n { - creator - .push_row_elems(None) - .await - .context(PushBloomFilterValueSnafu)?; - } + continue; + }; + let pk_index = col_info.idx; + let field = &col_info.field; + for (decoded, count) in pk_values_with_counts { + let value = match decoded { + CompositeValues::Dense(dense) => dense.get(pk_index).map(|v| &v.1), + CompositeValues::Sparse(sparse) => sparse.get(col_id), + }; + + let elems = value + .filter(|v| !v.is_null()) + .map(|v| { + let mut buf = vec![]; + IndexValueCodec::encode_nonnull_value( + v.as_value_ref(), + field, + &mut buf, + ) + .context(EncodeSnafu)?; + Ok(buf) + }) + .transpose()?; + + creator + .push_n_row_elems(*count, elems) + .await + .context(PushBloomFilterValueSnafu)?; } + } else { + debug!( + "Column {} not found in the batch during building bloom filter index", + column_name + ); } } diff --git a/src/mito2/src/sst/index/fulltext_index/creator.rs b/src/mito2/src/sst/index/fulltext_index/creator.rs index 15e8870441..2efa154ec4 100644 --- a/src/mito2/src/sst/index/fulltext_index/creator.rs +++ b/src/mito2/src/sst/index/fulltext_index/creator.rs @@ -16,6 +16,7 @@ use std::collections::HashMap; use std::sync::Arc; use std::sync::atomic::AtomicUsize; +use api::v1::SemanticType; use common_telemetry::warn; use datatypes::arrow::array::{Array, LargeStringArray, StringArray}; use datatypes::arrow::datatypes::DataType; @@ -69,6 +70,17 @@ impl FulltextIndexer { let mut creators = HashMap::new(); for column in &metadata.column_metadatas { + // Tag columns don't support fulltext index now. 
+ // If we need to support fulltext index for tag columns, we also need to parse + // the codec and handle sparse encoding for flat format specially. + if column.semantic_type == SemanticType::Tag { + common_telemetry::debug!( + "Skip creating fulltext index for tag column {}", + column.column_schema.name + ); + continue; + } + let options = column .column_schema .fulltext_options() diff --git a/src/mito2/src/sst/index/inverted_index/creator.rs b/src/mito2/src/sst/index/inverted_index/creator.rs index b7019422f8..f31cfaf1dc 100644 --- a/src/mito2/src/sst/index/inverted_index/creator.rs +++ b/src/mito2/src/sst/index/inverted_index/creator.rs @@ -17,6 +17,7 @@ use std::num::NonZeroUsize; use std::sync::Arc; use std::sync::atomic::AtomicUsize; +use api::v1::SemanticType; use common_telemetry::{debug, warn}; use datatypes::arrow::record_batch::RecordBatch; use datatypes::vectors::Helper; @@ -26,9 +27,10 @@ use index::inverted_index::create::sort_create::SortIndexCreator; use index::inverted_index::format::writer::InvertedIndexBlobWriter; use index::target::IndexTarget; use mito_codec::index::{IndexValueCodec, IndexValuesCodec}; -use mito_codec::row_converter::SortField; +use mito_codec::row_converter::{CompositeValues, SortField}; use puffin::puffin_manager::{PuffinWriter, PutOptions}; use snafu::{ResultExt, ensure}; +use store_api::codec::PrimaryKeyEncoding; use store_api::metadata::RegionMetadataRef; use store_api::storage::{ColumnId, FileId}; use tokio::io::duplex; @@ -39,13 +41,13 @@ use crate::error::{ PushIndexValueSnafu, Result, }; use crate::read::Batch; -use crate::sst::index::TYPE_INVERTED_INDEX; use crate::sst::index::intermediate::{ IntermediateLocation, IntermediateManager, TempFileProvider, }; use crate::sst::index::inverted_index::INDEX_BLOB_TYPE; use crate::sst::index::puffin_manager::SstPuffinWriter; use crate::sst::index::statistics::{ByteCount, RowCount, Statistics}; +use crate::sst::index::{TYPE_INVERTED_INDEX, decode_primary_keys_with_counts}; /// The minimum memory usage threshold for one column. const MIN_MEMORY_USAGE_THRESHOLD_PER_COLUMN: usize = 1024 * 1024; // 1MB @@ -78,9 +80,6 @@ pub struct InvertedIndexer { /// Region metadata for column lookups. metadata: RegionMetadataRef, - /// Cache for mapping indexed column positions to their indices in the RecordBatch. - /// Aligns with indexed_column_ids. Initialized lazily when first batch is processed. 
- column_index_cache: Option>>, } impl InvertedIndexer { @@ -130,7 +129,6 @@ impl InvertedIndexer { memory_usage, indexed_column_ids, metadata: metadata.clone(), - column_index_cache: None, } } @@ -170,29 +168,29 @@ impl InvertedIndexer { } async fn do_update_flat(&mut self, batch: &RecordBatch) -> Result<()> { - // Initialize column index cache if not already done - if self.column_index_cache.is_none() { - self.initialize_column_index_cache(batch); - } - let mut guard = self.stats.record_update(); - let n = batch.num_rows(); - guard.inc_row_count(n); + guard.inc_row_count(batch.num_rows()); - let column_indices = self.column_index_cache.as_ref().unwrap(); + let is_sparse = self.metadata.primary_key_encoding == PrimaryKeyEncoding::Sparse; + let mut decoded_pks: Option> = None; - for ((col_id, target_key), &column_index) in - self.indexed_column_ids.iter().zip(column_indices.iter()) - { - if let Some(index) = column_index { - let column_array = batch.column(index); + for (col_id, target_key) in &self.indexed_column_ids { + let Some(column_meta) = self.metadata.column_by_id(*col_id) else { + debug!( + "Column {} not found in the metadata during building inverted index", + col_id + ); + continue; + }; + let column_name = &column_meta.column_schema.name; + if let Some(column_array) = batch.column_by_name(column_name) { // Convert Arrow array to VectorRef using Helper let vector = Helper::try_into_vector(column_array.clone()) .context(crate::error::ConvertVectorSnafu)?; let sort_field = SortField::new(vector.data_type()); - for row in 0..n { + for row in 0..batch.num_rows() { self.value_buf.clear(); let value_ref = vector.get_ref(row); @@ -214,6 +212,47 @@ impl InvertedIndexer { .context(PushIndexValueSnafu)?; } } + } else if is_sparse && column_meta.semantic_type == SemanticType::Tag { + // Column not found in batch, tries to decode from primary keys for sparse encoding. + if decoded_pks.is_none() { + decoded_pks = Some(decode_primary_keys_with_counts(batch, &self.codec)?); + } + + let pk_values_with_counts = decoded_pks.as_ref().unwrap(); + let Some(col_info) = self.codec.pk_col_info(*col_id) else { + debug!( + "Column {} not found in primary key during building bloom filter index", + column_name + ); + continue; + }; + let pk_index = col_info.idx; + let field = &col_info.field; + for (decoded, count) in pk_values_with_counts { + let value = match decoded { + CompositeValues::Dense(dense) => dense.get(pk_index).map(|v| &v.1), + CompositeValues::Sparse(sparse) => sparse.get(col_id), + }; + + let elem = value + .filter(|v| !v.is_null()) + .map(|v| { + self.value_buf.clear(); + IndexValueCodec::encode_nonnull_value( + v.as_value_ref(), + field, + &mut self.value_buf, + ) + .context(EncodeSnafu)?; + Ok(self.value_buf.as_slice()) + }) + .transpose()?; + + self.index_creator + .push_with_name_n(target_key, elem, *count) + .await + .context(PushIndexValueSnafu)?; + } } else { debug!( "Column {} not found in the batch during building inverted index", @@ -225,26 +264,6 @@ impl InvertedIndexer { Ok(()) } - /// Initializes the column index cache by mapping indexed column ids to their positions in the RecordBatch. 
- fn initialize_column_index_cache(&mut self, batch: &RecordBatch) { - let mut column_indices = Vec::with_capacity(self.indexed_column_ids.len()); - - for (col_id, _) in &self.indexed_column_ids { - let column_index = if let Some(column_meta) = self.metadata.column_by_id(*col_id) { - let column_name = &column_meta.column_schema.name; - batch - .schema() - .column_with_name(column_name) - .map(|(index, _)| index) - } else { - None - }; - column_indices.push(column_index); - } - - self.column_index_cache = Some(column_indices); - } - /// Finishes index creation and cleans up garbage. /// Returns the number of rows and bytes written. pub async fn finish( diff --git a/src/mito2/src/sst/parquet/file_range.rs b/src/mito2/src/sst/parquet/file_range.rs index d216f1e132..268391135b 100644 --- a/src/mito2/src/sst/parquet/file_range.rs +++ b/src/mito2/src/sst/parquet/file_range.rs @@ -15,18 +15,20 @@ //! Structs and functions for reading ranges from a parquet file. A file range //! is usually a row group in a parquet file. +use std::collections::HashMap; use std::ops::BitAnd; use std::sync::Arc; use api::v1::{OpType, SemanticType}; use common_telemetry::error; -use datatypes::arrow::array::BooleanArray; +use datatypes::arrow::array::{ArrayRef, BooleanArray}; use datatypes::arrow::buffer::BooleanBuffer; use datatypes::arrow::record_batch::RecordBatch; use mito_codec::row_converter::{CompositeValues, PrimaryKeyCodec}; use parquet::arrow::arrow_reader::RowSelection; use snafu::{OptionExt, ResultExt}; -use store_api::storage::TimeSeriesRowSelector; +use store_api::codec::PrimaryKeyEncoding; +use store_api::storage::{ColumnId, TimeSeriesRowSelector}; use crate::error::{ ComputeArrowSnafu, DataTypeMismatchSnafu, DecodeSnafu, DecodeStatsSnafu, RecordBatchSnafu, @@ -37,11 +39,11 @@ use crate::read::compat::CompatBatch; use crate::read::last_row::RowGroupLastRowCachedReader; use crate::read::prune::{FlatPruneReader, PruneReader}; use crate::sst::file::FileHandle; +use crate::sst::parquet::flat_format::{DecodedPrimaryKeys, decode_primary_keys}; use crate::sst::parquet::format::ReadFormat; use crate::sst::parquet::reader::{ FlatRowGroupReader, MaybeFilter, RowGroupReader, RowGroupReaderBuilder, SimpleFilterContext, }; - /// A range of a parquet SST. Now it is a row group. /// We can read different file ranges in parallel. #[derive(Clone)] @@ -357,7 +359,34 @@ impl RangeBase { } /// Filters the input RecordBatch by the pushed down predicate and returns RecordBatch. + /// + /// It assumes all necessary tags are already decoded from the primary key. pub(crate) fn precise_filter_flat(&self, input: RecordBatch) -> Result> { + let mask = self.compute_filter_mask_flat(&input)?; + + // If mask is None, the entire batch is filtered out + let Some(mask) = mask else { + return Ok(None); + }; + + let filtered_batch = + datatypes::arrow::compute::filter_record_batch(&input, &BooleanArray::from(mask)) + .context(ComputeArrowSnafu)?; + + if filtered_batch.num_rows() > 0 { + Ok(Some(filtered_batch)) + } else { + Ok(None) + } + } + + /// Computes the filter mask for the input RecordBatch based on pushed down predicates. + /// + /// Returns `None` if the entire batch is filtered out, otherwise returns the boolean mask. 
+    pub(crate) fn compute_filter_mask_flat(
+        &self,
+        input: &RecordBatch,
+    ) -> Result<Option<BooleanBuffer>> {
         let mut mask = BooleanBuffer::new_set(input.num_rows());
 
         let flat_format = self
@@ -367,6 +396,11 @@ impl RangeBase {
                 reason: "Expected flat format for precise_filter_flat",
             })?;
 
+        // Decodes primary keys once if we have any tag filters not in projection
+        let mut decoded_pks: Option<DecodedPrimaryKeys> = None;
+        // Cache decoded tag arrays by column id to avoid redundant decoding
+        let mut decoded_tag_cache: HashMap<ColumnId, ArrayRef> = HashMap::new();
+
         // Run filter one by one and combine them result
         for filter_ctx in &self.filters {
             let filter = match filter_ctx.filter() {
@@ -383,20 +417,53 @@ impl RangeBase {
                 let column = &input.columns()[idx];
                 let result = filter.evaluate_array(column).context(RecordBatchSnafu)?;
                 mask = mask.bitand(&result);
-            } else {
-                // Column not found in projection, continue
-                continue;
+            } else if filter_ctx.semantic_type() == SemanticType::Tag {
+                // Column not found in projection, it may be a tag column.
+                // Decodes primary keys if not already decoded.
+                if decoded_pks.is_none() {
+                    decoded_pks = Some(decode_primary_keys(self.codec.as_ref(), input)?);
+                }
+
+                let metadata = flat_format.metadata();
+                let column_id = filter_ctx.column_id();
+
+                // Check cache first
+                let tag_column = if let Some(cached_column) = decoded_tag_cache.get(&column_id) {
+                    cached_column.clone()
+                } else {
+                    // For dense encoding, we need pk_index. For sparse encoding, pk_index is None.
+                    let pk_index = if self.codec.encoding() == PrimaryKeyEncoding::Sparse {
+                        None
+                    } else {
+                        metadata.primary_key_index(column_id)
+                    };
+                    let column_index = metadata.column_index_by_id(column_id);
+
+                    if let (Some(column_index), Some(decoded)) =
+                        (column_index, decoded_pks.as_ref())
+                    {
+                        let column_metadata = &metadata.column_metadatas[column_index];
+                        let tag_column = decoded.get_tag_column(
+                            column_id,
+                            pk_index,
+                            &column_metadata.column_schema.data_type,
+                        )?;
+                        // Cache the decoded tag column
+                        decoded_tag_cache.insert(column_id, tag_column.clone());
+                        tag_column
+                    } else {
+                        continue;
+                    }
+                };
+
+                let result = filter
+                    .evaluate_array(&tag_column)
+                    .context(RecordBatchSnafu)?;
+                mask = mask.bitand(&result);
             }
+            // Non-tag column not found in projection.
         }
 
-        let filtered_batch =
-            datatypes::arrow::compute::filter_record_batch(&input, &BooleanArray::from(mask))
-                .context(ComputeArrowSnafu)?;
-
-        if filtered_batch.num_rows() > 0 {
-            Ok(Some(filtered_batch))
-        } else {
-            Ok(None)
-        }
+        Ok(Some(mask))
     }
 }
diff --git a/src/mito2/src/sst/parquet/flat_format.rs b/src/mito2/src/sst/parquet/flat_format.rs
index bcf1d8694c..cd13dfea01 100644
--- a/src/mito2/src/sst/parquet/flat_format.rs
+++ b/src/mito2/src/sst/parquet/flat_format.rs
@@ -127,7 +127,9 @@ pub(crate) fn op_type_column_index(num_columns: usize) -> usize {
     num_columns - 1
 }
 
-// TODO(yingwen): Add an option to skip reading internal columns.
+// TODO(yingwen): Add an option to skip reading internal columns if the region is
+// append only and doesn't use sparse encoding (We need to check the table id under
+// sparse encoding).
 /// Helper for reading the flat SST format with projection.
 ///
 /// It only supports flat format that stores primary keys additionally.
@@ -528,6 +530,125 @@ pub(crate) fn sst_column_id_indices(metadata: &RegionMetadata) -> HashMap<ColumnId, usize> {
+/// Decodes primary key values from the primary key dictionary column of `batch`.
+pub(crate) fn decode_primary_keys(
+    codec: &dyn PrimaryKeyCodec,
+    batch: &RecordBatch,
+) -> Result<DecodedPrimaryKeys> {
+    let primary_key_index = primary_key_column_index(batch.num_columns());
+    let pk_dict_array = batch
+        .column(primary_key_index)
+        .as_any()
+        .downcast_ref::<DictionaryArray<UInt32Type>>()
+        .with_context(|| InvalidRecordBatchSnafu {
+            reason: "Primary key column is not a dictionary array".to_string(),
+        })?;
+    let pk_values_array = pk_dict_array
+        .values()
+        .as_any()
+        .downcast_ref::<BinaryArray>()
+        .with_context(|| InvalidRecordBatchSnafu {
+            reason: "Primary key values are not binary array".to_string(),
+        })?;
+
+    let keys = pk_dict_array.keys();
+
+    // Decodes primary key values by iterating through keys, reusing decoded values for duplicate keys.
+    // Maps original key index -> new decoded value index
+    let mut key_to_decoded_index = Vec::with_capacity(keys.len());
+    let mut decoded_pk_values = Vec::new();
+    let mut prev_key: Option<u32> = None;
+
+    // The parquet reader may read the whole dictionary page into the dictionary values, so
+    // we may decode many primary keys not in this batch if we decode the values array directly.
+    for i in 0..keys.len() {
+        let current_key = keys.value(i);
+
+        // Check if current key is the same as previous key
+        if let Some(prev) = prev_key
+            && prev == current_key
+        {
+            // Reuse the last decoded index
+            key_to_decoded_index.push((decoded_pk_values.len() - 1) as u32);
+            continue;
+        }
+
+        // New key, decodes the value
+        let pk_bytes = pk_values_array.value(current_key as usize);
+        let decoded_value = codec.decode(pk_bytes).context(DecodeSnafu)?;
+
+        decoded_pk_values.push(decoded_value);
+        key_to_decoded_index.push((decoded_pk_values.len() - 1) as u32);
+        prev_key = Some(current_key);
+    }
+
+    // Create the keys array from key_to_decoded_index
+    let keys_array = UInt32Array::from(key_to_decoded_index);
+
+    Ok(DecodedPrimaryKeys {
+        decoded_pk_values,
+        keys_array,
+    })
+}
+
+/// Holds decoded primary key values and their indices.
+pub(crate) struct DecodedPrimaryKeys {
+    /// Decoded primary key values for unique keys in the dictionary.
+    decoded_pk_values: Vec<CompositeValues>,
+    /// Prebuilt keys array for creating dictionary arrays.
+    keys_array: UInt32Array,
+}
+
+impl DecodedPrimaryKeys {
+    /// Gets a tag column array by column id and data type.
+    ///
+    /// For sparse encoding, uses column_id to lookup values.
+    /// For dense encoding, uses pk_index to get values.
+    pub(crate) fn get_tag_column(
+        &self,
+        column_id: ColumnId,
+        pk_index: Option<usize>,
+        column_type: &ConcreteDataType,
+    ) -> Result<ArrayRef> {
+        // Gets values from the primary key.
+        let mut builder = column_type.create_mutable_vector(self.decoded_pk_values.len());
+        for decoded in &self.decoded_pk_values {
+            match decoded {
+                CompositeValues::Dense(dense) => {
+                    let pk_idx = pk_index.expect("pk_index required for dense encoding");
+                    if pk_idx < dense.len() {
+                        builder.push_value_ref(&dense[pk_idx].1.as_value_ref());
+                    } else {
+                        builder.push_null();
+                    }
+                }
+                CompositeValues::Sparse(sparse) => {
+                    let value = sparse.get_or_null(column_id);
+                    builder.push_value_ref(&value.as_value_ref());
+                }
+            };
+        }
+
+        let values_vector = builder.to_vector();
+        let values_array = values_vector.to_arrow_array();
+
+        // Only creates dictionary array for string types, otherwise take values by keys
+        if column_type.is_string() {
+            // Creates dictionary array using the same keys for string types
+            // Note that the dictionary values may have nulls.
+            let dict_array = DictionaryArray::new(self.keys_array.clone(), values_array);
+            Ok(Arc::new(dict_array))
+        } else {
+            // For non-string types, takes values by keys indices to create a regular array
+            let taken_array =
+                take(&values_array, &self.keys_array, None).context(ComputeArrowSnafu)?;
+            Ok(taken_array)
+        }
+    }
+}
+
 /// Converts a batch that doesn't have decoded primary key columns into a batch that has decoded
 /// primary key columns in flat format.
 pub(crate) struct FlatConvertFormat {
@@ -577,53 +698,22 @@ impl FlatConvertFormat {
 
     /// Converts a batch to have decoded primary key columns in flat format.
     ///
-    /// The primary key array in the batch is a dictionary array. We decode each value which is a
-    /// primary key and reuse the keys array to build a dictionary array for each tag column.
-    /// The decoded columns are inserted in front of other columns.
+    /// The primary key array in the batch is a dictionary array.
     pub(crate) fn convert(&self, batch: RecordBatch) -> Result<RecordBatch> {
         if self.projected_primary_keys.is_empty() {
             return Ok(batch);
         }
 
-        let primary_key_index = primary_key_column_index(batch.num_columns());
-        let pk_dict_array = batch
-            .column(primary_key_index)
-            .as_any()
-            .downcast_ref::<DictionaryArray<UInt32Type>>()
-            .with_context(|| InvalidRecordBatchSnafu {
-                reason: "Primary key column is not a dictionary array".to_string(),
-            })?;
-
-        let pk_values_array = pk_dict_array
-            .values()
-            .as_any()
-            .downcast_ref::<BinaryArray>()
-            .with_context(|| InvalidRecordBatchSnafu {
-                reason: "Primary key values are not binary array".to_string(),
-            })?;
-
-        // Decodes all primary key values
-        let mut decoded_pk_values = Vec::with_capacity(pk_values_array.len());
-        for i in 0..pk_values_array.len() {
-            if pk_values_array.is_null(i) {
-                decoded_pk_values.push(None);
-            } else {
-                let pk_bytes = pk_values_array.value(i);
-                let decoded = self.codec.decode(pk_bytes).context(DecodeSnafu)?;
-                decoded_pk_values.push(Some(decoded));
-            }
-        }
+        let decoded_pks = decode_primary_keys(self.codec.as_ref(), &batch)?;
 
         // Builds decoded tag column arrays.
         let mut decoded_columns = Vec::new();
         for (column_id, pk_index, column_index) in &self.projected_primary_keys {
             let column_metadata = &self.metadata.column_metadatas[*column_index];
-            let tag_column = self.build_primary_key_column(
+            let tag_column = decoded_pks.get_tag_column(
                 *column_id,
-                *pk_index,
+                Some(*pk_index),
                 &column_metadata.column_schema.data_type,
-                pk_dict_array.keys(),
-                &decoded_pk_values,
             )?;
             decoded_columns.push(tag_column);
         }
@@ -648,57 +738,6 @@ impl FlatConvertFormat {
         let new_schema = Arc::new(Schema::new(new_fields));
         RecordBatch::try_new(new_schema, new_columns).context(NewRecordBatchSnafu)
     }
-
-    /// Builds an array for a specific tag column.
-    ///
-    /// It may build a dictionary array if the type is string. Note that the dictionary
-    /// array may have null values, although keys are not null.
-    fn build_primary_key_column(
-        &self,
-        column_id: ColumnId,
-        pk_index: usize,
-        column_type: &ConcreteDataType,
-        keys: &UInt32Array,
-        decoded_pk_values: &[Option<CompositeValues>],
-    ) -> Result<ArrayRef> {
-        // Gets values from the primary key.
- let mut builder = column_type.create_mutable_vector(decoded_pk_values.len()); - for decoded_opt in decoded_pk_values { - match decoded_opt { - Some(decoded) => { - match decoded { - CompositeValues::Dense(dense) => { - if pk_index < dense.len() { - builder.push_value_ref(&dense[pk_index].1.as_value_ref()); - } else { - builder.push_null(); - } - } - CompositeValues::Sparse(sparse) => { - let value = sparse.get_or_null(column_id); - builder.push_value_ref(&value.as_value_ref()); - } - }; - } - None => builder.push_null(), - } - } - - let values_vector = builder.to_vector(); - let values_array = values_vector.to_arrow_array(); - - // Only creates dictionary array for string types, otherwise take values by keys - if column_type.is_string() { - // Creates dictionary array using the same keys for string types - // Note that the dictionary values may have nulls. - let dict_array = DictionaryArray::new(keys.clone(), values_array); - Ok(Arc::new(dict_array)) - } else { - // For non-string types, takes values by keys indices to create a regular array - let taken_array = take(&values_array, keys, None).context(ComputeArrowSnafu)?; - Ok(taken_array) - } - } } #[cfg(test)] diff --git a/src/mito2/src/sst/parquet/reader.rs b/src/mito2/src/sst/parquet/reader.rs index d02786455e..60cf654380 100644 --- a/src/mito2/src/sst/parquet/reader.rs +++ b/src/mito2/src/sst/parquet/reader.rs @@ -1397,6 +1397,7 @@ impl FlatRowGroupReader { let record_batch = batch_result.context(ArrowReaderSnafu { path: self.context.file_path(), })?; + // Safety: Only flat format use FlatRowGroupReader. let flat_format = self.context.read_format().as_flat().unwrap(); let record_batch = From 2f637a262e1a576e3283d93b60665467d1cdc57e Mon Sep 17 00:00:00 2001 From: LFC <990479+MichaelScofield@users.noreply.github.com> Date: Thu, 23 Oct 2025 15:18:36 +0800 Subject: [PATCH 06/14] chore: update datafusion to 50 (#7076) * chore: update datafusion to 50 Signed-off-by: luofucong * fix ci Signed-off-by: luofucong * fix: update datafusion_pg_catalog import * chore: fix toml format * chore: fix toml format again * fix nextest Signed-off-by: luofucong * fix sqlness Signed-off-by: luofucong * chore: switch datafusion-orc to upstream tag * fix sqlness Signed-off-by: luofucong * resolve PR comments Signed-off-by: luofucong --------- Signed-off-by: luofucong Co-authored-by: Ning Sun --- Cargo.lock | 805 +++++++++--------- Cargo.toml | 66 +- src/catalog/src/system_schema/pg_catalog.rs | 4 +- src/common/datasource/src/file_format.rs | 4 +- src/common/function/src/aggrs/aggr_wrapper.rs | 17 +- .../function/src/aggrs/aggr_wrapper/tests.rs | 3 +- src/common/function/src/aggrs/count_hash.rs | 2 +- .../function/src/scalars/geo/geohash.rs | 4 +- src/common/function/src/scalars/geo/h3.rs | 4 +- src/common/function/src/scalars/udf.rs | 15 + src/common/macro/src/admin_fn.rs | 14 + src/datatypes/src/schema.rs | 3 +- src/datatypes/src/value.rs | 4 +- src/datatypes/src/vectors/helper.rs | 4 +- src/flow/src/df_optimizer.rs | 2 +- src/operator/src/expr_helper.rs | 17 +- src/promql/src/extension_plan/empty_metric.rs | 4 +- .../src/extension_plan/histogram_fold.rs | 9 +- .../src/extension_plan/range_manipulate.rs | 6 +- .../src/extension_plan/union_distinct_on.rs | 2 +- src/query/Cargo.toml | 1 + src/query/src/dist_plan/commutativity.rs | 25 + src/query/src/dist_plan/planner.rs | 4 +- src/query/src/optimizer/windowed_sort.rs | 6 +- src/query/src/part_sort.rs | 13 +- src/query/src/planner.rs | 13 +- src/query/src/promql/planner.rs | 1 + 
src/query/src/range_select/plan_rewrite.rs | 3 + src/servers/src/http/handler.rs | 2 +- src/sql/src/ast.rs | 5 +- src/sql/src/parser.rs | 23 +- src/sql/src/parsers/alter_parser.rs | 6 +- src/sql/src/parsers/copy_parser.rs | 2 +- src/sql/src/parsers/create_parser.rs | 5 +- src/sql/src/parsers/cursor_parser.rs | 6 +- src/sql/src/parsers/describe_parser.rs | 2 +- src/sql/src/parsers/drop_parser.rs | 10 +- src/sql/src/parsers/explain_parser.rs | 6 +- src/sql/src/parsers/set_var_parser.rs | 34 +- src/sql/src/parsers/show_parser.rs | 12 +- src/sql/src/parsers/show_parser/trigger.rs | 2 +- src/sql/src/parsers/truncate_parser.rs | 2 +- src/sql/src/parsers/utils.rs | 2 +- src/sql/src/statements/statement.rs | 2 +- src/sql/src/util.rs | 2 +- tests-integration/tests/http.rs | 8 +- tests-integration/tests/sql.rs | 4 +- .../optimizer/filter_push_down.result | 14 +- .../standalone/common/aggregate/corr.result | 3 +- .../standalone/common/aggregate/corr.sql | 1 - .../standalone/common/aggregate/stddev.result | 112 +-- .../common/error/incorrect_sql.result | 2 +- .../common/function/arithmetic.result | 30 +- .../standalone/common/order/limit.result | 2 +- tests/cases/standalone/common/range/by.result | 24 +- .../standalone/common/range/calculate.result | 32 +- .../common/system/pg_catalog.result | 8 +- .../common/tql-explain-analyze/explain.result | 2 + .../standalone/common/tql/tql-cte.result | 8 +- .../common/types/string/bigstring.result | 16 +- .../types/string/scan_big_varchar.result | 150 ++-- .../common/types/string/unicode.result | 12 +- .../standalone/common/view/create.result | 2 +- 63 files changed, 830 insertions(+), 778 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 14f8089da7..f721f58369 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -124,12 +124,6 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -262,7 +256,7 @@ dependencies = [ "proc-macro-error2", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -300,9 +294,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c26b57282a08ae92f727497805122fec964c6245cfa0e13f0e75452eaf3bc41f" +checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" dependencies = [ "arrow-arith", "arrow-array", @@ -321,9 +315,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cebf38ca279120ff522f4954b81a39527425b6e9f615e6b72842f4de1ffe02b8" +checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" dependencies = [ "arrow-array", "arrow-buffer", @@ -335,9 +329,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744109142cdf8e7b02795e240e20756c2a782ac9180d4992802954a8f871c0de" +checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ 
-346,15 +340,15 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.15.4", + "hashbrown 0.16.0", "num", ] [[package]] name = "arrow-buffer" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601bb103c4c374bcd1f62c66bcea67b42a2ee91a690486c37d4c180236f11ccc" +checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" dependencies = [ "bytes", "half", @@ -363,9 +357,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eed61d9d73eda8df9e3014843def37af3050b5080a9acbe108f045a316d5a0be" +checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" dependencies = [ "arrow-array", "arrow-buffer", @@ -384,9 +378,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa95b96ce0c06b4d33ac958370db8c0d31e88e54f9d6e08b0353d18374d9f991" +checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" dependencies = [ "arrow-array", "arrow-cast", @@ -399,9 +393,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43407f2c6ba2367f64d85d4603d6fb9c4b92ed79d2ffd21021b37efa96523e12" +checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" dependencies = [ "arrow-buffer", "arrow-schema", @@ -411,9 +405,9 @@ dependencies = [ [[package]] name = "arrow-flight" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7c66c5e4a7aedc2bfebffeabc2116d76adb22e08d230b968b995da97f8b11ca" +checksum = "8c8b0ba0784d56bc6266b79f5de7a24b47024e7b3a0045d2ad4df3d9b686099f" dependencies = [ "arrow-array", "arrow-buffer", @@ -430,9 +424,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4b0487c4d2ad121cbc42c4db204f1509f8618e589bc77e635e9c40b502e3b90" +checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" dependencies = [ "arrow-array", "arrow-buffer", @@ -446,9 +440,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d747573390905905a2dc4c5a61a96163fe2750457f90a04ee2a88680758c79" +checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" dependencies = [ "arrow-array", "arrow-buffer", @@ -457,7 +451,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.10.0", + "indexmap 2.11.4", "lexical-core", "memchr", "num", @@ -468,9 +462,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c142a147dceb59d057bad82400f1693847c80dca870d008bf7b91caf902810ae" +checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" dependencies = [ "arrow-array", "arrow-buffer", @@ -481,9 +475,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dac6620667fccdab4204689ca173bd84a15de6bb6b756c3a8764d4d7d0c2fc04" +checksum = 
"9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" dependencies = [ "arrow-array", "arrow-buffer", @@ -494,9 +488,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfa93af9ff2bb80de539e6eb2c1c8764abd0f4b73ffb0d7c82bf1f9868785e66" +checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" dependencies = [ "serde", "serde_json", @@ -504,9 +498,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be8b2e0052cd20d36d64f32640b68a5ab54d805d24a473baee5d52017c85536c" +checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -518,9 +512,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2155e26e17f053c8975c546fc70cf19c00542f9abf43c23a88a46ef7204204f" +checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" dependencies = [ "arrow-array", "arrow-buffer", @@ -530,7 +524,7 @@ dependencies = [ "memchr", "num", "regex", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", ] [[package]] @@ -644,7 +638,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -666,7 +660,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -677,13 +671,13 @@ checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" [[package]] name = "async-trait" -version = "0.1.88" +version = "0.1.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -763,7 +757,7 @@ checksum = "ffdcb70bdbc4d478427380519163274ac86e52916e10f0a8889adf0f96d3fee7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -918,7 +912,7 @@ checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1066,7 +1060,7 @@ dependencies = [ "regex", "rustc-hash 2.1.1", "shlex", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1195,7 +1189,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1218,7 +1212,7 @@ dependencies = [ "proc-macro-crate 3.3.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1560,7 +1554,7 @@ version = "0.13.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.4", "lazy_static", "num-traits", "regex", @@ -1614,17 +1608,16 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.41" +version = "0.4.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +checksum = 
"145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" dependencies = [ - "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "serde", "wasm-bindgen", - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -1746,7 +1739,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1984,11 +1977,12 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "comfy-table" -version = "7.1.4" +version = "7.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" +checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" dependencies = [ - "unicode-segmentation", + "strum 0.26.3", + "strum_macros 0.26.4", "unicode-width 0.2.1", ] @@ -2072,7 +2066,7 @@ dependencies = [ "lazy_static", "object-store", "object_store_opendal", - "orc-rust 0.6.3", + "orc-rust", "parquet", "paste", "regex", @@ -2288,7 +2282,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -2462,7 +2456,7 @@ dependencies = [ "futures-util", "serde", "snafu 0.8.6", - "sqlparser 0.55.0-greptime", + "sqlparser", "sqlparser_derive 0.1.1", "store-api", "tokio", @@ -2544,7 +2538,7 @@ dependencies = [ "jsonb", "serde_json", "snafu 0.8.6", - "sqlparser 0.55.0-greptime", + "sqlparser", ] [[package]] @@ -3177,7 +3171,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.11.1", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -3191,7 +3185,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.11.1", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -3213,7 +3207,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -3224,7 +3218,7 @@ checksum = "2b5be8a7a562d315a5b92a630c30cec6bcf663e6673f00fbb69cca66a6f521b9" dependencies = [ "darling_core 0.21.1", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -3249,8 +3243,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", "arrow-ipc", @@ -3277,6 +3271,7 @@ dependencies = [ "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-optimizer", "datafusion-physical-plan", @@ -3284,7 +3279,6 @@ dependencies = [ "datafusion-sql", "flate2", "futures", - "hex", "itertools 0.14.0", "log", "object_store", @@ -3292,7 +3286,8 @@ dependencies = [ "parquet", "rand 0.9.1", "regex", - "sqlparser 0.55.0", + "rstest", + "sqlparser", "tempfile", "tokio", "url", @@ -3303,8 +3298,8 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", "async-trait", @@ -3317,7 
+3312,6 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-session", - "datafusion-sql", "futures", "itertools 0.14.0", "log", @@ -3328,8 +3322,8 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", "async-trait", @@ -3350,33 +3344,31 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "ahash 0.8.12", "arrow", "arrow-ipc", - "base64 0.22.1", "chrono", "half", "hashbrown 0.14.5", - "hex", - "indexmap 2.10.0", + "indexmap 2.11.4", "libc", "log", "object_store", "parquet", "paste", "recursive", - "sqlparser 0.55.0", + "sqlparser", "tokio", "web-time", ] [[package]] name = "datafusion-common-runtime" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "futures", "log", @@ -3385,8 +3377,8 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", "async-compression 0.4.19", @@ -3399,6 +3391,7 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", @@ -3408,9 +3401,7 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "parquet", "rand 0.9.1", - "tempfile", "tokio", "tokio-util", "url", @@ -3420,19 +3411,17 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", @@ -3444,71 +3433,66 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "49.0.0" -source = 
"git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", "object_store", - "serde_json", "tokio", ] [[package]] name = "datafusion-datasource-parquet" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", - "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-pruning", "datafusion-session", "futures", - "hex", "itertools 0.14.0", "log", "object_store", "parking_lot 0.12.4", "parquet", - "rand 0.9.1", "tokio", ] [[package]] name = "datafusion-doc" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" [[package]] name = "datafusion-execution" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", + "async-trait", "dashmap", "datafusion-common", "datafusion-expr", @@ -3516,7 +3500,6 @@ dependencies = [ "log", "object_store", "parking_lot 0.12.4", - "parquet", "rand 0.9.1", "tempfile", "url", @@ -3524,8 +3507,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", "async-trait", @@ -3536,29 +3519,30 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap 2.10.0", + "indexmap 2.11.4", + "itertools 0.14.0", "paste", "recursive", "serde_json", - "sqlparser 0.55.0", + "sqlparser", ] [[package]] name = "datafusion-expr-common" -version = "49.0.0" -source = 
"git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", "datafusion-common", - "indexmap 2.10.0", + "indexmap 2.11.4", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-functions" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", "arrow-buffer", @@ -3585,8 +3569,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "ahash 0.8.12", "arrow", @@ -3605,8 +3589,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "ahash 0.8.12", "arrow", @@ -3617,8 +3601,8 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", "arrow-ord", @@ -3626,6 +3610,7 @@ dependencies = [ "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", @@ -3638,8 +3623,8 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", "async-trait", @@ -3653,8 +3638,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", "datafusion-common", @@ -3670,8 +3655,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "49.0.0" -source = 
"git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -3679,18 +3664,18 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ - "datafusion-expr", + "datafusion-doc", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] name = "datafusion-optimizer" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", "chrono", @@ -3698,18 +3683,19 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", - "indexmap 2.10.0", + "indexmap 2.11.4", "itertools 0.14.0", "log", "recursive", "regex", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", ] [[package]] name = "datafusion-orc" -version = "0.4.1" -source = "git+https://github.com/GreptimeTeam/datafusion-orc?rev=a0a5f902158f153119316eaeec868cff3fc8a99d#a0a5f902158f153119316eaeec868cff3fc8a99d" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2088adcf23fad3b1430ba95e7782c74e49c9ce5b0965151d96b295d4d538fb17" dependencies = [ "arrow", "async-trait", @@ -3719,14 +3705,15 @@ dependencies = [ "futures", "futures-util", "object_store", - "orc-rust 0.6.0", + "orc-rust", "tokio", ] [[package]] name = "datafusion-pg-catalog" -version = "0.9.0" -source = "git+https://github.com/datafusion-contrib/datafusion-postgres?rev=3d1b7c7d5b82dd49bafc2803259365e633f654fa#3d1b7c7d5b82dd49bafc2803259365e633f654fa" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f258caedd1593e7dca3bf53912249de6685fa224bcce897ede1fbb7b040ac6f6" dependencies = [ "async-trait", "datafusion", @@ -3738,8 +3725,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "ahash 0.8.12", "arrow", @@ -3750,17 +3737,31 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "indexmap 2.10.0", + "indexmap 2.11.4", "itertools 0.14.0", - "log", + "parking_lot 0.12.4", "paste", - "petgraph 0.8.2", + "petgraph 0.8.3", +] + +[[package]] +name = "datafusion-physical-expr-adapter" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" +dependencies = [ + "arrow", + "datafusion-common", + 
"datafusion-expr", + "datafusion-functions", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "itertools 0.14.0", ] [[package]] name = "datafusion-physical-expr-common" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "ahash 0.8.12", "arrow", @@ -3772,8 +3773,8 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", "datafusion-common", @@ -3785,14 +3786,13 @@ dependencies = [ "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", - "log", "recursive", ] [[package]] name = "datafusion-physical-plan" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "ahash 0.8.12", "arrow", @@ -3811,7 +3811,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.10.0", + "indexmap 2.11.4", "itertools 0.14.0", "log", "parking_lot 0.12.4", @@ -3821,11 +3821,10 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-datasource", "datafusion-expr-common", @@ -3838,47 +3837,38 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ - "arrow", "async-trait", - "dashmap", "datafusion-common", - "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-plan", - "datafusion-sql", - "futures", - "itertools 0.14.0", - "log", - "object_store", "parking_lot 0.12.4", - "tokio", ] [[package]] name = "datafusion-sql" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "arrow", "bigdecimal 0.4.8", + "chrono", "datafusion-common", "datafusion-expr", - "indexmap 2.10.0", + 
"indexmap 2.11.4", "log", "recursive", "regex", - "sqlparser 0.55.0", + "sqlparser", ] [[package]] name = "datafusion-substrait" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=fd4b2abcf3c3e43e94951bda452c9fd35243aab0#fd4b2abcf3c3e43e94951bda452c9fd35243aab0" dependencies = [ "async-recursion", "async-trait", @@ -3891,6 +3881,7 @@ dependencies = [ "substrait 0.58.0", "tokio", "url", + "uuid", ] [[package]] @@ -3979,7 +3970,7 @@ dependencies = [ "serde", "serde_json", "snafu 0.8.6", - "sqlparser 0.55.0-greptime", + "sqlparser", "sqlparser_derive 0.1.1", ] @@ -4084,7 +4075,7 @@ checksum = "2cdc8d50f426189eef89dac62fabfa0abb27d5cc008f25bf4156a0203325becc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4095,7 +4086,7 @@ checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4137,7 +4128,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4157,7 +4148,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core 0.20.2", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4177,7 +4168,7 @@ checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "unicode-xid", ] @@ -4189,7 +4180,7 @@ checksum = "ccfae181bab5ab6c5478b2ccb69e4c68a02f8c3ec72f6616bfec9dbc599d2ee0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4248,7 +4239,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4323,7 +4314,7 @@ checksum = "0e197fdfd2cdb5fdeb7f8ddcf3aed5d5d04ecde2890d448b14ffb716f7376b70" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4431,7 +4422,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4443,7 +4434,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4482,7 +4473,7 @@ checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4595,7 +4586,7 @@ checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" dependencies = [ "bit-set", "regex-automata 0.4.9", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", ] [[package]] @@ -4707,9 +4698,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.2" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" +checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9" dependencies = [ "crc32fast", "libz-rs-sys", @@ -4871,9 +4862,9 @@ checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "form_urlencoded" -version = "1.2.1" +version = "1.2.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" dependencies = [ "percent-encoding", ] @@ -4946,7 +4937,7 @@ dependencies = [ "session", "snafu 0.8.6", "sql", - "sqlparser 0.55.0-greptime", + "sqlparser", "store-api", "strfmt", "table", @@ -4988,7 +4979,7 @@ checksum = "a0b4095fc99e1d858e5b8c7125d2638372ec85aa0fe6c807105cf10b0265ca6c" dependencies = [ "frunk_proc_macro_helpers", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -5000,7 +4991,7 @@ dependencies = [ "frunk_core", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -5012,7 +5003,7 @@ dependencies = [ "frunk_core", "frunk_proc_macro_helpers", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -5136,7 +5127,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -5362,7 +5353,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.10.0", + "indexmap 2.11.4", "slab", "tokio", "tokio-util", @@ -5381,7 +5372,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.3.1", - "indexmap 2.10.0", + "indexmap 2.11.4", "slab", "tokio", "tokio-util", @@ -5467,6 +5458,12 @@ dependencies = [ "foldhash", ] +[[package]] +name = "hashbrown" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" + [[package]] name = "hashlink" version = "0.10.0" @@ -5602,7 +5599,7 @@ checksum = "a56f203cd1c76362b69e3863fd987520ac36cf70a8c92627449b2f64a8cf7d65" dependencies = [ "cfg-if", "libc", - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -6021,9 +6018,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "1.0.3" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" dependencies = [ "idna_adapter", "smallvec", @@ -6066,7 +6063,7 @@ dependencies = [ "libflate", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -6144,13 +6141,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.10.0" +version = "2.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" dependencies = [ "equivalent", - "hashbrown 0.15.4", - "serde", + "hashbrown 0.16.0", ] [[package]] @@ -6166,7 +6162,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88" dependencies = [ "ahash 0.8.12", - "indexmap 2.10.0", + "indexmap 2.11.4", "is-terminal", "itoa", "log", @@ -6189,7 +6185,7 @@ dependencies = [ "crossbeam-utils", "dashmap", "env_logger", - "indexmap 2.10.0", + "indexmap 2.11.4", "itoa", "log", "num-format", @@ -6231,7 +6227,7 @@ checksum = "6c38228f24186d9cc68c729accb4d413be9eaed6ad07ff79e0270d9e56f3de13" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -6576,7 +6572,7 @@ version = "0.4.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "4ee7893dab2e44ae5f9d0173f26ff4aa327c10b01b06a72b52dd9405b628640d" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.4", ] [[package]] @@ -6661,7 +6657,7 @@ dependencies = [ "http 1.3.1", "json-patch", "k8s-openapi", - "schemars 0.8.22", + "schemars", "serde", "serde_json", "thiserror 1.0.69", @@ -6677,7 +6673,7 @@ dependencies = [ "proc-macro2", "quote", "serde_json", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -6721,7 +6717,7 @@ dependencies = [ "lalrpop-util", "petgraph 0.7.1", "regex", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", "sha3", "string_cache", "term", @@ -6763,7 +6759,7 @@ dependencies = [ "proc-macro-crate 1.3.1", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -6786,7 +6782,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -6876,9 +6872,9 @@ checksum = "775bf80d5878ab7c2b1080b5351a48b2f737d9f6f8b383574eebcc22be0dfccb" [[package]] name = "libc" -version = "0.2.175" +version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" [[package]] name = "libflate" @@ -7119,7 +7115,7 @@ dependencies = [ "num-traits", "quote", "regex", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", "serde", "vergen", ] @@ -7134,7 +7130,7 @@ dependencies = [ "cactus", "cfgrammar", "filetime", - "indexmap 2.10.0", + "indexmap 2.11.4", "lazy_static", "lrtable", "num-traits", @@ -7552,6 +7548,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -7693,7 +7690,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -7792,7 +7789,7 @@ dependencies = [ "proc-macro-error2", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "termcolor", "thiserror 1.0.69", ] @@ -7810,7 +7807,7 @@ dependencies = [ "proc-macro-error2", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "termcolor", "thiserror 2.0.17", ] @@ -7946,7 +7943,7 @@ checksum = "254a5372af8fc138e36684761d3c0cdb758a4410e938babcff1c860ce14ddbfc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -8173,7 +8170,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -8286,7 +8283,7 @@ dependencies = [ "proc-macro-crate 1.3.1", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -8298,7 +8295,7 @@ dependencies = [ "proc-macro-crate 3.3.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -8355,9 +8352,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efc4f07659e11cd45a341cd24d71e683e3be65d9ff1f8150061678fe60437496" +checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" dependencies = [ "async-trait", "bytes", @@ -8671,7 +8668,7 @@ dependencies = [ "session", "snafu 0.8.6", "sql", - "sqlparser 0.55.0-greptime", + "sqlparser", "store-api", "substrait 0.18.0", "table", @@ -8681,31 +8678,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "orc-rust" 
-version = "0.6.0" -source = "git+https://github.com/GreptimeTeam/orc-rust?rev=d1690a06eec754e97beecf2cf7690267fc818726#d1690a06eec754e97beecf2cf7690267fc818726" -dependencies = [ - "arrow", - "async-trait", - "bytemuck", - "bytes", - "chrono", - "chrono-tz", - "fallible-streaming-iterator", - "flate2", - "futures", - "futures-util", - "lz4_flex", - "lzokay-native", - "num", - "prost 0.13.5", - "snafu 0.8.6", - "snap", - "tokio", - "zstd 0.13.3", -] - [[package]] name = "orc-rust" version = "0.6.3" @@ -8823,7 +8795,7 @@ dependencies = [ "otlp-model", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -8933,9 +8905,9 @@ dependencies = [ [[package]] name = "parquet" -version = "56.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7288a07ed5d25939a90f9cb1ca5afa6855faa08ec7700613511ae64bdb0620c" +checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -8952,13 +8924,12 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.15.4", + "hashbrown 0.16.0", "lz4_flex", "num", "num-bigint", "object_store", "paste", - "ring", "seq-macro", "simdutf8", "snap", @@ -9005,7 +8976,7 @@ dependencies = [ "session", "snafu 0.8.6", "sql", - "sqlparser 0.55.0-greptime", + "sqlparser", "store-api", "table", ] @@ -9102,9 +9073,9 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.3.1" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "permutation" @@ -9143,7 +9114,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -9163,7 +9134,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset 0.4.2", - "indexmap 2.10.0", + "indexmap 2.11.4", ] [[package]] @@ -9173,26 +9144,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset 0.5.7", - "indexmap 2.10.0", + "indexmap 2.11.4", ] [[package]] name = "petgraph" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset 0.5.7", "hashbrown 0.15.4", - "indexmap 2.10.0", + "indexmap 2.11.4", "serde", ] [[package]] name = "pgwire" -version = "0.34.1" +version = "0.34.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c748793f2a9267fa2aa409d9375a5e26e4f1504ea96e34f8cab3e2fc32042d69" +checksum = "4f56a81b4fcc69016028f657a68f9b8e8a2a4b7d07684ca3298f2d3e7ff199ce" dependencies = [ "async-trait", "base64 0.22.1", @@ -9312,7 +9283,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -9449,7 +9420,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d77244ce2d584cd84f6a15f86195b8c9b2a0dfbfd817c09e0464244091a58ed" dependencies = [ "base64 0.22.1", - "indexmap 2.10.0", + "indexmap 2.11.4", "quick-xml 
0.37.5", "serde", "time", @@ -9678,7 +9649,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "061c1221631e079b26479d25bbf2275bfe5917ae8419cd7e34f13bfc2aa7539a" dependencies = [ "proc-macro2", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -9719,7 +9690,7 @@ dependencies = [ "proc-macro-error-attr2", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -9871,7 +9842,7 @@ dependencies = [ "prost 0.12.6", "prost-types 0.12.6", "regex", - "syn 2.0.104", + "syn 2.0.106", "tempfile", ] @@ -9891,7 +9862,7 @@ dependencies = [ "prost 0.13.5", "prost-types 0.13.5", "regex", - "syn 2.0.104", + "syn 2.0.106", "tempfile", ] @@ -9918,7 +9889,7 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -9931,7 +9902,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -10159,6 +10130,7 @@ dependencies = [ "num-traits", "object-store", "once_cell", + "parking_lot 0.12.4", "partition", "paste", "pretty_assertions", @@ -10173,7 +10145,7 @@ dependencies = [ "session", "snafu 0.8.6", "sql", - "sqlparser 0.55.0-greptime", + "sqlparser", "store-api", "substrait 0.18.0", "table", @@ -10260,9 +10232,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.40" +version = "1.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" dependencies = [ "proc-macro2", ] @@ -10449,7 +10421,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -10487,7 +10459,7 @@ checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -10499,7 +10471,7 @@ dependencies = [ "aho-corasick", "memchr", "regex-automata 0.4.9", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", ] [[package]] @@ -10519,7 +10491,7 @@ checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", ] [[package]] @@ -10529,11 +10501,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c11639076bf147be211b90e47790db89f4c22b6c8a9ca6e960833869da67166" dependencies = [ "aho-corasick", - "indexmap 2.10.0", + "indexmap 2.11.4", "itertools 0.13.0", "nohash", "regex", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", ] [[package]] @@ -10550,9 +10522,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.8.5" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = "c3160422bbd54dd5ecfdca71e5fd59b7b8fe2b1697ab2baf64f6d05dcc66d298" [[package]] name = "regress" @@ -10876,7 +10848,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.104", + "syn 2.0.106", "unicode-ident", ] @@ -10888,7 +10860,7 @@ checksum = "b3a8fb4672e840a587a66fc577a5491375df51ddb88f2a2c2a792598c326fe14" dependencies = [ "quote", "rand 0.8.5", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -10911,7 +10883,7 @@ dependencies = [ 
"proc-macro2", "quote", "rust-embed-utils", - "syn 2.0.104", + "syn 2.0.106", "walkdir", ] @@ -11223,30 +11195,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "schemars" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" -dependencies = [ - "dyn-clone", - "ref-cast", - "serde", - "serde_json", -] - -[[package]] -name = "schemars" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1375ba8ef45a6f15d83fa8748f1079428295d403d6ea991d09ab100155fbc06d" -dependencies = [ - "dyn-clone", - "ref-cast", - "serde", - "serde_json", -] - [[package]] name = "schemars_derive" version = "0.8.22" @@ -11256,7 +11204,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11302,7 +11250,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "thiserror 2.0.17", ] @@ -11321,7 +11269,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11412,7 +11360,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11423,7 +11371,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11456,7 +11404,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11477,7 +11425,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11494,19 +11442,15 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.14.0" +version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5" +checksum = "21e47d95bc83ed33b2ecf84f4187ad1ab9685d18ff28db000c99deac8ce180e3" dependencies = [ - "base64 0.22.1", + "base64 0.21.7", "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.10.0", - "schemars 0.9.0", - "schemars 1.0.3", "serde", - "serde_derive", "serde_json", "serde_with_macros", "time", @@ -11514,14 +11458,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.14.0" +version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f" +checksum = "ea3cee93715c2e266b9338b7544da68a9f24e227722ba482bd1c024367c77c65" dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11530,7 +11474,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.4", "itoa", "ryu", "serde", @@ -11595,7 +11539,7 @@ dependencies = [ "humantime", "humantime-serde", "hyper 1.6.0", - "indexmap 2.10.0", + "indexmap 2.11.4", "influxdb_line_protocol", "itertools 0.14.0", "json5", @@ -11787,6 +11731,12 @@ dependencies = [ "wide", ] +[[package]] +name = "simd-adler32" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + [[package]] name = "simd-json" version = "0.15.1" @@ -11919,7 +11869,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -12025,7 +11975,7 @@ dependencies = [ "serde", "serde_json", "snafu 0.8.6", - "sqlparser 0.55.0-greptime", + "sqlparser", "sqlparser_derive 0.1.1", "store-api", "table", @@ -12082,26 +12032,14 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.55.0-greptime" -source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=39e4fc94c3c741981f77e9d63b5ce8c02e0a27ea#39e4fc94c3c741981f77e9d63b5ce8c02e0a27ea" +version = "0.58.0" +source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=4b519a5caa95472cc3988f5556813a583dd35af1#4b519a5caa95472cc3988f5556813a583dd35af1" dependencies = [ "lazy_static", "log", "recursive", "regex", "serde", - "sqlparser 0.55.0", - "sqlparser_derive 0.3.0-greptime", -] - -[[package]] -name = "sqlparser" -version = "0.55.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" -dependencies = [ - "log", - "recursive", "sqlparser_derive 0.3.0", ] @@ -12116,25 +12054,14 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "sqlparser_derive" -version = "0.3.0-greptime" -source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=39e4fc94c3c741981f77e9d63b5ce8c02e0a27ea#39e4fc94c3c741981f77e9d63b5ce8c02e0a27ea" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - [[package]] name = "sqlparser_derive" version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=4b519a5caa95472cc3988f5556813a583dd35af1#4b519a5caa95472cc3988f5556813a583dd35af1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -12169,7 +12096,7 @@ dependencies = [ "futures-util", "hashbrown 0.15.4", "hashlink", - "indexmap 2.10.0", + "indexmap 2.11.4", "log", "memchr", "once_cell", @@ -12197,7 +12124,7 @@ dependencies = [ "quote", "sqlx-core", "sqlx-macros-core", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -12220,7 +12147,7 @@ dependencies = [ "sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", - "syn 2.0.104", + "syn 2.0.106", "tokio", "url", ] @@ -12423,7 +12350,7 @@ dependencies = [ "serde", "serde_json", "snafu 0.8.6", - "sqlparser 0.55.0-greptime", + "sqlparser", "strum 0.27.1", "tokio", "uuid", @@ -12494,6 +12421,12 @@ dependencies = [ "strum_macros 0.25.3", ] +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + [[package]] name = "strum" version = "0.27.1" @@ -12513,7 +12446,20 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.104", + "syn 2.0.106", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.106", ] [[package]] @@ -12526,7 +12472,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -12572,12 +12518,12 @@ dependencies = [ "prost 0.13.5", 
"prost-build 0.13.5", "prost-types 0.13.5", - "schemars 0.8.22", + "schemars", "semver", "serde", "serde_json", "serde_yaml", - "syn 2.0.104", + "syn 2.0.106", "typify 0.1.0", "walkdir", ] @@ -12597,12 +12543,12 @@ dependencies = [ "prost-build 0.13.5", "prost-types 0.13.5", "regress 0.10.3", - "schemars 0.8.22", + "schemars", "semver", "serde", "serde_json", "serde_yaml", - "syn 2.0.104", + "syn 2.0.106", "typify 0.4.2", "walkdir", ] @@ -12649,9 +12595,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.104" +version = "2.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" dependencies = [ "proc-macro2", "quote", @@ -12681,7 +12627,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -12743,7 +12689,7 @@ dependencies = [ "serde", "serde_json", "snafu 0.8.6", - "sqlparser 0.55.0-greptime", + "sqlparser", "store-api", "tokio", "tokio-util", @@ -12853,7 +12799,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" dependencies = [ "byteorder", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", "utf8-ranges", ] @@ -13007,13 +12953,13 @@ dependencies = [ "rand 0.9.1", "rand_chacha 0.9.0", "reqwest", - "schemars 0.8.22", + "schemars", "serde", "serde_json", "serde_yaml", "snafu 0.8.6", "sql", - "sqlparser 0.55.0-greptime", + "sqlparser", "sqlx", "store-api", "strum 0.27.1", @@ -13144,7 +13090,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -13155,7 +13101,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -13335,7 +13281,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -13468,7 +13414,7 @@ version = "0.8.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.4", "serde", "serde_spanned", "toml_datetime", @@ -13490,7 +13436,7 @@ version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.4", "toml_datetime", "winnow 0.5.40", ] @@ -13501,7 +13447,7 @@ version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.4", "serde", "serde_spanned", "toml_datetime", @@ -13587,7 +13533,7 @@ dependencies = [ "prost-build 0.13.5", "prost-types 0.13.5", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -13632,7 +13578,7 @@ dependencies = [ "futures-core", "futures-util", "hdrhistogram", - "indexmap 2.10.0", + "indexmap 2.11.4", "pin-project-lite", "slab", "sync_wrapper 1.0.2", @@ -13737,7 +13683,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" 
dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -13861,7 +13807,7 @@ checksum = "35f5380909ffc31b4de4f4bdf96b877175a016aa2ca98cee39fcfd8c4d53d952" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -13895,11 +13841,11 @@ dependencies = [ "proc-macro2", "quote", "regress 0.9.1", - "schemars 0.8.22", + "schemars", "semver", "serde", "serde_json", - "syn 2.0.104", + "syn 2.0.106", "thiserror 1.0.69", "unicode-ident", ] @@ -13915,11 +13861,11 @@ dependencies = [ "proc-macro2", "quote", "regress 0.10.3", - "schemars 0.8.22", + "schemars", "semver", "serde", "serde_json", - "syn 2.0.104", + "syn 2.0.106", "thiserror 2.0.17", "unicode-ident", ] @@ -13932,12 +13878,12 @@ checksum = "f8e6491896e955692d68361c68db2b263e3bec317ec0b684e0e2fa882fb6e31e" dependencies = [ "proc-macro2", "quote", - "schemars 0.8.22", + "schemars", "semver", "serde", "serde_json", "serde_tokenstream", - "syn 2.0.104", + "syn 2.0.106", "typify-impl 0.1.0", ] @@ -13949,12 +13895,12 @@ checksum = "7560adf816a1e8dad7c63d8845ef6e31e673e39eab310d225636779230cbedeb" dependencies = [ "proc-macro2", "quote", - "schemars 0.8.22", + "schemars", "semver", "serde", "serde_json", "serde_tokenstream", - "syn 2.0.104", + "syn 2.0.106", "typify-impl 0.4.2", ] @@ -14099,13 +14045,14 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.4" +version = "2.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" dependencies = [ "form_urlencoded", "idna", "percent-encoding", + "serde", ] [[package]] @@ -14140,9 +14087,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.17.0" +version = "1.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" +checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" dependencies = [ "getrandom 0.3.3", "js-sys", @@ -14188,7 +14135,7 @@ dependencies = [ "proc-macro-crate 1.3.1", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "variadics", ] @@ -14273,7 +14220,7 @@ dependencies = [ "hostname 0.4.1", "iana-time-zone", "idna", - "indexmap 2.10.0", + "indexmap 2.11.4", "indoc", "influxdb-line-protocol", "itertools 0.14.0", @@ -14402,7 +14349,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "wasm-bindgen-shared", ] @@ -14437,7 +14384,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -14583,7 +14530,7 @@ dependencies = [ "windows-collections", "windows-core 0.61.2", "windows-future", - "windows-link", + "windows-link 0.1.3", "windows-numerics", ] @@ -14616,7 +14563,7 @@ checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ "windows-implement 0.60.0", "windows-interface 0.59.1", - "windows-link", + "windows-link 0.1.3", "windows-result 0.3.4", "windows-strings", ] @@ -14628,7 +14575,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" dependencies = [ "windows-core 0.61.2", - 
"windows-link", + "windows-link 0.1.3", "windows-threading", ] @@ -14640,7 +14587,7 @@ checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -14651,7 +14598,7 @@ checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -14662,7 +14609,7 @@ checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -14673,7 +14620,7 @@ checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -14682,6 +14629,12 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-numerics" version = "0.2.0" @@ -14689,7 +14642,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" dependencies = [ "windows-core 0.61.2", - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -14707,7 +14660,7 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -14716,7 +14669,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -14783,7 +14736,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -15041,7 +14994,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "synstructure", ] @@ -15062,7 +15015,7 @@ checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -15082,7 +15035,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "synstructure", ] @@ -15103,7 +15056,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -15136,7 +15089,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 50e286195c..8a9d574263 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -99,12 +99,12 @@ rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] } # See for more detaiils: https://github.com/rust-lang/cargo/issues/11329 
ahash = { version = "0.8", features = ["compile-time-rng"] } aquamarine = "0.6" -arrow = { version = "56.0", features = ["prettyprint"] } -arrow-array = { version = "56.0", default-features = false, features = ["chrono-tz"] } -arrow-buffer = "56.0" -arrow-flight = "56.0" -arrow-ipc = { version = "56.0", default-features = false, features = ["lz4", "zstd"] } -arrow-schema = { version = "56.0", features = ["serde"] } +arrow = { version = "56.2", features = ["prettyprint"] } +arrow-array = { version = "56.2", default-features = false, features = ["chrono-tz"] } +arrow-buffer = "56.2" +arrow-flight = "56.2" +arrow-ipc = { version = "56.2", default-features = false, features = ["lz4", "zstd"] } +arrow-schema = { version = "56.2", features = ["serde"] } async-stream = "0.3" async-trait = "0.1" # Remember to update axum-extra, axum-macros when updating axum @@ -123,18 +123,18 @@ clap = { version = "4.4", features = ["derive"] } config = "0.13.0" crossbeam-utils = "0.8" dashmap = "6.1" -datafusion = "49" -datafusion-common = "49" -datafusion-expr = "49" -datafusion-functions = "49" -datafusion-functions-aggregate-common = "49" -datafusion-optimizer = "49" -datafusion-orc = { git = "https://github.com/GreptimeTeam/datafusion-orc", rev = "a0a5f902158f153119316eaeec868cff3fc8a99d" } -datafusion-pg-catalog = { git = "https://github.com/datafusion-contrib/datafusion-postgres", rev = "3d1b7c7d5b82dd49bafc2803259365e633f654fa" } -datafusion-physical-expr = "49" -datafusion-physical-plan = "49" -datafusion-sql = "49" -datafusion-substrait = "49" +datafusion = "50" +datafusion-common = "50" +datafusion-expr = "50" +datafusion-functions = "50" +datafusion-functions-aggregate-common = "50" +datafusion-optimizer = "50" +datafusion-orc = "0.5" +datafusion-pg-catalog = "0.11" +datafusion-physical-expr = "50" +datafusion-physical-plan = "50" +datafusion-sql = "50" +datafusion-substrait = "50" deadpool = "0.12" deadpool-postgres = "0.14" derive_builder = "0.20" @@ -180,7 +180,7 @@ otel-arrow-rust = { git = "https://github.com/GreptimeTeam/otel-arrow", rev = "2 "server", ] } parking_lot = "0.12" -parquet = { version = "56.0", default-features = false, features = ["arrow", "async", "object_store"] } +parquet = { version = "56.2", default-features = false, features = ["arrow", "async", "object_store"] } paste = "1.0" pin-project = "1.0" pretty_assertions = "1.4.0" @@ -217,10 +217,7 @@ simd-json = "0.15" similar-asserts = "1.6.0" smallvec = { version = "1", features = ["serde"] } snafu = "0.8" -sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "39e4fc94c3c741981f77e9d63b5ce8c02e0a27ea", features = [ - "visitor", - "serde", -] } # branch = "v0.55.x" +sqlparser = { version = "0.58.0", default-features = false, features = ["std", "visitor", "serde"] } sqlx = { version = "0.8", features = [ "runtime-tokio-rustls", "mysql", @@ -322,16 +319,19 @@ git = "https://github.com/GreptimeTeam/greptime-meter.git" rev = "5618e779cf2bb4755b499c630fba4c35e91898cb" [patch.crates-io] -datafusion = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-functions = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } 
-datafusion-functions-aggregate-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-optimizer = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-physical-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-physical-plan = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-sql = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-substrait = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } +datafusion = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" } +datafusion-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" } +datafusion-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" } +datafusion-functions = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" } +datafusion-functions-aggregate-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" } +datafusion-optimizer = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" } +datafusion-physical-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" } +datafusion-physical-expr-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" } +datafusion-physical-plan = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" } +datafusion-datasource = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" } +datafusion-sql = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" } +datafusion-substrait = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" } +sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "4b519a5caa95472cc3988f5556813a583dd35af1" } # branch = "v0.58.x" [profile.release] debug = 1 diff --git a/src/catalog/src/system_schema/pg_catalog.rs b/src/catalog/src/system_schema/pg_catalog.rs index b3ddec5b3b..08aad2d6dd 100644 --- a/src/catalog/src/system_schema/pg_catalog.rs +++ b/src/catalog/src/system_schema/pg_catalog.rs @@ -27,6 +27,7 @@ use datafusion::error::DataFusionError; use datafusion::execution::TaskContext; use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter; use datafusion_pg_catalog::pg_catalog::catalog_info::CatalogInfo; +use datafusion_pg_catalog::pg_catalog::context::EmptyContextProvider; use datafusion_pg_catalog::pg_catalog::{ PG_CATALOG_TABLES, PgCatalogSchemaProvider, PgCatalogStaticTables, PgCatalogTable, }; @@ -44,7 +45,7 @@ use crate::system_schema::{ /// [`PGCatalogProvider`] is the provider for a schema named `pg_catalog`, it is not a catalog. 
pub struct PGCatalogProvider { catalog_name: String, - inner: PgCatalogSchemaProvider, + inner: PgCatalogSchemaProvider, tables: HashMap, table_ids: HashMap<&'static str, u32>, } @@ -69,6 +70,7 @@ impl PGCatalogProvider { catalog_manager, }, Arc::new(static_tables), + EmptyContextProvider, ) .expect("Failed to initialize PgCatalogSchemaProvider"); diff --git a/src/common/datasource/src/file_format.rs b/src/common/datasource/src/file_format.rs index b6d4d6c30a..7c4e8d6c88 100644 --- a/src/common/datasource/src/file_format.rs +++ b/src/common/datasource/src/file_format.rs @@ -33,7 +33,7 @@ use bytes::{Buf, Bytes}; use datafusion::datasource::physical_plan::FileOpenFuture; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::physical_plan::SendableRecordBatchStream; -use futures::StreamExt; +use futures::{StreamExt, TryStreamExt}; use object_store::ObjectStore; use snafu::ResultExt; use tokio_util::compat::FuturesAsyncWriteCompatExt; @@ -179,7 +179,7 @@ pub fn open_with_decoder DataFusionResult>( Poll::Ready(decoder.flush().transpose()) }); - Ok(stream.boxed()) + Ok(stream.map_err(Into::into).boxed()) })) } diff --git a/src/common/function/src/aggrs/aggr_wrapper.rs b/src/common/function/src/aggrs/aggr_wrapper.rs index 4ee8190f2d..ed691296ee 100644 --- a/src/common/function/src/aggrs/aggr_wrapper.rs +++ b/src/common/function/src/aggrs/aggr_wrapper.rs @@ -22,6 +22,7 @@ //! `foo_merge`'s input arg is the same as `foo_state`'s output, and its output is the same as `foo`'s input. //! +use std::hash::{Hash, Hasher}; use std::sync::Arc; use arrow::array::StructArray; @@ -272,7 +273,7 @@ impl StateMergeHelper { } /// Wrapper to make an aggregate function out of a state function. -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct StateWrapper { inner: AggregateUDF, name: String, @@ -616,6 +617,20 @@ impl AggregateUDFImpl for MergeWrapper { } } +impl PartialEq for MergeWrapper { + fn eq(&self, other: &Self) -> bool { + self.inner == other.inner + } +} + +impl Eq for MergeWrapper {} + +impl Hash for MergeWrapper { + fn hash(&self, state: &mut H) { + self.inner.hash(state); + } +} + /// The merge accumulator, which modify `update_batch`'s behavior to accept one struct array which /// include the state fields of original aggregate function, and merge said states into original accumulator /// the output is the same as original aggregate function diff --git a/src/common/function/src/aggrs/aggr_wrapper/tests.rs b/src/common/function/src/aggrs/aggr_wrapper/tests.rs index d24cdd8475..97a5a792d9 100644 --- a/src/common/function/src/aggrs/aggr_wrapper/tests.rs +++ b/src/common/function/src/aggrs/aggr_wrapper/tests.rs @@ -39,8 +39,7 @@ use datafusion::prelude::SessionContext; use datafusion_common::arrow::array::AsArray; use datafusion_common::arrow::datatypes::{Float64Type, UInt64Type}; use datafusion_common::{Column, TableReference}; -use datafusion_expr::expr::AggregateFunction; -use datafusion_expr::sqlparser::ast::NullTreatment; +use datafusion_expr::expr::{AggregateFunction, NullTreatment}; use datafusion_expr::{ Aggregate, ColumnarValue, Expr, LogicalPlan, ScalarFunctionArgs, SortExpr, TableScan, lit, }; diff --git a/src/common/function/src/aggrs/count_hash.rs b/src/common/function/src/aggrs/count_hash.rs index ded88107e6..7cc594f2e3 100644 --- a/src/common/function/src/aggrs/count_hash.rs +++ b/src/common/function/src/aggrs/count_hash.rs @@ -68,7 +68,7 @@ impl CountHash { } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Eq, 
PartialEq, Hash)] pub struct CountHash { signature: Signature, } diff --git a/src/common/function/src/scalars/geo/geohash.rs b/src/common/function/src/scalars/geo/geohash.rs index 2a9deddca6..90bb958246 100644 --- a/src/common/function/src/scalars/geo/geohash.rs +++ b/src/common/function/src/scalars/geo/geohash.rs @@ -76,7 +76,7 @@ impl Function for GeohashFunction { } fn return_type(&self, _: &[DataType]) -> datafusion_common::Result { - Ok(DataType::Utf8) + Ok(DataType::Utf8View) } fn signature(&self) -> &Signature { @@ -176,7 +176,7 @@ impl Function for GeohashNeighboursFunction { Ok(DataType::List(Arc::new(Field::new( "item", DataType::Utf8View, - false, + true, )))) } diff --git a/src/common/function/src/scalars/geo/h3.rs b/src/common/function/src/scalars/geo/h3.rs index d90eed8143..c6630525df 100644 --- a/src/common/function/src/scalars/geo/h3.rs +++ b/src/common/function/src/scalars/geo/h3.rs @@ -355,9 +355,9 @@ impl Function for H3CellCenterLatLng { fn return_type(&self, _: &[DataType]) -> datafusion_common::Result { Ok(DataType::List(Arc::new(Field::new( - "x", + "item", DataType::Float64, - false, + true, )))) } diff --git a/src/common/function/src/scalars/udf.rs b/src/common/function/src/scalars/udf.rs index 503a66d331..eee3ede801 100644 --- a/src/common/function/src/scalars/udf.rs +++ b/src/common/function/src/scalars/udf.rs @@ -14,6 +14,7 @@ use std::any::Any; use std::fmt::{Debug, Formatter}; +use std::hash::{Hash, Hasher}; use datafusion::arrow::datatypes::DataType; use datafusion::logical_expr::{ScalarFunctionArgs, ScalarUDFImpl}; @@ -33,6 +34,20 @@ impl Debug for ScalarUdf { } } +impl PartialEq for ScalarUdf { + fn eq(&self, other: &Self) -> bool { + self.function.signature() == other.function.signature() + } +} + +impl Eq for ScalarUdf {} + +impl Hash for ScalarUdf { + fn hash(&self, state: &mut H) { + self.function.signature().hash(state) + } +} + impl ScalarUDFImpl for ScalarUdf { fn as_any(&self) -> &dyn Any { self diff --git a/src/common/macro/src/admin_fn.rs b/src/common/macro/src/admin_fn.rs index ca97e5468f..651c083ec8 100644 --- a/src/common/macro/src/admin_fn.rs +++ b/src/common/macro/src/admin_fn.rs @@ -345,6 +345,20 @@ fn build_struct( Ok(datafusion_expr::ColumnarValue::Array(result_vector.to_arrow_array())) } } + + impl PartialEq for #name { + fn eq(&self, other: &Self) -> bool { + self.signature == other.signature + } + } + + impl Eq for #name {} + + impl std::hash::Hash for #name { + fn hash(&self, state: &mut H) { + self.signature.hash(state) + } + } } .into() } diff --git a/src/datatypes/src/schema.rs b/src/datatypes/src/schema.rs index 8a79b3c02e..6bdf321137 100644 --- a/src/datatypes/src/schema.rs +++ b/src/datatypes/src/schema.rs @@ -368,8 +368,7 @@ impl TryFrom for Schema { type Error = Error; fn try_from(value: DFSchemaRef) -> Result { - let s: ArrowSchema = value.as_ref().into(); - s.try_into() + value.inner().clone().try_into() } } diff --git a/src/datatypes/src/value.rs b/src/datatypes/src/value.rs index 11a974463c..7acc7073d4 100644 --- a/src/datatypes/src/value.rs +++ b/src/datatypes/src/value.rs @@ -1208,7 +1208,9 @@ impl TryFrom for Value { .collect::>>()?; Value::Struct(StructValue::try_new(items, struct_type)?) 
} - ScalarValue::Decimal256(_, _, _) + ScalarValue::Decimal32(_, _, _) + | ScalarValue::Decimal64(_, _, _) + | ScalarValue::Decimal256(_, _, _) | ScalarValue::FixedSizeList(_) | ScalarValue::LargeList(_) | ScalarValue::Dictionary(_, _) diff --git a/src/datatypes/src/vectors/helper.rs b/src/datatypes/src/vectors/helper.rs index 05021d7338..024a01c6b1 100644 --- a/src/datatypes/src/vectors/helper.rs +++ b/src/datatypes/src/vectors/helper.rs @@ -245,7 +245,9 @@ impl Helper { length, ) } - ScalarValue::Decimal256(_, _, _) + ScalarValue::Decimal32(_, _, _) + | ScalarValue::Decimal64(_, _, _) + | ScalarValue::Decimal256(_, _, _) | ScalarValue::FixedSizeList(_) | ScalarValue::LargeList(_) | ScalarValue::Dictionary(_, _) diff --git a/src/flow/src/df_optimizer.rs b/src/flow/src/df_optimizer.rs index 5fa180d53c..1d41d09346 100644 --- a/src/flow/src/df_optimizer.rs +++ b/src/flow/src/df_optimizer.rs @@ -427,7 +427,7 @@ fn expand_tumble_analyzer( /// This is a placeholder for tumble_start and tumble_end function, so that datafusion can /// recognize them as scalar function -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Hash)] pub struct TumbleExpand { signature: Signature, name: String, diff --git a/src/operator/src/expr_helper.rs b/src/operator/src/expr_helper.rs index bd77b20d5f..3fa9a0ae1f 100644 --- a/src/operator/src/expr_helper.rs +++ b/src/operator/src/expr_helper.rs @@ -979,11 +979,10 @@ pub fn to_create_flow_task_expr( query_ctx: &QueryContextRef, ) -> Result { // retrieve sink table name - let sink_table_ref = - object_name_to_table_reference(create_flow.sink_table_name.clone().into(), true) - .with_context(|_| ConvertIdentifierSnafu { - ident: create_flow.sink_table_name.to_string(), - })?; + let sink_table_ref = object_name_to_table_reference(create_flow.sink_table_name.clone(), true) + .with_context(|_| ConvertIdentifierSnafu { + ident: create_flow.sink_table_name.to_string(), + })?; let catalog = sink_table_ref .catalog() .unwrap_or(query_ctx.current_catalog()) @@ -1001,9 +1000,11 @@ pub fn to_create_flow_task_expr( let source_table_names = extract_tables_from_query(&create_flow.query) .map(|name| { - let reference = object_name_to_table_reference(name.clone().into(), true) - .with_context(|_| ConvertIdentifierSnafu { - ident: name.to_string(), + let reference = + object_name_to_table_reference(name.clone(), true).with_context(|_| { + ConvertIdentifierSnafu { + ident: name.to_string(), + } })?; let catalog = reference .catalog() diff --git a/src/promql/src/extension_plan/empty_metric.rs b/src/promql/src/extension_plan/empty_metric.rs index 741a6b64bc..5514cb1abb 100644 --- a/src/promql/src/extension_plan/empty_metric.rs +++ b/src/promql/src/extension_plan/empty_metric.rs @@ -123,7 +123,7 @@ impl EmptyMetric { physical_planner.create_physical_expr(expr, &self.time_index_schema, session_state) }) .transpose()?; - let result_schema: SchemaRef = Arc::new(self.result_schema.as_ref().into()); + let result_schema: SchemaRef = self.result_schema.inner().clone(); let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(result_schema.clone()), Partitioning::UnknownPartitioning(1), @@ -134,7 +134,7 @@ impl EmptyMetric { start: self.start, end: self.end, interval: self.interval, - time_index_schema: Arc::new(self.time_index_schema.as_ref().into()), + time_index_schema: self.time_index_schema.inner().clone(), result_schema, expr: physical_expr, properties, diff --git a/src/promql/src/extension_plan/histogram_fold.rs b/src/promql/src/extension_plan/histogram_fold.rs index 
5c0c361db9..e80d4a7676 100644 --- a/src/promql/src/extension_plan/histogram_fold.rs +++ b/src/promql/src/extension_plan/histogram_fold.rs @@ -181,7 +181,7 @@ impl HistogramFold { .index_of_column_by_name(None, &self.ts_column) .unwrap(); - let output_schema: SchemaRef = Arc::new(self.output_schema.as_ref().into()); + let output_schema: SchemaRef = self.output_schema.inner().clone(); let properties = PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), Partitioning::UnknownPartitioning(1), @@ -805,14 +805,13 @@ mod test { async fn fold_overall() { let memory_exec = Arc::new(prepare_test_data()); let output_schema: SchemaRef = Arc::new( - (*HistogramFold::convert_schema( + HistogramFold::convert_schema( &Arc::new(memory_exec.schema().to_dfschema().unwrap()), "le", ) .unwrap() - .as_ref()) - .clone() - .into(), + .as_arrow() + .clone(), ); let properties = PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), diff --git a/src/promql/src/extension_plan/range_manipulate.rs b/src/promql/src/extension_plan/range_manipulate.rs index 540fa4c174..9c0586adef 100644 --- a/src/promql/src/extension_plan/range_manipulate.rs +++ b/src/promql/src/extension_plan/range_manipulate.rs @@ -167,7 +167,7 @@ impl RangeManipulate { } pub fn to_execution_plan(&self, exec_input: Arc) -> Arc { - let output_schema: SchemaRef = SchemaRef::new(self.output_schema.as_ref().into()); + let output_schema: SchemaRef = self.output_schema.inner().clone(); let properties = exec_input.properties(); let properties = PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), @@ -791,8 +791,8 @@ mod test { &field_columns, ) .unwrap() - .as_ref() - .into(), + .as_arrow() + .clone(), ); let properties = PlanProperties::new( EquivalenceProperties::new(manipulate_output_schema.clone()), diff --git a/src/promql/src/extension_plan/union_distinct_on.rs b/src/promql/src/extension_plan/union_distinct_on.rs index e5e80525b8..795669a4e9 100644 --- a/src/promql/src/extension_plan/union_distinct_on.rs +++ b/src/promql/src/extension_plan/union_distinct_on.rs @@ -92,7 +92,7 @@ impl UnionDistinctOn { left_exec: Arc, right_exec: Arc, ) -> Arc { - let output_schema: SchemaRef = Arc::new(self.output_schema.as_ref().into()); + let output_schema: SchemaRef = self.output_schema.inner().clone(); let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), Partitioning::UnknownPartitioning(1), diff --git a/src/query/Cargo.toml b/src/query/Cargo.toml index 0c6c2e033f..344d7bd5fc 100644 --- a/src/query/Cargo.toml +++ b/src/query/Cargo.toml @@ -54,6 +54,7 @@ meter-core.workspace = true meter-macros.workspace = true object-store.workspace = true once_cell.workspace = true +parking_lot.workspace = true partition.workspace = true prometheus.workspace = true promql.workspace = true diff --git a/src/query/src/dist_plan/commutativity.rs b/src/query/src/dist_plan/commutativity.rs index d0e26a3f92..c8652b8d52 100644 --- a/src/query/src/dist_plan/commutativity.rs +++ b/src/query/src/dist_plan/commutativity.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use common_function::aggrs::aggr_wrapper::{StateMergeHelper, is_all_aggr_exprs_steppable}; use common_telemetry::debug; use datafusion::error::Result as DfResult; +use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; use datafusion_expr::{Expr, LogicalPlan, UserDefinedLogicalNode}; use promql::extension_plan::{ EmptyMetric, InstantManipulate, RangeManipulate, SeriesDivide, SeriesNormalize, @@ -93,6 +94,12 @@ impl Categorizer { 
plan: &LogicalPlan, partition_cols: Option, ) -> DfResult { + // Subquery is treated separately in `inspect_plan_with_subquery`. To avoid rewrite the + // "maybe rewritten" plan, stop the check here. + if has_subquery(plan)? { + return Ok(Commutativity::Unimplemented); + } + let partition_cols = partition_cols.unwrap_or_default(); let comm = match plan { @@ -331,6 +338,24 @@ pub fn partial_commutative_transformer(plan: &LogicalPlan) -> Option DfResult { + let mut found = false; + plan.apply_expressions(|e| { + e.apply(|x| { + if matches!( + x, + Expr::Exists(_) | Expr::InSubquery(_) | Expr::ScalarSubquery(_) + ) { + found = true; + Ok(TreeNodeRecursion::Stop) + } else { + Ok(TreeNodeRecursion::Continue) + } + }) + })?; + Ok(found) +} + #[cfg(test)] mod test { use datafusion_expr::{LogicalPlanBuilder, Sort}; diff --git a/src/query/src/dist_plan/planner.rs b/src/query/src/dist_plan/planner.rs index 6c7eba6b1e..cea8e54045 100644 --- a/src/query/src/dist_plan/planner.rs +++ b/src/query/src/dist_plan/planner.rs @@ -163,7 +163,7 @@ impl ExtensionPlanner for DistExtensionPlanner { }; // TODO(ruihang): generate different execution plans for different variant merge operation - let schema = optimized_plan.schema().as_ref().into(); + let schema = optimized_plan.schema().as_arrow(); let query_ctx = session_state .config() .get_extension() @@ -173,7 +173,7 @@ impl ExtensionPlanner for DistExtensionPlanner { table_name, regions, input_plan.clone(), - &schema, + schema, self.region_query_handler.clone(), query_ctx, session_state.config().target_partitions(), diff --git a/src/query/src/optimizer/windowed_sort.rs b/src/query/src/optimizer/windowed_sort.rs index 8de6fc8317..dcf63f6d73 100644 --- a/src/query/src/optimizer/windowed_sort.rs +++ b/src/query/src/optimizer/windowed_sort.rs @@ -196,9 +196,9 @@ fn fetch_partition_range(input: Arc) -> DataFusionResult() { - for (expr, output_name) in projection.expr() { - if let Some(column_expr) = expr.as_any().downcast_ref::() { - alias_map.push((column_expr.name().to_string(), output_name.clone())); + for expr in projection.expr() { + if let Some(column_expr) = expr.expr.as_any().downcast_ref::() { + alias_map.push((column_expr.name().to_string(), expr.alias.clone())); } } // resolve alias properly diff --git a/src/query/src/part_sort.rs b/src/query/src/part_sort.rs index e9d70ec17a..64ba76a149 100644 --- a/src/query/src/part_sort.rs +++ b/src/query/src/part_sort.rs @@ -33,11 +33,14 @@ use datafusion::execution::{RecordBatchStream, TaskContext}; use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties, TopK, + TopKDynamicFilters, }; use datafusion_common::{DataFusionError, internal_err}; use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr::expressions::{DynamicFilterPhysicalExpr, lit}; use futures::{Stream, StreamExt}; use itertools::Itertools; +use parking_lot::RwLock; use snafu::location; use store_api::region_engine::PartitionRange; @@ -239,6 +242,9 @@ impl PartSortStream { partition: usize, ) -> datafusion_common::Result { let buffer = if let Some(limit) = limit { + let filter = Arc::new(RwLock::new(TopKDynamicFilters::new(Arc::new( + DynamicFilterPhysicalExpr::new(vec![], lit(true)), + )))); PartSortBuffer::Top( TopK::try_new( partition, @@ -249,7 +255,7 @@ impl PartSortStream { context.session_config().batch_size(), context.runtime_env(), &sort.metrics, - None, + filter, )?, 0, 
) @@ -497,6 +503,9 @@ impl PartSortStream { /// Internal method for sorting `Top` buffer (with limit). fn sort_top_buffer(&mut self) -> datafusion_common::Result { + let filter = Arc::new(RwLock::new(TopKDynamicFilters::new(Arc::new( + DynamicFilterPhysicalExpr::new(vec![], lit(true)), + )))); let new_top_buffer = TopK::try_new( self.partition, self.schema().clone(), @@ -506,7 +515,7 @@ impl PartSortStream { self.context.session_config().batch_size(), self.context.runtime_env(), &self.root_metrics, - None, + filter, )?; let PartSortBuffer::Top(top_k, _) = std::mem::replace(&mut self.buffer, PartSortBuffer::Top(new_top_buffer, 0)) diff --git a/src/query/src/planner.rs b/src/query/src/planner.rs index e59be28ceb..faba24a742 100644 --- a/src/query/src/planner.rs +++ b/src/query/src/planner.rs @@ -14,6 +14,7 @@ use std::any::Any; use std::borrow::Cow; +use std::str::FromStr; use std::sync::Arc; use async_trait::async_trait; @@ -116,9 +117,10 @@ impl DfLogicalPlanner { // default to configuration value let options = self.session_state.config().options(); - let format = format.as_ref().unwrap_or(&options.explain.format); - - let format: ExplainFormat = format.parse()?; + let format = format + .map(|x| ExplainFormat::from_str(&x)) + .transpose()? + .unwrap_or_else(|| options.explain.format.clone()); Ok(LogicalPlan::Explain(Explain { verbose, @@ -208,8 +210,7 @@ impl DfLogicalPlanner { let Statement::Query(query) = stmt.into_owned() else { unreachable!("is_tql_cte should only be true for Query statements"); }; - let sqlparser_stmt = - datafusion::sql::sqlparser::ast::Statement::Query(Box::new(query.inner.into())); + let sqlparser_stmt = sqlparser::ast::Statement::Query(Box::new(query.inner)); sql_to_rel .sql_statement_to_plan_with_context(sqlparser_stmt, &mut planner_context) .context(PlanSqlSnafu)? @@ -261,7 +262,7 @@ impl DfLogicalPlanner { let sql_to_rel = SqlToRel::new_with_options(&context_provider, parser_options); - Ok(sql_to_rel.sql_to_expr(sql.into(), schema, &mut PlannerContext::new())?) + Ok(sql_to_rel.sql_to_expr(sql, schema, &mut PlannerContext::new())?) 
} #[tracing::instrument(skip_all)] diff --git a/src/query/src/promql/planner.rs b/src/query/src/promql/planner.rs index b65cddc2e2..a1dc1b640a 100644 --- a/src/query/src/promql/planner.rs +++ b/src/query/src/promql/planner.rs @@ -2464,6 +2464,7 @@ impl PromPlanner { window_frame: WindowFrame::new(Some(true)), null_treatment: None, distinct: false, + filter: None, }, })) }) diff --git a/src/query/src/range_select/plan_rewrite.rs b/src/query/src/range_select/plan_rewrite.rs index 14eba9bcba..87b036df65 100644 --- a/src/query/src/range_select/plan_rewrite.rs +++ b/src/query/src/range_select/plan_rewrite.rs @@ -244,6 +244,9 @@ fn parse_expr_list(args: &[Expr], start: usize, len: usize) -> DFResult args[i].clone(), + Some(Expr::Alias(alias)) if matches!(*alias.expr, Expr::ScalarFunction(_)) => { + args[i].clone() + } other => { return Err(dispose_parse_error(*other)); } diff --git a/src/servers/src/http/handler.rs b/src/servers/src/http/handler.rs index 69ba93cc5c..ca56e5234e 100644 --- a/src/servers/src/http/handler.rs +++ b/src/servers/src/http/handler.rs @@ -208,7 +208,7 @@ pub async fn sql_format( let mut parts: Vec = Vec::with_capacity(stmts.len()); for stmt in stmts { - let mut s = format!("{:#}", stmt); + let mut s = format!("{stmt}"); if !s.trim_end().ends_with(';') { s.push(';'); } diff --git a/src/sql/src/ast.rs b/src/sql/src/ast.rs index 5d207fb579..122740987a 100644 --- a/src/sql/src/ast.rs +++ b/src/sql/src/ast.rs @@ -25,7 +25,10 @@ pub trait ObjectNamePartExt { impl ObjectNamePartExt for ObjectNamePart { fn to_string_unquoted(&self) -> String { - let ObjectNamePart::Identifier(ident) = self; + let ObjectNamePart::Identifier(ident) = self else { + // If it's not an ident, just return it as a string. + return self.to_string(); + }; ident.value.clone() } } diff --git a/src/sql/src/parser.rs b/src/sql/src/parser.rs index 6e2a880348..6c2a7e11ab 100644 --- a/src/sql/src/parser.rs +++ b/src/sql/src/parser.rs @@ -14,15 +14,15 @@ use std::str::FromStr; -use snafu::ResultExt; -use sqlparser::ast::{Ident, ObjectNamePart, Query, Value}; +use snafu::{OptionExt, ResultExt}; +use sqlparser::ast::{Ident, Query, Value}; use sqlparser::dialect::Dialect; use sqlparser::keywords::Keyword; use sqlparser::parser::{Parser, ParserError, ParserOptions}; use sqlparser::tokenizer::{Token, TokenWithSpan}; use crate::ast::{Expr, ObjectName}; -use crate::error::{self, Result, SyntaxSnafu}; +use crate::error::{self, InvalidSqlSnafu, Result, SyntaxSnafu}; use crate::parsers::tql_parser; use crate::statements::kill::Kill; use crate::statements::statement::Statement; @@ -106,7 +106,7 @@ impl ParserContext<'_> { expected: "a table name", actual: self.parser.peek_token().to_string(), })?; - Ok(Self::canonicalize_object_name(raw_table_name)) + Self::canonicalize_object_name(raw_table_name) } pub fn parse_function(sql: &str, dialect: &dyn Dialect) -> Result { @@ -303,17 +303,20 @@ impl ParserContext<'_> { } /// Like [canonicalize_identifier] but for [ObjectName]. 
- pub fn canonicalize_object_name(object_name: ObjectName) -> ObjectName { + pub(crate) fn canonicalize_object_name(object_name: ObjectName) -> Result { object_name .0 .into_iter() .map(|x| { - let ObjectNamePart::Identifier(ident) = x; - ident + x.as_ident() + .cloned() + .map(Self::canonicalize_identifier) + .with_context(|| InvalidSqlSnafu { + msg: format!("not an ident: '{x}'"), + }) }) - .map(Self::canonicalize_identifier) - .collect::>() - .into() + .collect::>>() + .map(Into::into) } /// Simply a shortcut for sqlparser's same name method `parse_object_name`, diff --git a/src/sql/src/parsers/alter_parser.rs b/src/sql/src/parsers/alter_parser.rs index 68f246f26a..0cab82efc9 100644 --- a/src/sql/src/parsers/alter_parser.rs +++ b/src/sql/src/parsers/alter_parser.rs @@ -68,7 +68,7 @@ impl ParserContext<'_> { .parser .parse_object_name(false) .context(error::SyntaxSnafu)?; - let database_name = Self::canonicalize_object_name(database_name); + let database_name = Self::canonicalize_object_name(database_name)?; match self.parser.peek_token().token { Token::Word(w) => { @@ -117,7 +117,7 @@ impl ParserContext<'_> { .parser .parse_object_name(false) .context(error::SyntaxSnafu)?; - let table_name = Self::canonicalize_object_name(raw_table_name); + let table_name = Self::canonicalize_object_name(raw_table_name)?; let alter_operation = match self.parser.peek_token().token { Token::Word(w) => { @@ -145,7 +145,7 @@ impl ParserContext<'_> { let new_table_name_obj_raw = self.parse_object_name().context(error::SyntaxSnafu)?; let new_table_name_obj = - Self::canonicalize_object_name(new_table_name_obj_raw); + Self::canonicalize_object_name(new_table_name_obj_raw)?; let new_table_name = match &new_table_name_obj.0[..] { [table] => table.to_string_unquoted(), _ => { diff --git a/src/sql/src/parsers/copy_parser.rs b/src/sql/src/parsers/copy_parser.rs index 528c0de076..892992d310 100644 --- a/src/sql/src/parsers/copy_parser.rs +++ b/src/sql/src/parsers/copy_parser.rs @@ -104,7 +104,7 @@ impl ParserContext<'_> { expected: "a table name", actual: self.peek_token_as_string(), })?; - let table_name = Self::canonicalize_object_name(raw_table_name); + let table_name = Self::canonicalize_object_name(raw_table_name)?; if self.parser.parse_keyword(Keyword::TO) { let (with, connection, location, limit) = self.parse_copy_parameters()?; diff --git a/src/sql/src/parsers/create_parser.rs b/src/sql/src/parsers/create_parser.rs index df6592b187..157f554071 100644 --- a/src/sql/src/parsers/create_parser.rs +++ b/src/sql/src/parsers/create_parser.rs @@ -196,7 +196,7 @@ impl<'a> ParserContext<'a> { expected: "a database name", actual: self.peek_token_as_string(), })?; - let database_name = Self::canonicalize_object_name(database_name); + let database_name = Self::canonicalize_object_name(database_name)?; let options = self .parser @@ -2435,8 +2435,7 @@ non TIMESTAMP(6) TIME INDEX, let sql = "CREATE VIEW test AS DELETE from demo"; let result = ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()); - assert!(result.is_err()); - assert_matches!(result, Err(crate::error::Error::Syntax { .. 
})); + assert!(result.is_ok_and(|x| x.len() == 1)); } #[test] diff --git a/src/sql/src/parsers/cursor_parser.rs b/src/sql/src/parsers/cursor_parser.rs index 5d6deee12c..eb907b8d76 100644 --- a/src/sql/src/parsers/cursor_parser.rs +++ b/src/sql/src/parsers/cursor_parser.rs @@ -51,7 +51,7 @@ impl ParserContext<'_> { let query_stmt = self.parse_query()?; match query_stmt { Statement::Query(query) => Ok(Statement::DeclareCursor(DeclareCursor { - cursor_name: ParserContext::canonicalize_object_name(cursor_name), + cursor_name: ParserContext::canonicalize_object_name(cursor_name)?, query, })), _ => error::InvalidSqlSnafu { @@ -78,7 +78,7 @@ impl ParserContext<'_> { .context(error::SyntaxSnafu)?; Ok(Statement::FetchCursor(FetchCursor { - cursor_name: ParserContext::canonicalize_object_name(cursor_name), + cursor_name: ParserContext::canonicalize_object_name(cursor_name)?, fetch_size, })) } @@ -91,7 +91,7 @@ impl ParserContext<'_> { .context(error::SyntaxSnafu)?; Ok(Statement::CloseCursor(CloseCursor { - cursor_name: ParserContext::canonicalize_object_name(cursor_name), + cursor_name: ParserContext::canonicalize_object_name(cursor_name)?, })) } } diff --git a/src/sql/src/parsers/describe_parser.rs b/src/sql/src/parsers/describe_parser.rs index 849dca468b..9a418cba31 100644 --- a/src/sql/src/parsers/describe_parser.rs +++ b/src/sql/src/parsers/describe_parser.rs @@ -36,7 +36,7 @@ impl ParserContext<'_> { expected: "a table name", actual: self.peek_token_as_string(), })?; - let table_idents = Self::canonicalize_object_name(raw_table_idents); + let table_idents = Self::canonicalize_object_name(raw_table_idents)?; ensure!( !table_idents.0.is_empty(), InvalidTableNameSnafu { diff --git a/src/sql/src/parsers/drop_parser.rs b/src/sql/src/parsers/drop_parser.rs index 8f3872d957..39de64e945 100644 --- a/src/sql/src/parsers/drop_parser.rs +++ b/src/sql/src/parsers/drop_parser.rs @@ -58,7 +58,7 @@ impl ParserContext<'_> { expected: "a trigger name", actual: self.peek_token_as_string(), })?; - let trigger_ident = Self::canonicalize_object_name(raw_trigger_ident); + let trigger_ident = Self::canonicalize_object_name(raw_trigger_ident)?; ensure!( !trigger_ident.0.is_empty(), error::InvalidTriggerNameSnafu { @@ -82,7 +82,7 @@ impl ParserContext<'_> { expected: "a view name", actual: self.peek_token_as_string(), })?; - let view_ident = Self::canonicalize_object_name(raw_view_ident); + let view_ident = Self::canonicalize_object_name(raw_view_ident)?; ensure!( !view_ident.0.is_empty(), InvalidTableNameSnafu { @@ -106,7 +106,7 @@ impl ParserContext<'_> { expected: "a flow name", actual: self.peek_token_as_string(), })?; - let flow_ident = Self::canonicalize_object_name(raw_flow_ident); + let flow_ident = Self::canonicalize_object_name(raw_flow_ident)?; ensure!( !flow_ident.0.is_empty(), InvalidFlowNameSnafu { @@ -129,7 +129,7 @@ impl ParserContext<'_> { expected: "a table name", actual: self.peek_token_as_string(), })?; - let table_ident = Self::canonicalize_object_name(raw_table_ident); + let table_ident = Self::canonicalize_object_name(raw_table_ident)?; ensure!( !table_ident.0.is_empty(), InvalidTableNameSnafu { @@ -155,7 +155,7 @@ impl ParserContext<'_> { expected: "a database name", actual: self.peek_token_as_string(), })?; - let database_name = Self::canonicalize_object_name(database_name); + let database_name = Self::canonicalize_object_name(database_name)?; Ok(Statement::DropDatabase(DropDatabase::new( database_name, diff --git a/src/sql/src/parsers/explain_parser.rs b/src/sql/src/parsers/explain_parser.rs 
index c595efc86d..720c9209a4 100644 --- a/src/sql/src/parsers/explain_parser.rs +++ b/src/sql/src/parsers/explain_parser.rs @@ -73,6 +73,7 @@ mod tests { projection: vec![sqlparser::ast::SelectItem::Wildcard( WildcardAdditionalOptions::default(), )], + exclude: None, into: None, from: vec![sqlparser::ast::TableWithJoins { relation: sqlparser::ast::TableFactor::Table { @@ -112,9 +113,8 @@ mod tests { with: None, body: Box::new(sqlparser::ast::SetExpr::Select(Box::new(select))), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, + pipe_operators: vec![], fetch: None, locks: vec![], for_clause: None, diff --git a/src/sql/src/parsers/set_var_parser.rs b/src/sql/src/parsers/set_var_parser.rs index d04a466704..8290f00af8 100644 --- a/src/sql/src/parsers/set_var_parser.rs +++ b/src/sql/src/parsers/set_var_parser.rs @@ -13,7 +13,7 @@ // limitations under the License. use snafu::ResultExt; -use sqlparser::ast::Statement as SpStatement; +use sqlparser::ast::{Set, Statement as SpStatement}; use crate::ast::{Ident, ObjectName}; use crate::error::{self, Result}; @@ -27,21 +27,27 @@ impl ParserContext<'_> { let _ = self.parser.next_token(); let spstatement = self.parser.parse_set().context(error::SyntaxSnafu)?; match spstatement { - SpStatement::SetVariable { - variables, - value, - hivevar, - .. - } if !hivevar => Ok(Statement::SetVariables(SetVariables { - variable: (*variables)[0].clone(), - value, - })), + SpStatement::Set(set) => match set { + Set::SingleAssignment { + scope: _, + hivevar, + variable, + values, + } if !hivevar => Ok(Statement::SetVariables(SetVariables { + variable, + value: values, + })), - SpStatement::SetTimeZone { value, .. } => Ok(Statement::SetVariables(SetVariables { - variable: ObjectName::from(vec![Ident::new("TIMEZONE")]), - value: vec![value], - })), + Set::SetTimeZone { local: _, value } => Ok(Statement::SetVariables(SetVariables { + variable: ObjectName::from(vec![Ident::new("TIMEZONE")]), + value: vec![value], + })), + set => error::UnsupportedSnafu { + keyword: set.to_string(), + } + .fail(), + }, unexp => error::UnsupportedSnafu { keyword: unexp.to_string(), } diff --git a/src/sql/src/parsers/show_parser.rs b/src/sql/src/parsers/show_parser.rs index 005653450c..e2e5fc50ac 100644 --- a/src/sql/src/parsers/show_parser.rs +++ b/src/sql/src/parsers/show_parser.rs @@ -148,7 +148,7 @@ impl ParserContext<'_> { expected: "a database name", actual: self.peek_token_as_string(), })?; - let database_name = Self::canonicalize_object_name(raw_database_name); + let database_name = Self::canonicalize_object_name(raw_database_name)?; ensure!( !database_name.0.is_empty(), InvalidDatabaseNameSnafu { @@ -168,7 +168,7 @@ impl ParserContext<'_> { expected: "a table name", actual: self.peek_token_as_string(), })?; - let table_name = Self::canonicalize_object_name(raw_table_name); + let table_name = Self::canonicalize_object_name(raw_table_name)?; ensure!( !table_name.0.is_empty(), InvalidTableNameSnafu { @@ -197,7 +197,7 @@ impl ParserContext<'_> { expected: "a flow name", actual: self.peek_token_as_string(), })?; - let flow_name = Self::canonicalize_object_name(raw_flow_name); + let flow_name = Self::canonicalize_object_name(raw_flow_name)?; ensure!( !flow_name.0.is_empty(), InvalidFlowNameSnafu { @@ -214,7 +214,7 @@ impl ParserContext<'_> { expected: "a view name", actual: self.peek_token_as_string(), })?; - let view_name = Self::canonicalize_object_name(raw_view_name); + let view_name = Self::canonicalize_object_name(raw_view_name)?; ensure!( 
!view_name.0.is_empty(), InvalidTableNameSnafu { @@ -241,7 +241,7 @@ impl ParserContext<'_> { ); // Safety: already checked above - Ok(Self::canonicalize_object_name(table_name).0[0].to_string_unquoted()) + Ok(Self::canonicalize_object_name(table_name)?.0[0].to_string_unquoted()) } fn parse_db_name(&mut self) -> Result> { @@ -262,7 +262,7 @@ impl ParserContext<'_> { // Safety: already checked above Ok(Some( - Self::canonicalize_object_name(db_name).0[0].to_string_unquoted(), + Self::canonicalize_object_name(db_name)?.0[0].to_string_unquoted(), )) } diff --git a/src/sql/src/parsers/show_parser/trigger.rs b/src/sql/src/parsers/show_parser/trigger.rs index 9fecadd089..10054ba4a4 100644 --- a/src/sql/src/parsers/show_parser/trigger.rs +++ b/src/sql/src/parsers/show_parser/trigger.rs @@ -20,7 +20,7 @@ impl ParserContext<'_> { actual: self.peek_token_as_string(), })?; - let trigger_name = Self::canonicalize_object_name(trigger_name); + let trigger_name = Self::canonicalize_object_name(trigger_name)?; ensure!( !trigger_name.0.is_empty(), diff --git a/src/sql/src/parsers/truncate_parser.rs b/src/sql/src/parsers/truncate_parser.rs index e7dd9a8c74..1fe0486575 100644 --- a/src/sql/src/parsers/truncate_parser.rs +++ b/src/sql/src/parsers/truncate_parser.rs @@ -33,7 +33,7 @@ impl ParserContext<'_> { expected: "a table name", actual: self.peek_token_as_string(), })?; - let table_ident = Self::canonicalize_object_name(raw_table_ident); + let table_ident = Self::canonicalize_object_name(raw_table_ident)?; ensure!( !table_ident.0.is_empty(), diff --git a/src/sql/src/parsers/utils.rs b/src/sql/src/parsers/utils.rs index 15ef50ab71..5938018082 100644 --- a/src/sql/src/parsers/utils.rs +++ b/src/sql/src/parsers/utils.rs @@ -75,7 +75,7 @@ pub fn parser_expr_to_scalar_value_literal( // 1. 
convert parser expr to logical expr let empty_df_schema = DFSchema::empty(); let logical_expr = SqlToRel::new(&StubContextProvider::default()) - .sql_to_expr(expr.into(), &empty_df_schema, &mut Default::default()) + .sql_to_expr(expr, &empty_df_schema, &mut Default::default()) .context(ConvertToLogicalExpressionSnafu)?; struct FindNow { diff --git a/src/sql/src/statements/statement.rs b/src/sql/src/statements/statement.rs index d0096baa71..f723409a6b 100644 --- a/src/sql/src/statements/statement.rs +++ b/src/sql/src/statements/statement.rs @@ -310,6 +310,6 @@ impl TryFrom<&Statement> for DfStatement { .fail(); } }; - Ok(DfStatement::Statement(Box::new(s.into()))) + Ok(DfStatement::Statement(Box::new(s))) } } diff --git a/src/sql/src/util.rs b/src/sql/src/util.rs index 54555aa59d..f71dfcc8d7 100644 --- a/src/sql/src/util.rs +++ b/src/sql/src/util.rs @@ -194,7 +194,7 @@ fn extract_tables_from_set_expr(set_expr: &SetExpr, names: &mut HashSet {} + _ => {} }; } diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index d5ed2ed4e6..f6c28c6602 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -662,7 +662,7 @@ pub async fn test_http_sql_slow_query(store_type: StorageType) { let (app, mut guard) = setup_test_http_app_with_frontend(store_type, "sql_api").await; let client = TestClient::new(app).await; - let slow_query = "WITH RECURSIVE slow_cte AS (SELECT 1 AS n, md5(CAST(random() AS STRING)) AS hash UNION ALL SELECT n + 1, md5(concat(hash, n)) FROM slow_cte WHERE n < 4500) SELECT COUNT(*) FROM slow_cte"; + let slow_query = "SELECT count(*) FROM generate_series(1, 1000000000)"; let encoded_slow_query = encode(slow_query); let query_params = format!("/v1/sql?sql={encoded_slow_query}"); @@ -1152,12 +1152,12 @@ pub async fn test_prom_http_api(store_type: StorageType) { // query `__name__` without match[] // create a physical table and a logical table let res = client - .get("/v1/sql?sql=create table physical_table (`ts` timestamp time index, message string) with ('physical_metric_table' = 'true');") + .get("/v1/sql?sql=create table physical_table (`ts` timestamp time index, `message` string) with ('physical_metric_table' = 'true');") .send() .await; assert_eq!(res.status(), StatusCode::OK, "{:?}", res.text().await); let res = client - .get("/v1/sql?sql=create table logic_table (`ts` timestamp time index, message string) with ('on_physical_table' = 'physical_table');") + .get("/v1/sql?sql=create table logic_table (`ts` timestamp time index, `message` string) with ('on_physical_table' = 'physical_table');") .send() .await; assert_eq!(res.status(), StatusCode::OK, "{:?}", res.text().await); @@ -5297,7 +5297,7 @@ pub async fn test_log_query(store_type: StorageType) { // prepare data with SQL API let res = client - .get("/v1/sql?sql=create table logs (`ts` timestamp time index, message string);") + .get("/v1/sql?sql=create table logs (`ts` timestamp time index, `message` string);") .send() .await; assert_eq!(res.status(), StatusCode::OK, "{:?}", res.text().await); diff --git a/tests-integration/tests/sql.rs b/tests-integration/tests/sql.rs index f0bc9a353e..c28347076a 100644 --- a/tests-integration/tests/sql.rs +++ b/tests-integration/tests/sql.rs @@ -708,7 +708,7 @@ pub async fn test_mysql_slow_query(store_type: StorageType) { .unwrap(); // The slow query will run at least longer than 1s. 
- let slow_query = "WITH RECURSIVE slow_cte AS (SELECT 1 AS n, md5(CAST(random() AS STRING)) AS hash UNION ALL SELECT n + 1, md5(concat(hash, n)) FROM slow_cte WHERE n < 4500) SELECT COUNT(*) FROM slow_cte"; + let slow_query = "SELECT count(*) FROM generate_series(1, 1000000000)"; // Simulate a slow query. sqlx::query(slow_query).fetch_all(&pool).await.unwrap(); @@ -823,7 +823,7 @@ pub async fn test_postgres_slow_query(store_type: StorageType) { .await .unwrap(); - let slow_query = "WITH RECURSIVE slow_cte AS (SELECT 1 AS n, md5(CAST(random() AS STRING)) AS hash UNION ALL SELECT n + 1, md5(concat(hash, n)) FROM slow_cte WHERE n < 4500) SELECT COUNT(*) FROM slow_cte"; + let slow_query = "SELECT count(*) FROM generate_series(1, 1000000000)"; let _ = sqlx::query(slow_query).fetch_all(&pool).await.unwrap(); // Wait for the slow query to be recorded. diff --git a/tests/cases/distributed/optimizer/filter_push_down.result b/tests/cases/distributed/optimizer/filter_push_down.result index e811d8348d..d383ba0445 100644 --- a/tests/cases/distributed/optimizer/filter_push_down.result +++ b/tests/cases/distributed/optimizer/filter_push_down.result @@ -203,13 +203,13 @@ SELECT * FROM (SELECT i1.i AS a, i2.i AS b, row_number() OVER (ORDER BY i1.i, i2 -- Align the result to PostgreSQL: empty. EXPLAIN SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2) a1 WHERE cond ORDER BY 1; -+---------------+---------------+ -| plan_type | plan | -+---------------+---------------+ -| logical_plan | EmptyRelation | -| physical_plan | EmptyExec | -| | | -+---------------+---------------+ ++---------------+-----------------------+ +| plan_type | plan | ++---------------+-----------------------+ +| logical_plan | EmptyRelation: rows=0 | +| physical_plan | EmptyExec | +| | | ++---------------+-----------------------+ -- Align the result to PostgreSQL: empty. 
SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2 GROUP BY 1) a1 WHERE cond ORDER BY 1; diff --git a/tests/cases/standalone/common/aggregate/corr.result b/tests/cases/standalone/common/aggregate/corr.result index 7099c7a85e..919639bc7b 100644 --- a/tests/cases/standalone/common/aggregate/corr.result +++ b/tests/cases/standalone/common/aggregate/corr.result @@ -10,13 +10,12 @@ SELECT corr(NULL,NULL); +-----------------+ -- Single value returns NULL --- FIXME(dennis): datafusion returns 0.0 here, should be NULL SELECT corr(1,1); +-------------------------+ | corr(Int64(1),Int64(1)) | +-------------------------+ -| 0.0 | +| | +-------------------------+ -- Test with table diff --git a/tests/cases/standalone/common/aggregate/corr.sql b/tests/cases/standalone/common/aggregate/corr.sql index d22715337a..8c859fdddb 100644 --- a/tests/cases/standalone/common/aggregate/corr.sql +++ b/tests/cases/standalone/common/aggregate/corr.sql @@ -5,7 +5,6 @@ SELECT corr(NULL,NULL); -- Single value returns NULL --- FIXME(dennis): datafusion returns 0.0 here, should be NULL SELECT corr(1,1); -- Test with table diff --git a/tests/cases/standalone/common/aggregate/stddev.result b/tests/cases/standalone/common/aggregate/stddev.result index 4cabcd313a..78948c67d8 100644 --- a/tests/cases/standalone/common/aggregate/stddev.result +++ b/tests/cases/standalone/common/aggregate/stddev.result @@ -10,55 +10,55 @@ Affected Rows: 6 SELECT stddev_samp(1); -+------------------+ -| stddev(Int64(1)) | -+------------------+ -| | -+------------------+ ++-----------------------+ +| stddev_samp(Int64(1)) | ++-----------------------+ +| | ++-----------------------+ SELECT var_samp(1); -+---------------+ -| var(Int64(1)) | -+---------------+ -| | -+---------------+ ++--------------------+ +| var_samp(Int64(1)) | ++--------------------+ +| | ++--------------------+ -- stddev_samp SELECT round(stddev_samp(val), 1) FROM stddev_test; -+-----------------------------------------+ -| round(stddev(stddev_test.val),Int64(1)) | -+-----------------------------------------+ -| 478.8 | -+-----------------------------------------+ ++----------------------------------------------+ +| round(stddev_samp(stddev_test.val),Int64(1)) | ++----------------------------------------------+ +| 478.8 | ++----------------------------------------------+ SELECT round(stddev_samp(val), 1) FROM stddev_test WHERE val IS NOT NULL; -+-----------------------------------------+ -| round(stddev(stddev_test.val),Int64(1)) | -+-----------------------------------------+ -| 478.8 | -+-----------------------------------------+ ++----------------------------------------------+ +| round(stddev_samp(stddev_test.val),Int64(1)) | ++----------------------------------------------+ +| 478.8 | ++----------------------------------------------+ SELECT grp, sum(val), round(stddev_samp(val), 1), min(val) FROM stddev_test GROUP BY grp ORDER BY grp; -+-----+----------------------+-----------------------------------------+----------------------+ -| grp | sum(stddev_test.val) | round(stddev(stddev_test.val),Int64(1)) | min(stddev_test.val) | -+-----+----------------------+-----------------------------------------+----------------------+ -| 1 | 85 | 0.7 | 42 | -| 2 | 1042 | 677.4 | 42 | -| 3 | | | | -+-----+----------------------+-----------------------------------------+----------------------+ ++-----+----------------------+----------------------------------------------+----------------------+ +| grp | sum(stddev_test.val) | round(stddev_samp(stddev_test.val),Int64(1)) | 
min(stddev_test.val) | ++-----+----------------------+----------------------------------------------+----------------------+ +| 1 | 85 | 0.7 | 42 | +| 2 | 1042 | 677.4 | 42 | +| 3 | | | | ++-----+----------------------+----------------------------------------------+----------------------+ SELECT grp, sum(val), round(stddev_samp(val), 1), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp; -+-----+----------------------+-----------------------------------------+----------------------+ -| grp | sum(stddev_test.val) | round(stddev(stddev_test.val),Int64(1)) | min(stddev_test.val) | -+-----+----------------------+-----------------------------------------+----------------------+ -| 1 | 85 | 0.7 | 42 | -| 2 | 1042 | 677.4 | 42 | -+-----+----------------------+-----------------------------------------+----------------------+ ++-----+----------------------+----------------------------------------------+----------------------+ +| grp | sum(stddev_test.val) | round(stddev_samp(stddev_test.val),Int64(1)) | min(stddev_test.val) | ++-----+----------------------+----------------------------------------------+----------------------+ +| 1 | 85 | 0.7 | 42 | +| 2 | 1042 | 677.4 | 42 | ++-----+----------------------+----------------------------------------------+----------------------+ -- stddev_pop SELECT round(stddev_pop(val), 1) FROM stddev_test; @@ -99,38 +99,38 @@ SELECT grp, sum(val), round(stddev_pop(val), 1), min(val) FROM stddev_test WHERE -- var_samp SELECT round(var_samp(val), 1) FROM stddev_test; -+--------------------------------------+ -| round(var(stddev_test.val),Int64(1)) | -+--------------------------------------+ -| 229281.6 | -+--------------------------------------+ ++-------------------------------------------+ +| round(var_samp(stddev_test.val),Int64(1)) | ++-------------------------------------------+ +| 229281.6 | ++-------------------------------------------+ SELECT round(var_samp(val), 1) FROM stddev_test WHERE val IS NOT NULL; -+--------------------------------------+ -| round(var(stddev_test.val),Int64(1)) | -+--------------------------------------+ -| 229281.6 | -+--------------------------------------+ ++-------------------------------------------+ +| round(var_samp(stddev_test.val),Int64(1)) | ++-------------------------------------------+ +| 229281.6 | ++-------------------------------------------+ SELECT grp, sum(val), round(var_samp(val), 1), min(val) FROM stddev_test GROUP BY grp ORDER BY grp; -+-----+----------------------+--------------------------------------+----------------------+ -| grp | sum(stddev_test.val) | round(var(stddev_test.val),Int64(1)) | min(stddev_test.val) | -+-----+----------------------+--------------------------------------+----------------------+ -| 1 | 85 | 0.5 | 42 | -| 2 | 1042 | 458882.0 | 42 | -| 3 | | | | -+-----+----------------------+--------------------------------------+----------------------+ ++-----+----------------------+-------------------------------------------+----------------------+ +| grp | sum(stddev_test.val) | round(var_samp(stddev_test.val),Int64(1)) | min(stddev_test.val) | ++-----+----------------------+-------------------------------------------+----------------------+ +| 1 | 85 | 0.5 | 42 | +| 2 | 1042 | 458882.0 | 42 | +| 3 | | | | ++-----+----------------------+-------------------------------------------+----------------------+ SELECT grp, sum(val), round(var_samp(val), 1), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp; 
-+-----+----------------------+--------------------------------------+----------------------+ -| grp | sum(stddev_test.val) | round(var(stddev_test.val),Int64(1)) | min(stddev_test.val) | -+-----+----------------------+--------------------------------------+----------------------+ -| 1 | 85 | 0.5 | 42 | -| 2 | 1042 | 458882.0 | 42 | -+-----+----------------------+--------------------------------------+----------------------+ ++-----+----------------------+-------------------------------------------+----------------------+ +| grp | sum(stddev_test.val) | round(var_samp(stddev_test.val),Int64(1)) | min(stddev_test.val) | ++-----+----------------------+-------------------------------------------+----------------------+ +| 1 | 85 | 0.5 | 42 | +| 2 | 1042 | 458882.0 | 42 | ++-----+----------------------+-------------------------------------------+----------------------+ -- var_pop SELECT round(var_pop(val), 1) FROM stddev_test; diff --git a/tests/cases/standalone/common/error/incorrect_sql.result b/tests/cases/standalone/common/error/incorrect_sql.result index 5069376ed6..d8511ad6df 100644 --- a/tests/cases/standalone/common/error/incorrect_sql.result +++ b/tests/cases/standalone/common/error/incorrect_sql.result @@ -25,7 +25,7 @@ Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Execution err -- No matching function signature SELECT cos(0, 1, 2, 3); -Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Failed to coerce arguments to satisfy a call to 'cos' function: coercion from [Int64, Int64, Int64, Int64] to the signature Uniform(1, [Float64, Float32]) failed No function matches the given name and argument types 'cos(Int64, Int64, Int64, Int64)'. You might need to add explicit type casts. +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Failed to coerce arguments to satisfy a call to 'cos' function: coercion from Int64, Int64, Int64, Int64 to the signature Uniform(1, [Float64, Float32]) failed No function matches the given name and argument types 'cos(Int64, Int64, Int64, Int64)'. You might need to add explicit type casts. 
Candidate functions: cos(Float64/Float32) diff --git a/tests/cases/standalone/common/function/arithmetic.result b/tests/cases/standalone/common/function/arithmetic.result index 01d2c7e062..c1f6dcc387 100644 --- a/tests/cases/standalone/common/function/arithmetic.result +++ b/tests/cases/standalone/common/function/arithmetic.result @@ -28,27 +28,27 @@ Error: 3001(EngineExecuteQuery), Divide by zero error SELECT POW (2, 5); -+--------------------------+ -| power(Int64(2),Int64(5)) | -+--------------------------+ -| 32 | -+--------------------------+ ++------------------------+ +| pow(Int64(2),Int64(5)) | ++------------------------+ +| 32 | ++------------------------+ SELECT POW (1.01, 365); -+---------------------------------+ -| power(Float64(1.01),Int64(365)) | -+---------------------------------+ -| 37.78343433288728 | -+---------------------------------+ ++-------------------------------+ +| pow(Float64(1.01),Int64(365)) | ++-------------------------------+ +| 37.78343433288728 | ++-------------------------------+ SELECT POW (0.99, 365); -+---------------------------------+ -| power(Float64(0.99),Int64(365)) | -+---------------------------------+ -| 0.025517964452291125 | -+---------------------------------+ ++-------------------------------+ +| pow(Float64(0.99),Int64(365)) | ++-------------------------------+ +| 0.025517964452291125 | ++-------------------------------+ SELECT CLAMP(10, 0, 1); diff --git a/tests/cases/standalone/common/order/limit.result b/tests/cases/standalone/common/order/limit.result index 059cc9706a..e830a3ea4b 100644 --- a/tests/cases/standalone/common/order/limit.result +++ b/tests/cases/standalone/common/order/limit.result @@ -49,7 +49,7 @@ Error: 1001(Unsupported), This feature is not implemented: Unsupported LIMIT exp SELECT a FROM test LIMIT row_number() OVER (); -Error: 3001(EngineExecuteQuery), This feature is not implemented: Unsupported LIMIT expression: Some(Cast(Cast { expr: WindowFunction(WindowFunction { fun: WindowUDF(WindowUDF { inner: RowNumber { signature: Signature { type_signature: Nullary, volatility: Immutable } } }), params: WindowFunctionParams { args: [], partition_by: [], order_by: [], window_frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }, null_treatment: None, distinct: false } }), data_type: Int64 })) +Error: 3001(EngineExecuteQuery), This feature is not implemented: Unsupported LIMIT expression: Some(Cast(Cast { expr: WindowFunction(WindowFunction { fun: WindowUDF(WindowUDF { inner: RowNumber { signature: Signature { type_signature: Nullary, volatility: Immutable } } }), params: WindowFunctionParams { args: [], partition_by: [], order_by: [], window_frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }, filter: None, null_treatment: None, distinct: false } }), data_type: Int64 })) CREATE TABLE test2 (a STRING, ts TIMESTAMP TIME INDEX); diff --git a/tests/cases/standalone/common/range/by.result b/tests/cases/standalone/common/range/by.result index 0f876eb024..fd2feb1da4 100644 --- a/tests/cases/standalone/common/range/by.result +++ b/tests/cases/standalone/common/range/by.result @@ -23,12 +23,12 @@ Affected Rows: 10 -- Test by calculate SELECT ts, length(host), max(val) RANGE '5s' FROM host ALIGN '20s' BY (length(host)) ORDER BY ts; -+---------------------+-----------------------------+------------------------+ -| ts | character_length(host.host) | max(host.val) RANGE 5s | 
-+---------------------+-----------------------------+------------------------+ -| 1970-01-01T00:00:00 | 5 | 3 | -| 1970-01-01T00:00:20 | 5 | 5 | -+---------------------+-----------------------------+------------------------+ ++---------------------+-------------------+------------------------+ +| ts | length(host.host) | max(host.val) RANGE 5s | ++---------------------+-------------------+------------------------+ +| 1970-01-01T00:00:00 | 5 | 3 | +| 1970-01-01T00:00:20 | 5 | 5 | ++---------------------+-------------------+------------------------+ SELECT ts, max(val) RANGE '5s' FROM host ALIGN '20s' BY (2) ORDER BY ts; @@ -52,12 +52,12 @@ SELECT ts, max(val) RANGE '5s' FROM host ALIGN '20s' BY () ORDER BY ts; SELECT ts, length(host)::INT64 + 2, max(val) RANGE '5s' FROM host ALIGN '20s' BY (length(host)::INT64 + 2) ORDER BY ts; -+---------------------+------------------------------------------------------------------+------------------------+ -| ts | arrow_cast(character_length(host.host),Utf8("Int64")) + Int64(2) | max(host.val) RANGE 5s | -+---------------------+------------------------------------------------------------------+------------------------+ -| 1970-01-01T00:00:00 | 7 | 3 | -| 1970-01-01T00:00:20 | 7 | 5 | -+---------------------+------------------------------------------------------------------+------------------------+ ++---------------------+--------------------------------------------------------+------------------------+ +| ts | arrow_cast(length(host.host),Utf8("Int64")) + Int64(2) | max(host.val) RANGE 5s | ++---------------------+--------------------------------------------------------+------------------------+ +| 1970-01-01T00:00:00 | 7 | 3 | +| 1970-01-01T00:00:20 | 7 | 5 | ++---------------------+--------------------------------------------------------+------------------------+ -- Test error -- project non-aggregation key diff --git a/tests/cases/standalone/common/range/calculate.result b/tests/cases/standalone/common/range/calculate.result index 6ac21a9352..f27cbef398 100644 --- a/tests/cases/standalone/common/range/calculate.result +++ b/tests/cases/standalone/common/range/calculate.result @@ -188,22 +188,22 @@ SELECT ts, host, floor(cos(ceil(sin(min(val) RANGE '5s')))) FROM host ALIGN '5s' SELECT ts, host, gcd(CAST(max(floor(val::DOUBLE)) RANGE '10s' FILL PREV as INT64) * 4, max(val * 4) RANGE '10s' FILL PREV) * length(host) + 1 FROM host ALIGN '5s' ORDER BY host, ts; -+---------------------+-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| ts | host | gcd(arrow_cast(max(floor(host.val)) RANGE 10s FILL PREV,Utf8("Int64")) * Int64(4),max(host.val * Int64(4)) RANGE 10s FILL PREV) * character_length(host.host) + Int64(1) | -+---------------------+-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| 1969-12-31T23:59:55 | host1 | 1 | -| 1970-01-01T00:00:00 | host1 | 1 | -| 1970-01-01T00:00:05 | host1 | 21 | -| 1970-01-01T00:00:10 | host1 | 21 | -| 1970-01-01T00:00:15 | host1 | 41 | -| 1970-01-01T00:00:20 | host1 | 41 | -| 1969-12-31T23:59:55 | host2 | 61 | -| 1970-01-01T00:00:00 | host2 | 61 | -| 1970-01-01T00:00:05 | host2 | 81 | -| 1970-01-01T00:00:10 | host2 | 81 | -| 1970-01-01T00:00:15 | host2 | 101 | -| 1970-01-01T00:00:20 | host2 | 101 | 
-+---------------------+-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------------+-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ts | host | gcd(arrow_cast(max(floor(host.val)) RANGE 10s FILL PREV,Utf8("Int64")) * Int64(4),max(host.val * Int64(4)) RANGE 10s FILL PREV) * length(host.host) + Int64(1) | ++---------------------+-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| 1969-12-31T23:59:55 | host1 | 1 | +| 1970-01-01T00:00:00 | host1 | 1 | +| 1970-01-01T00:00:05 | host1 | 21 | +| 1970-01-01T00:00:10 | host1 | 21 | +| 1970-01-01T00:00:15 | host1 | 41 | +| 1970-01-01T00:00:20 | host1 | 41 | +| 1969-12-31T23:59:55 | host2 | 61 | +| 1970-01-01T00:00:00 | host2 | 61 | +| 1970-01-01T00:00:05 | host2 | 81 | +| 1970-01-01T00:00:10 | host2 | 81 | +| 1970-01-01T00:00:15 | host2 | 101 | +| 1970-01-01T00:00:20 | host2 | 101 | ++---------------------+-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ DROP TABLE host; diff --git a/tests/cases/standalone/common/system/pg_catalog.result b/tests/cases/standalone/common/system/pg_catalog.result index d6530a78d2..0aa7f1cc7e 100644 --- a/tests/cases/standalone/common/system/pg_catalog.result +++ b/tests/cases/standalone/common/system/pg_catalog.result @@ -712,10 +712,10 @@ select * from pg_catalog.pg_type order by oid; -- SQLNESS REPLACE (\d+\s*) OID select * from pg_catalog.pg_database where datname = 'public'; -+-----+---------+--------+----------+------------+----------+---------------+--------------+--------------+---------------+--------------+------------+---------------+--------+ -| oid | datname | datdba | encoding | datcollate | datctype | datistemplate | datallowconn | datconnlimit | datlastsysoid | datfrozenxid | datminmxid | dattablespace | datacl | -+-----+---------+--------+----------+------------+----------+---------------+--------------+--------------+---------------+--------------+------------+---------------+--------+ -+-----+---------+--------+----------+------------+----------+---------------+--------------+--------------+---------------+--------------+------------+---------------+--------+ ++-----+---------+--------+----------+----------------+------------+----------+---------------+--------------+--------------+---------------+--------------+------------+---------------+--------------+-------------+--------+ +| oid | datname | datdba | encoding | datlocprovider | datcollate | datctype | datistemplate | datallowconn | datconnlimit | datlastsysoid | datfrozenxid | datminmxid | dattablespace | daticulocale | daticurules | datacl | ++-----+---------+--------+----------+----------------+------------+----------+---------------+--------------+--------------+---------------+--------------+------------+---------------+--------------+-------------+--------+ ++-----+---------+--------+----------+----------------+------------+----------+---------------+--------------+--------------+---------------+--------------+------------+---------------+--------------+-------------+--------+ -- \d -- SQLNESS PROTOCOL POSTGRES diff --git 
a/tests/cases/standalone/common/tql-explain-analyze/explain.result b/tests/cases/standalone/common/tql-explain-analyze/explain.result index 33cd57b327..846086bf67 100644 --- a/tests/cases/standalone/common/tql-explain-analyze/explain.result +++ b/tests/cases/standalone/common/tql-explain-analyze/explain.result @@ -184,6 +184,7 @@ TQL EXPLAIN VERBOSE (0, 10, '5s') test; | physical_plan after OutputRequirements_| MergeScanExec: REDACTED |_|_| | physical_plan after LimitAggregation_| SAME TEXT AS ABOVE_| +| physical_plan after LimitPushPastWindows_| SAME TEXT AS ABOVE_| | physical_plan after LimitPushdown_| SAME TEXT AS ABOVE_| | physical_plan after ProjectionPushdown_| SAME TEXT AS ABOVE_| | physical_plan after EnsureCooperative_| CooperativeExec_| @@ -321,6 +322,7 @@ TQL EXPLAIN VERBOSE (0, 10, '5s') test AS series; | physical_plan after OutputRequirements_| MergeScanExec: REDACTED |_|_| | physical_plan after LimitAggregation_| SAME TEXT AS ABOVE_| +| physical_plan after LimitPushPastWindows_| SAME TEXT AS ABOVE_| | physical_plan after LimitPushdown_| SAME TEXT AS ABOVE_| | physical_plan after ProjectionPushdown_| SAME TEXT AS ABOVE_| | physical_plan after EnsureCooperative_| CooperativeExec_| diff --git a/tests/cases/standalone/common/tql/tql-cte.result b/tests/cases/standalone/common/tql/tql-cte.result index 76e4b511db..7127f79d9f 100644 --- a/tests/cases/standalone/common/tql/tql-cte.result +++ b/tests/cases/standalone/common/tql/tql-cte.result @@ -775,8 +775,8 @@ LIMIT 5; | | SubqueryAlias: l | | | TableScan: labels | | | ]] | -| physical_plan | SortPreservingMergeExec: [ts@0 ASC NULLS LAST, host@2 ASC NULLS LAST, avg_value@1 ASC NULLS LAST], fetch=5 | -| | SortExec: TopK(fetch=5), expr=[ts@0 ASC NULLS LAST, host@2 ASC NULLS LAST, avg_value@1 ASC NULLS LAST], preserve_REDACTED +| physical_plan | SortPreservingMergeExec: [ts@0 ASC NULLS LAST, avg_value@1 ASC NULLS LAST], fetch=5 | +| | SortExec: TopK(fetch=5), expr=[ts@0 ASC NULLS LAST, avg_value@1 ASC NULLS LAST], preserve_REDACTED | | ProjectionExec: expr=[ts@0 as ts, cpu@1 as avg_value, host@2 as host] | | | CoalesceBatchesExec: target_batch_size=8192 | | | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(date_trunc(Utf8("second"),t.ts)@2, date_trunc(Utf8("second"),l.ts)@2)], projection=[ts@0, cpu@1, host@4] | @@ -861,8 +861,8 @@ LIMIT 5; | | SubqueryAlias: l | | | TableScan: labels | | | ]] | -| physical_plan | SortPreservingMergeExec: [ts@0 ASC NULLS LAST, host@2 ASC NULLS LAST, avg_value@1 ASC NULLS LAST], fetch=5 | -| | SortExec: TopK(fetch=5), expr=[ts@0 ASC NULLS LAST, host@2 ASC NULLS LAST, avg_value@1 ASC NULLS LAST], preserve_REDACTED +| physical_plan | SortPreservingMergeExec: [ts@0 ASC NULLS LAST, avg_value@1 ASC NULLS LAST], fetch=5 | +| | SortExec: TopK(fetch=5), expr=[ts@0 ASC NULLS LAST, avg_value@1 ASC NULLS LAST], preserve_REDACTED | | ProjectionExec: expr=[ts@1 as ts, cpu@0 as avg_value, host@2 as host] | | | CoalesceBatchesExec: target_batch_size=8192 | | | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(date_trunc(Utf8("second"),t.ts)@2, date_trunc(Utf8("second"),l.ts)@2)], projection=[cpu@0, ts@1, host@4] | diff --git a/tests/cases/standalone/common/types/string/bigstring.result b/tests/cases/standalone/common/types/string/bigstring.result index 725f2dd659..a749e7538b 100644 --- a/tests/cases/standalone/common/types/string/bigstring.result +++ b/tests/cases/standalone/common/types/string/bigstring.result @@ -23,14 +23,14 @@ Affected Rows: 1 SELECT LENGTH(a) FROM test ORDER BY 1; 
-+--------------------------+ -| character_length(test.a) | -+--------------------------+ -| 10 | -| 100 | -| 1000 | -| 10000 | -+--------------------------+ ++----------------+ +| length(test.a) | ++----------------+ +| 10 | +| 100 | +| 1000 | +| 10000 | ++----------------+ DROP TABLE test; diff --git a/tests/cases/standalone/common/types/string/scan_big_varchar.result b/tests/cases/standalone/common/types/string/scan_big_varchar.result index 4d9261d116..374c8662bd 100644 --- a/tests/cases/standalone/common/types/string/scan_big_varchar.result +++ b/tests/cases/standalone/common/types/string/scan_big_varchar.result @@ -33,11 +33,11 @@ Affected Rows: 1 -- verify that the append worked SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 1 | 1 | 10000 | 10000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 1 | 1 | 10000 | 10000 | ++----------+-------------------+-------------------------+-------------------------+ -- we create a total of 16K entries in the big table -- the total size of this table is 16K*10K = 160MB @@ -48,11 +48,11 @@ Affected Rows: 1 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 2 | 2 | 10000 | 20000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 2 | 2 | 10000 | 20000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 23 FROM bigtable; @@ -60,11 +60,11 @@ Affected Rows: 2 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 4 | 4 | 10000 | 40000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 4 | 4 | 10000 | 40000 | 
++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 31 FROM bigtable; @@ -72,11 +72,11 @@ Affected Rows: 4 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 8 | 8 | 10000 | 80000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 8 | 8 | 10000 | 80000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 37 FROM bigtable; @@ -84,11 +84,11 @@ Affected Rows: 8 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 16 | 16 | 10000 | 160000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 16 | 16 | 10000 | 160000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 41 FROM bigtable; @@ -96,11 +96,11 @@ Affected Rows: 16 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 32 | 32 | 10000 | 320000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 32 | 32 | 10000 | 320000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 47 FROM bigtable; @@ -108,11 +108,11 @@ Affected Rows: 32 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | 
-+----------+-------------------+-----------------------------------+-----------------------------------+ -| 64 | 64 | 10000 | 640000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 64 | 64 | 10000 | 640000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 51 FROM bigtable; @@ -120,11 +120,11 @@ Affected Rows: 64 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 128 | 128 | 10000 | 1280000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 128 | 128 | 10000 | 1280000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 53 FROM bigtable; @@ -132,11 +132,11 @@ Affected Rows: 128 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 256 | 256 | 10000 | 2560000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 256 | 256 | 10000 | 2560000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 57 FROM bigtable; @@ -144,11 +144,11 @@ Affected Rows: 256 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 512 | 512 | 10000 | 5120000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 512 | 512 
| 10000 | 5120000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 61 FROM bigtable; @@ -156,11 +156,11 @@ Affected Rows: 512 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 1024 | 1024 | 10000 | 10240000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 1024 | 1024 | 10000 | 10240000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 63 FROM bigtable; @@ -168,20 +168,20 @@ Affected Rows: 1024 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 2048 | 2048 | 10000 | 20480000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 2048 | 2048 | 10000 | 20480000 | ++----------+-------------------+-------------------------+-------------------------+ -- SQLNESS ARG restart=true SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 2048 | 2048 | 10000 | 20480000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 2048 | 2048 | 10000 | 20480000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 67 FROM bigtable; @@ -189,11 +189,11 @@ Affected Rows: 2048 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | 
-+----------+-------------------+-----------------------------------+-----------------------------------+ -| 4096 | 4096 | 10000 | 40960000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 4096 | 4096 | 10000 | 40960000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 71 FROM bigtable; @@ -201,11 +201,11 @@ Affected Rows: 4096 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 8192 | 8192 | 10000 | 81920000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 8192 | 8192 | 10000 | 81920000 | ++----------+-------------------+-------------------------+-------------------------+ DROP TABLE test; diff --git a/tests/cases/standalone/common/types/string/unicode.result b/tests/cases/standalone/common/types/string/unicode.result index 5580093c22..c9cd0283c0 100644 --- a/tests/cases/standalone/common/types/string/unicode.result +++ b/tests/cases/standalone/common/types/string/unicode.result @@ -58,12 +58,12 @@ SELECT substr('🦤🦆f', 1, 2); -- length on emojis SELECT length(s) FROM emojis ORDER BY id; -+----------------------------+ -| character_length(emojis.s) | -+----------------------------+ -| 1 | -| 3 | -+----------------------------+ ++------------------+ +| length(emojis.s) | ++------------------+ +| 1 | +| 3 | ++------------------+ DROP TABLE emojis; diff --git a/tests/cases/standalone/common/view/create.result b/tests/cases/standalone/common/view/create.result index f524ed1c05..1e0650e165 100644 --- a/tests/cases/standalone/common/view/create.result +++ b/tests/cases/standalone/common/view/create.result @@ -9,7 +9,7 @@ Error: 2000(InvalidSyntax), Invalid SQL syntax: sql parser error: Expected: AS, CREATE VIEW test_view as DELETE FROM public.numbers; -Error: 2000(InvalidSyntax), Invalid SQL syntax: sql parser error: Expected: SELECT, VALUES, or a subquery in the query body, found: DELETE at Line: 1, Column: 26 +Error: 1001(Unsupported), Failed to plan SQL: This feature is not implemented: Query DELETE FROM public.numbers not implemented yet --- Table already exists --- CREATE VIEW test_table as SELECT * FROM public.numbers; From b53a0b86fb8b2f3cede177025c8a9ffb4bb40e3a Mon Sep 17 00:00:00 2001 From: LFC <990479+MichaelScofield@users.noreply.github.com> Date: Fri, 24 Oct 2025 10:16:49 +0800 Subject: [PATCH 07/14] feat: create table with new json datatype (#7128) * feat: create table with new json datatype Signed-off-by: luofucong * resolve PR comments Signed-off-by: luofucong --------- Signed-off-by: luofucong --- Cargo.lock | 1 + 
src/api/src/v1/column_def.rs | 12 ++++- src/common/test-util/src/recordbatch.rs | 2 +- src/datatypes/src/json.rs | 9 +++- src/datatypes/src/schema.rs | 5 +- src/datatypes/src/schema/column_schema.rs | 17 ++++++ src/datatypes/src/types/json_type.rs | 6 ++- src/query/src/error.rs | 14 +++-- src/query/src/sql/show_create_table.rs | 4 ++ src/sql/Cargo.toml | 1 + src/sql/src/error.rs | 12 ++++- src/sql/src/statements.rs | 16 +++++- src/sql/src/statements/create.rs | 44 ++++++++++++++- src/sql/src/statements/option_map.rs | 24 +++++++++ src/sql/src/util.rs | 17 ++++++ tests-integration/src/tests/instance_test.rs | 56 ++++++++++++++++++++ 16 files changed, 226 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f721f58369..231bd594ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11964,6 +11964,7 @@ dependencies = [ "datafusion-physical-expr", "datafusion-sql", "datatypes", + "either", "hex", "humantime", "iso8601", diff --git a/src/api/src/v1/column_def.rs b/src/api/src/v1/column_def.rs index 5be3d5c196..912b7ee13e 100644 --- a/src/api/src/v1/column_def.rs +++ b/src/api/src/v1/column_def.rs @@ -16,8 +16,8 @@ use std::collections::HashMap; use datatypes::schema::{ COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema, FULLTEXT_KEY, FulltextAnalyzer, - FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY, SkippingIndexOptions, - SkippingIndexType, + FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, JSON_STRUCTURE_SETTINGS_KEY, + SKIPPING_INDEX_KEY, SkippingIndexOptions, SkippingIndexType, }; use greptime_proto::v1::{ Analyzer, FulltextBackend as PbFulltextBackend, SkippingIndexType as PbSkippingIndexType, @@ -68,6 +68,9 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result { if let Some(skipping_index) = options.options.get(SKIPPING_INDEX_GRPC_KEY) { metadata.insert(SKIPPING_INDEX_KEY.to_string(), skipping_index.to_owned()); } + if let Some(settings) = options.options.get(JSON_STRUCTURE_SETTINGS_KEY) { + metadata.insert(JSON_STRUCTURE_SETTINGS_KEY.to_string(), settings.clone()); + } } ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable) @@ -139,6 +142,11 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option unreachable!(), }; let pretty_print = recordbatches.pretty_print().unwrap(); - assert_eq!(pretty_print, expected, "actual: \n{}", pretty_print); + assert_eq!(pretty_print, expected.trim(), "actual: \n{}", pretty_print); } pub async fn execute_and_check_output(db: &Database, sql: &str, expected: ExpectedOutput<'_>) { diff --git a/src/datatypes/src/json.rs b/src/datatypes/src/json.rs index 380cc8ce06..902b84a131 100644 --- a/src/datatypes/src/json.rs +++ b/src/datatypes/src/json.rs @@ -24,6 +24,7 @@ use std::sync::Arc; use common_base::bytes::StringBytes; use ordered_float::OrderedFloat; +use serde::{Deserialize, Serialize}; use serde_json::{Map, Value as Json}; use snafu::{ResultExt, ensure}; @@ -45,7 +46,7 @@ use crate::value::{ListValue, StructValue, Value}; /// convert them to fully structured StructValue for user-facing APIs: the UI protocol and the UDF interface. /// /// **Important**: This settings only controls the internal form of JSON encoding. 
-#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub enum JsonStructureSettings { // TODO(sunng87): provide a limit Structured(Option), @@ -111,6 +112,12 @@ impl JsonStructureSettings { } } +impl Default for JsonStructureSettings { + fn default() -> Self { + Self::Structured(None) + } +} + impl<'a> JsonContext<'a> { /// Create a new context with an updated key path pub fn with_key(&self, key: &str) -> JsonContext<'a> { diff --git a/src/datatypes/src/schema.rs b/src/datatypes/src/schema.rs index 6bdf321137..9995072b7c 100644 --- a/src/datatypes/src/schema.rs +++ b/src/datatypes/src/schema.rs @@ -32,8 +32,9 @@ pub use crate::schema::column_schema::{ COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE, COLUMN_FULLTEXT_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, ColumnExtType, ColumnSchema, FULLTEXT_KEY, - FulltextAnalyzer, FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, Metadata, - SKIPPING_INDEX_KEY, SkippingIndexOptions, SkippingIndexType, TIME_INDEX_KEY, + FulltextAnalyzer, FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, + JSON_STRUCTURE_SETTINGS_KEY, Metadata, SKIPPING_INDEX_KEY, SkippingIndexOptions, + SkippingIndexType, TIME_INDEX_KEY, }; pub use crate::schema::constraint::ColumnDefaultConstraint; pub use crate::schema::raw::RawSchema; diff --git a/src/datatypes/src/schema/column_schema.rs b/src/datatypes/src/schema/column_schema.rs index f176350b8c..627d898810 100644 --- a/src/datatypes/src/schema/column_schema.rs +++ b/src/datatypes/src/schema/column_schema.rs @@ -23,6 +23,7 @@ use sqlparser_derive::{Visit, VisitMut}; use crate::data_type::{ConcreteDataType, DataType}; use crate::error::{self, Error, InvalidFulltextOptionSnafu, ParseExtendedTypeSnafu, Result}; +use crate::json::JsonStructureSettings; use crate::schema::TYPE_KEY; use crate::schema::constraint::ColumnDefaultConstraint; use crate::value::Value; @@ -41,6 +42,7 @@ pub const FULLTEXT_KEY: &str = "greptime:fulltext"; pub const INVERTED_INDEX_KEY: &str = "greptime:inverted_index"; /// Key used to store skip options in arrow field's metadata. pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index"; +pub const JSON_STRUCTURE_SETTINGS_KEY: &str = "greptime:json:structure_settings"; /// Keys used in fulltext options pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable"; @@ -391,6 +393,21 @@ impl ColumnSchema { self.metadata.remove(SKIPPING_INDEX_KEY); Ok(()) } + + pub fn json_structure_settings(&self) -> Result> { + self.metadata + .get(JSON_STRUCTURE_SETTINGS_KEY) + .map(|json| serde_json::from_str(json).context(error::DeserializeSnafu { json })) + .transpose() + } + + pub fn with_json_structure_settings(&mut self, settings: &JsonStructureSettings) -> Result<()> { + self.metadata.insert( + JSON_STRUCTURE_SETTINGS_KEY.to_string(), + serde_json::to_string(settings).context(error::SerializeSnafu)?, + ); + Ok(()) + } } /// Column extended type set in column schema's metadata. 
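The new `json_structure_settings` accessors above persist the settings as a serde-serialized string under the `greptime:json:structure_settings` metadata key. A minimal sketch of that round-trip, assuming `serde`/`serde_json` are available; the enum here is a simplified stand-in, not the actual `JsonStructureSettings`:

```rust
use std::collections::HashMap;

use serde::{Deserialize, Serialize};

// Simplified stand-in; the real enum carries structured-field limits and
// unstructured key sets.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
enum JsonSettings {
    #[default]
    Structured,
    UnstructuredRaw,
}

const SETTINGS_KEY: &str = "greptime:json:structure_settings";

// Store the settings as a JSON string in a column's metadata map.
fn set_settings(
    metadata: &mut HashMap<String, String>,
    settings: &JsonSettings,
) -> serde_json::Result<()> {
    metadata.insert(SETTINGS_KEY.to_string(), serde_json::to_string(settings)?);
    Ok(())
}

// Read them back; `None` means the column never had settings attached.
fn get_settings(metadata: &HashMap<String, String>) -> serde_json::Result<Option<JsonSettings>> {
    metadata
        .get(SETTINGS_KEY)
        .map(|raw| serde_json::from_str(raw))
        .transpose()
}

fn main() -> serde_json::Result<()> {
    let mut metadata = HashMap::new();
    set_settings(&mut metadata, &JsonSettings::UnstructuredRaw)?;
    assert_eq!(get_settings(&metadata)?, Some(JsonSettings::UnstructuredRaw));
    Ok(())
}
```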
diff --git a/src/datatypes/src/types/json_type.rs b/src/datatypes/src/types/json_type.rs index 01ec81dd08..99dcf9c571 100644 --- a/src/datatypes/src/types/json_type.rs +++ b/src/datatypes/src/types/json_type.rs @@ -15,6 +15,7 @@ use std::str::FromStr; use arrow::datatypes::DataType as ArrowDataType; +use arrow_schema::Fields; use common_base::bytes::Bytes; use serde::{Deserialize, Serialize}; use snafu::ResultExt; @@ -63,7 +64,10 @@ impl DataType for JsonType { } fn as_arrow_type(&self) -> ArrowDataType { - ArrowDataType::Binary + match self.format { + JsonFormat::Jsonb => ArrowDataType::Binary, + JsonFormat::Native(_) => ArrowDataType::Struct(Fields::empty()), + } } fn create_mutable_vector(&self, capacity: usize) -> Box { diff --git a/src/query/src/error.rs b/src/query/src/error.rs index 8cf64dbffc..4649b7fe49 100644 --- a/src/query/src/error.rs +++ b/src/query/src/error.rs @@ -353,6 +353,13 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(transparent)] + Datatypes { + source: datatypes::error::Error, + #[snafu(implicit)] + location: Location, + }, } impl ErrorExt for Error { @@ -406,9 +413,10 @@ impl ErrorExt for Error { MissingTableMutationHandler { .. } => StatusCode::Unexpected, GetRegionMetadata { .. } => StatusCode::RegionNotReady, TableReadOnly { .. } => StatusCode::Unsupported, - GetFulltextOptions { source, .. } | GetSkippingIndexOptions { source, .. } => { - source.status_code() - } + + GetFulltextOptions { source, .. } + | GetSkippingIndexOptions { source, .. } + | Datatypes { source, .. } => source.status_code(), } } diff --git a/src/query/src/sql/show_create_table.rs b/src/query/src/sql/show_create_table.rs index 5466bb91e6..3b2d8aaceb 100644 --- a/src/query/src/sql/show_create_table.rs +++ b/src/query/src/sql/show_create_table.rs @@ -159,6 +159,10 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result StatusCode::InvalidArguments, - SerializeColumnDefaultConstraint { source, .. } => source.status_code(), + SerializeColumnDefaultConstraint { source, .. } + | SetJsonStructureSettings { source, .. } => source.status_code(), + ConvertToGrpcDataType { source, .. } => source.status_code(), SqlCommon { source, .. } => source.status_code(), ConvertToDfStatement { .. } => StatusCode::Internal, diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index b48e208043..823b123011 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -49,8 +49,8 @@ use crate::ast::{ }; use crate::error::{ self, ConvertToGrpcDataTypeSnafu, ConvertValueSnafu, Result, - SerializeColumnDefaultConstraintSnafu, SetFulltextOptionSnafu, SetSkippingIndexOptionSnafu, - SqlCommonSnafu, + SerializeColumnDefaultConstraintSnafu, SetFulltextOptionSnafu, SetJsonStructureSettingsSnafu, + SetSkippingIndexOptionSnafu, SqlCommonSnafu, }; use crate::statements::create::Column; pub use crate::statements::option_map::OptionMap; @@ -144,6 +144,18 @@ pub fn column_to_schema( column_schema.set_inverted_index(column.extensions.inverted_index_options.is_some()); + if matches!(column.data_type(), SqlDataType::JSON) { + let settings = column + .extensions + .build_json_structure_settings()? 
+ .unwrap_or_default(); + column_schema + .with_json_structure_settings(&settings) + .with_context(|_| SetJsonStructureSettingsSnafu { + value: format!("{settings:?}"), + })?; + } + Ok(column_schema) } diff --git a/src/sql/src/statements/create.rs b/src/sql/src/statements/create.rs index 9d945e7c8d..3c7f6d1731 100644 --- a/src/sql/src/statements/create.rs +++ b/src/sql/src/statements/create.rs @@ -32,6 +32,7 @@ use crate::error::{ use crate::statements::OptionMap; use crate::statements::statement::Statement; use crate::statements::tql::Tql; +use crate::util::OptionValue; const LINE_SEP: &str = ",\n"; const COMMA_SEP: &str = ", "; @@ -166,7 +167,20 @@ impl Display for Column { return Ok(()); } - write!(f, "{}", self.column_def)?; + write!(f, "{} {}", self.column_def.name, self.column_def.data_type)?; + if let Some(options) = &self.extensions.json_datatype_options { + write!( + f, + "({})", + options + .entries() + .map(|(k, v)| format!("{k} = {v}")) + .join(COMMA_SEP) + )?; + } + for option in &self.column_def.options { + write!(f, " {option}")?; + } if let Some(fulltext_options) = &self.extensions.fulltext_index_options { if !fulltext_options.is_empty() { @@ -251,6 +265,34 @@ impl ColumnExtensions { }) .transpose() } + + pub fn set_json_structure_settings(&mut self, settings: JsonStructureSettings) { + let mut map = OptionMap::default(); + + let format = match settings { + JsonStructureSettings::Structured(_) => JSON_FORMAT_FULL_STRUCTURED, + JsonStructureSettings::PartialUnstructuredByKey { .. } => JSON_FORMAT_PARTIAL, + JsonStructureSettings::UnstructuredRaw => JSON_FORMAT_RAW, + }; + map.insert(JSON_OPT_FORMAT.to_string(), format.to_string()); + + if let JsonStructureSettings::PartialUnstructuredByKey { + fields: _, + unstructured_keys, + } = settings + { + let value = OptionValue::from( + unstructured_keys + .iter() + .map(|x| x.as_str()) + .sorted() + .collect::>(), + ); + map.insert_options(JSON_OPT_UNSTRUCTURED_KEYS, value); + } + + self.json_datatype_options = Some(map); + } } /// Partition on columns or values. 
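A small sketch of what `set_json_structure_settings` records, assuming `ColumnExtensions` implements `Default` and is reachable at `sql::statements::create::ColumnExtensions`:

use datatypes::json::JsonStructureSettings;
use sql::statements::create::ColumnExtensions;

fn default_json_options() {
    let mut extensions = ColumnExtensions::default();
    // Structured(None) maps to `format = 'structured'` in json_datatype_options,
    // which the Display impl above renders as JSON(format = 'structured').
    extensions.set_json_structure_settings(JsonStructureSettings::Structured(None));
}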
diff --git a/src/sql/src/statements/option_map.rs b/src/sql/src/statements/option_map.rs index f67b0dc72a..d6bd4d7608 100644 --- a/src/sql/src/statements/option_map.rs +++ b/src/sql/src/statements/option_map.rs @@ -16,6 +16,7 @@ use std::collections::{BTreeMap, HashMap}; use std::ops::ControlFlow; use common_base::secrets::{ExposeSecret, ExposeSecretMut, SecretString}; +use either::Either; use serde::Serialize; use sqlparser::ast::{Visit, VisitMut, Visitor, VisitorMut}; @@ -56,6 +57,17 @@ impl OptionMap { } } + pub fn insert_options(&mut self, key: &str, value: OptionValue) { + if REDACTED_OPTIONS.contains(&key) { + self.secrets.insert( + key.to_string(), + SecretString::new(Box::new(value.to_string())), + ); + } else { + self.options.insert(key.to_string(), value); + } + } + pub fn get(&self, k: &str) -> Option<&str> { if let Some(value) = self.options.get(k) { value.as_string() @@ -130,6 +142,18 @@ impl OptionMap { } result } + + pub fn entries(&self) -> impl Iterator)> { + let options = self + .options + .iter() + .map(|(k, v)| (k.as_str(), Either::Left(v))); + let secrets = self + .secrets + .keys() + .map(|k| (k.as_str(), Either::Right("******"))); + std::iter::chain(options, secrets) + } } impl> From for OptionMap { diff --git a/src/sql/src/util.rs b/src/sql/src/util.rs index f71dfcc8d7..3b221d7642 100644 --- a/src/sql/src/util.rs +++ b/src/sql/src/util.rs @@ -15,6 +15,7 @@ use std::collections::HashSet; use std::fmt::{Display, Formatter}; +use itertools::Itertools; use serde::Serialize; use snafu::ensure; use sqlparser::ast::{ @@ -131,6 +132,22 @@ impl From> for OptionValue { } } +impl Display for OptionValue { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if let Some(s) = self.as_string() { + write!(f, "'{s}'") + } else if let Some(s) = self.as_list() { + write!( + f, + "[{}]", + s.into_iter().map(|x| format!("'{x}'")).join(", ") + ) + } else { + write!(f, "'{}'", self.0) + } + } +} + pub fn parse_option_string(option: SqlOption) -> Result<(String, OptionValue)> { let SqlOption::KeyValue { key, value } = option else { return InvalidSqlSnafu { diff --git a/tests-integration/src/tests/instance_test.rs b/tests-integration/src/tests/instance_test.rs index 95664323ff..a29e468bf6 100644 --- a/tests-integration/src/tests/instance_test.rs +++ b/tests-integration/src/tests/instance_test.rs @@ -2338,3 +2338,59 @@ async fn test_copy_parquet_map_to_binary(instance: Arc) { +----+-----------------------------------------+"#; check_output_stream(output, expected).await; } + +#[apply(both_instances_cases)] +async fn test_create_table_with_json_datatype(instance: Arc) { + let instance = instance.frontend(); + + let sql = r#" +CREATE TABLE a ( + j JSON(format = "partial", unstructured_keys = ["foo", "foo.bar"]), + ts TIMESTAMP TIME INDEX, +)"#; + let output = execute_sql(&instance, sql).await.data; + assert!(matches!(output, OutputData::AffectedRows(0))); + + // "show create table" finds the information from table metadata. + // So if the output is expected, we know the options are really set. 
+ let output = execute_sql(&instance, "SHOW CREATE TABLE a").await.data; + let expected = r#" ++-------+------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+------------------------------------------------------------------------------+ +| a | CREATE TABLE IF NOT EXISTS "a" ( | +| | "j" JSON(format = 'partial', unstructured_keys = ['foo', 'foo.bar']) NULL, | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++-------+------------------------------------------------------------------------------+"#; + check_output_stream(output, expected).await; + + // test the default options + let sql = r#" +CREATE TABLE b ( + j JSON, + ts TIMESTAMP TIME INDEX, +)"#; + let output = execute_sql(&instance, sql).await.data; + assert!(matches!(output, OutputData::AffectedRows(0))); + + let output = execute_sql(&instance, "SHOW CREATE TABLE b").await.data; + let expected = r#" ++-------+-----------------------------------------+ +| Table | Create Table | ++-------+-----------------------------------------+ +| b | CREATE TABLE IF NOT EXISTS "b" ( | +| | "j" JSON(format = 'structured') NULL, | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++-------+-----------------------------------------+"#; + check_output_stream(output, expected).await; +} From a0e6bcbeb34107ddae16e2724c6572e0f5274a3f Mon Sep 17 00:00:00 2001 From: zyy17 Date: Fri, 24 Oct 2025 11:12:45 +0800 Subject: [PATCH 08/14] feat: add `cpu_usage_millicores` and `memory_usage_bytes` in `information_schema.cluster_info` table. (#7051) * refactor: add `hostname` in cluster_info table Signed-off-by: zyy17 * chore: update information schema result Signed-off-by: zyy17 * feat: enable zstd for bulk memtable encoded parts (#7045) feat: enable zstd in bulk memtable Signed-off-by: evenyag * refactor: add `get_total_cpu_millicores()` / `get_total_cpu_cores()` / `get_total_memory_bytes()` / `get_total_memory_readable()` in common-stat Signed-off-by: zyy17 * feat: add `cpu_usage_millicores` and `memory_usage_bytes` in `information_schema.cluster_info` table Signed-off-by: zyy17 * fix: compile warning and integration test failed Signed-off-by: zyy17 * fix: integration test failed Signed-off-by: zyy17 * refactor: add `ResourceStat` Signed-off-by: zyy17 * refactor: apply code review comments Signed-off-by: zyy17 * chore: update greptime-proto Signed-off-by: zyy17 --------- Signed-off-by: zyy17 Signed-off-by: evenyag Co-authored-by: Yingwen --- Cargo.lock | 10 +- Cargo.toml | 2 +- .../information_schema/cluster_info.rs | 49 +++-- src/cmd/src/flownode.rs | 5 + src/cmd/src/frontend.rs | 5 + src/common/config/Cargo.toml | 1 - src/common/config/src/lib.rs | 1 - src/common/config/src/utils.rs | 34 ---- src/common/meta/src/cluster.rs | 16 +- src/common/stat/Cargo.toml | 3 + src/common/stat/src/cgroups.rs | 23 ++- src/common/stat/src/lib.rs | 63 +----- src/common/stat/src/resource.rs | 187 ++++++++++++++++++ src/datanode/Cargo.toml | 1 + src/datanode/src/datanode.rs | 5 + src/datanode/src/heartbeat.rs | 30 ++- src/flow/src/heartbeat.rs | 49 +++-- src/frontend/Cargo.toml | 1 + src/frontend/src/heartbeat.rs | 54 +++-- src/meta-client/src/client.rs | 31 ++- src/meta-srv/Cargo.toml | 1 + src/meta-srv/src/discovery/lease.rs | 18 +- src/meta-srv/src/election/rds/mysql.rs | 6 +- src/meta-srv/src/election/rds/postgres.rs | 6 +- .../handler/collect_cluster_info_handler.rs | 18 +- src/meta-srv/src/metasrv.rs | 38 ++-- 
src/meta-srv/src/metasrv/builder.rs | 6 +- src/meta-srv/src/service/cluster.rs | 6 +- src/standalone/src/information_extension.rs | 11 +- tests-integration/Cargo.toml | 1 + tests-integration/src/cluster.rs | 5 + .../information_schema/cluster_info.result | 6 +- .../common/system/information_schema.result | 18 +- .../information_schema/cluster_info.result | 6 +- 34 files changed, 504 insertions(+), 212 deletions(-) delete mode 100644 src/common/config/src/utils.rs create mode 100644 src/common/stat/src/resource.rs diff --git a/Cargo.lock b/Cargo.lock index 231bd594ea..07b1695817 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2025,7 +2025,6 @@ dependencies = [ "common-base", "common-error", "common-macro", - "common-stat", "common-telemetry", "common-test-util", "common-wal", @@ -2546,11 +2545,14 @@ name = "common-stat" version = "0.18.0" dependencies = [ "common-base", + "common-runtime", + "common-telemetry", "lazy_static", "nix 0.30.1", "num_cpus", "prometheus", "sysinfo", + "tokio", ] [[package]] @@ -3907,6 +3909,7 @@ dependencies = [ "common-query", "common-recordbatch", "common-runtime", + "common-stat", "common-telemetry", "common-test-util", "common-time", @@ -4904,6 +4907,7 @@ dependencies = [ "common-query", "common-recordbatch", "common-runtime", + "common-stat", "common-telemetry", "common-test-util", "common-time", @@ -5319,7 +5323,7 @@ dependencies = [ [[package]] name = "greptime-proto" version = "0.1.0" -source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=72a0d22e0f5f716b2ee21bca091f87a88c36e5ca#72a0d22e0f5f716b2ee21bca091f87a88c36e5ca" +source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=14b9dc40bdc8288742b0cefc7bb024303b7429ef#14b9dc40bdc8288742b0cefc7bb024303b7429ef" dependencies = [ "prost 0.13.5", "prost-types 0.13.5", @@ -7398,6 +7402,7 @@ dependencies = [ "common-procedure", "common-procedure-test", "common-runtime", + "common-stat", "common-telemetry", "common-time", "common-version", @@ -12996,6 +13001,7 @@ dependencies = [ "common-query", "common-recordbatch", "common-runtime", + "common-stat", "common-telemetry", "common-test-util", "common-time", diff --git a/Cargo.toml b/Cargo.toml index 8a9d574263..a4ce20bfd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -147,7 +147,7 @@ etcd-client = { git = "https://github.com/GreptimeTeam/etcd-client", rev = "f62d fst = "0.4.7" futures = "0.3" futures-util = "0.3" -greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "72a0d22e0f5f716b2ee21bca091f87a88c36e5ca" } +greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "14b9dc40bdc8288742b0cefc7bb024303b7429ef" } hex = "0.4" http = "1" humantime = "2.1" diff --git a/src/catalog/src/system_schema/information_schema/cluster_info.rs b/src/catalog/src/system_schema/information_schema/cluster_info.rs index f45dc5be06..1ba1a55fb6 100644 --- a/src/catalog/src/system_schema/information_schema/cluster_info.rs +++ b/src/catalog/src/system_schema/information_schema/cluster_info.rs @@ -33,7 +33,6 @@ use datatypes::timestamp::TimestampMillisecond; use datatypes::value::Value; use datatypes::vectors::{ Int64VectorBuilder, StringVectorBuilder, TimestampMillisecondVectorBuilder, - UInt32VectorBuilder, UInt64VectorBuilder, }; use serde::Serialize; use snafu::ResultExt; @@ -53,6 +52,8 @@ const PEER_ADDR: &str = "peer_addr"; const PEER_HOSTNAME: &str = "peer_hostname"; const TOTAL_CPU_MILLICORES: &str = "total_cpu_millicores"; const TOTAL_MEMORY_BYTES: &str = "total_memory_bytes"; +const 
CPU_USAGE_MILLICORES: &str = "cpu_usage_millicores"; +const MEMORY_USAGE_BYTES: &str = "memory_usage_bytes"; const VERSION: &str = "version"; const GIT_COMMIT: &str = "git_commit"; const START_TIME: &str = "start_time"; @@ -67,15 +68,17 @@ const INIT_CAPACITY: usize = 42; /// - `peer_id`: the peer server id. /// - `peer_type`: the peer type, such as `datanode`, `frontend`, `metasrv` etc. /// - `peer_addr`: the peer gRPC address. +/// - `peer_hostname`: the hostname of the peer. /// - `total_cpu_millicores`: the total CPU millicores of the peer. /// - `total_memory_bytes`: the total memory bytes of the peer. +/// - `cpu_usage_millicores`: the CPU usage millicores of the peer. +/// - `memory_usage_bytes`: the memory usage bytes of the peer. /// - `version`: the build package version of the peer. /// - `git_commit`: the build git commit hash of the peer. /// - `start_time`: the starting time of the peer. /// - `uptime`: the uptime of the peer. /// - `active_time`: the time since the last activity of the peer. /// - `node_status`: the status info of the peer. -/// - `peer_hostname`: the hostname of the peer. /// #[derive(Debug)] pub(super) struct InformationSchemaClusterInfo { @@ -99,12 +102,22 @@ impl InformationSchemaClusterInfo { ColumnSchema::new(PEER_HOSTNAME, ConcreteDataType::string_datatype(), true), ColumnSchema::new( TOTAL_CPU_MILLICORES, - ConcreteDataType::uint32_datatype(), + ConcreteDataType::int64_datatype(), false, ), ColumnSchema::new( TOTAL_MEMORY_BYTES, - ConcreteDataType::uint64_datatype(), + ConcreteDataType::int64_datatype(), + false, + ), + ColumnSchema::new( + CPU_USAGE_MILLICORES, + ConcreteDataType::int64_datatype(), + false, + ), + ColumnSchema::new( + MEMORY_USAGE_BYTES, + ConcreteDataType::int64_datatype(), false, ), ColumnSchema::new(VERSION, ConcreteDataType::string_datatype(), false), @@ -167,8 +180,10 @@ struct InformationSchemaClusterInfoBuilder { peer_types: StringVectorBuilder, peer_addrs: StringVectorBuilder, peer_hostnames: StringVectorBuilder, - cpus: UInt32VectorBuilder, - memory_bytes: UInt64VectorBuilder, + total_cpu_millicores: Int64VectorBuilder, + total_memory_bytes: Int64VectorBuilder, + cpu_usage_millicores: Int64VectorBuilder, + memory_usage_bytes: Int64VectorBuilder, versions: StringVectorBuilder, git_commits: StringVectorBuilder, start_times: TimestampMillisecondVectorBuilder, @@ -186,8 +201,10 @@ impl InformationSchemaClusterInfoBuilder { peer_types: StringVectorBuilder::with_capacity(INIT_CAPACITY), peer_addrs: StringVectorBuilder::with_capacity(INIT_CAPACITY), peer_hostnames: StringVectorBuilder::with_capacity(INIT_CAPACITY), - cpus: UInt32VectorBuilder::with_capacity(INIT_CAPACITY), - memory_bytes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY), + total_cpu_millicores: Int64VectorBuilder::with_capacity(INIT_CAPACITY), + total_memory_bytes: Int64VectorBuilder::with_capacity(INIT_CAPACITY), + cpu_usage_millicores: Int64VectorBuilder::with_capacity(INIT_CAPACITY), + memory_usage_bytes: Int64VectorBuilder::with_capacity(INIT_CAPACITY), versions: StringVectorBuilder::with_capacity(INIT_CAPACITY), git_commits: StringVectorBuilder::with_capacity(INIT_CAPACITY), start_times: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY), @@ -243,8 +260,14 @@ impl InformationSchemaClusterInfoBuilder { self.start_times.push(None); self.uptimes.push(None); } - self.cpus.push(Some(node_info.cpus)); - self.memory_bytes.push(Some(node_info.memory_bytes)); + self.total_cpu_millicores + .push(Some(node_info.total_cpu_millicores)); + 
self.total_memory_bytes + .push(Some(node_info.total_memory_bytes)); + self.cpu_usage_millicores + .push(Some(node_info.cpu_usage_millicores)); + self.memory_usage_bytes + .push(Some(node_info.memory_usage_bytes)); if node_info.last_activity_ts > 0 { self.active_times.push(Some( @@ -269,8 +292,10 @@ impl InformationSchemaClusterInfoBuilder { Arc::new(self.peer_types.finish()), Arc::new(self.peer_addrs.finish()), Arc::new(self.peer_hostnames.finish()), - Arc::new(self.cpus.finish()), - Arc::new(self.memory_bytes.finish()), + Arc::new(self.total_cpu_millicores.finish()), + Arc::new(self.total_memory_bytes.finish()), + Arc::new(self.cpu_usage_millicores.finish()), + Arc::new(self.memory_usage_bytes.finish()), Arc::new(self.versions.finish()), Arc::new(self.git_commits.finish()), Arc::new(self.start_times.finish()), diff --git a/src/cmd/src/flownode.rs b/src/cmd/src/flownode.rs index 500e9bfa89..07f3279724 100644 --- a/src/cmd/src/flownode.rs +++ b/src/cmd/src/flownode.rs @@ -30,6 +30,7 @@ use common_meta::heartbeat::handler::invalidate_table_cache::InvalidateCacheHand use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler; use common_meta::key::TableMetadataManager; use common_meta::key::flow::FlowMetadataManager; +use common_stat::ResourceStatImpl; use common_telemetry::info; use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions}; use common_version::{short_version, verbose_version}; @@ -372,11 +373,15 @@ impl StartCommand { Arc::new(InvalidateCacheHandler::new(layered_cache_registry.clone())), ]); + let mut resource_stat = ResourceStatImpl::default(); + resource_stat.start_collect_cpu_usage(); + let heartbeat_task = flow::heartbeat::HeartbeatTask::new( &opts, meta_client.clone(), opts.heartbeat.clone(), Arc::new(executor), + Arc::new(resource_stat), ); let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone())); diff --git a/src/cmd/src/frontend.rs b/src/cmd/src/frontend.rs index 4c72021a47..fda6d968bf 100644 --- a/src/cmd/src/frontend.rs +++ b/src/cmd/src/frontend.rs @@ -30,6 +30,7 @@ use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder}; use common_meta::heartbeat::handler::HandlerGroupExecutor; use common_meta::heartbeat::handler::invalidate_table_cache::InvalidateCacheHandler; use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler; +use common_stat::ResourceStatImpl; use common_telemetry::info; use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions}; use common_time::timezone::set_default_timezone; @@ -421,11 +422,15 @@ impl StartCommand { Arc::new(InvalidateCacheHandler::new(layered_cache_registry.clone())), ]); + let mut resource_stat = ResourceStatImpl::default(); + resource_stat.start_collect_cpu_usage(); + let heartbeat_task = HeartbeatTask::new( &opts, meta_client.clone(), opts.heartbeat.clone(), Arc::new(executor), + Arc::new(resource_stat), ); let heartbeat_task = Some(heartbeat_task); diff --git a/src/common/config/Cargo.toml b/src/common/config/Cargo.toml index 1d2b21602f..b45c03a6c3 100644 --- a/src/common/config/Cargo.toml +++ b/src/common/config/Cargo.toml @@ -11,7 +11,6 @@ workspace = true common-base.workspace = true common-error.workspace = true common-macro.workspace = true -common-stat.workspace = true config.workspace = true humantime-serde.workspace = true object-store.workspace = true diff --git a/src/common/config/src/lib.rs b/src/common/config/src/lib.rs index b806924217..cc25ebce16 100644 --- 
a/src/common/config/src/lib.rs +++ b/src/common/config/src/lib.rs @@ -14,7 +14,6 @@ pub mod config; pub mod error; -pub mod utils; use std::time::Duration; diff --git a/src/common/config/src/utils.rs b/src/common/config/src/utils.rs deleted file mode 100644 index 1bc986b77e..0000000000 --- a/src/common/config/src/utils.rs +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2023 Greptime Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use common_base::readable_size::ReadableSize; -use common_stat::{get_total_cpu_millicores, get_total_memory_readable}; - -/// `ResourceSpec` holds the static resource specifications of a node, -/// such as CPU cores and memory capacity. These values are fixed -/// at startup and do not change dynamically during runtime. -#[derive(Debug, Clone, Copy)] -pub struct ResourceSpec { - pub cpus: i64, - pub memory: Option, -} - -impl Default for ResourceSpec { - fn default() -> Self { - Self { - cpus: get_total_cpu_millicores(), - memory: get_total_memory_readable(), - } - } -} diff --git a/src/common/meta/src/cluster.rs b/src/common/meta/src/cluster.rs index 63001970b6..74485513e9 100644 --- a/src/common/meta/src/cluster.rs +++ b/src/common/meta/src/cluster.rs @@ -120,10 +120,16 @@ pub struct NodeInfo { pub start_time_ms: u64, // The node build cpus #[serde(default)] - pub cpus: u32, + pub total_cpu_millicores: i64, // The node build memory bytes #[serde(default)] - pub memory_bytes: u64, + pub total_memory_bytes: i64, + // The node build cpu usage millicores + #[serde(default)] + pub cpu_usage_millicores: i64, + // The node build memory usage bytes + #[serde(default)] + pub memory_usage_bytes: i64, // The node build hostname #[serde(default)] pub hostname: String, @@ -333,8 +339,10 @@ mod tests { version: "".to_string(), git_commit: "".to_string(), start_time_ms: 1, - cpus: 0, - memory_bytes: 0, + total_cpu_millicores: 0, + total_memory_bytes: 0, + cpu_usage_millicores: 0, + memory_usage_bytes: 0, hostname: "test_hostname".to_string(), }; diff --git a/src/common/stat/Cargo.toml b/src/common/stat/Cargo.toml index 3d0198f6a2..d0e8b5448f 100644 --- a/src/common/stat/Cargo.toml +++ b/src/common/stat/Cargo.toml @@ -6,11 +6,14 @@ license.workspace = true [dependencies] common-base.workspace = true +common-runtime.workspace = true +common-telemetry.workspace = true lazy_static.workspace = true nix.workspace = true num_cpus.workspace = true prometheus.workspace = true sysinfo.workspace = true +tokio.workspace = true [lints] workspace = true diff --git a/src/common/stat/src/cgroups.rs b/src/common/stat/src/cgroups.rs index fe26f5ec36..ce8f5ac87a 100644 --- a/src/common/stat/src/cgroups.rs +++ b/src/common/stat/src/cgroups.rs @@ -117,7 +117,10 @@ pub fn get_cpu_limit_from_cgroups() -> Option { None } -fn get_cpu_usage() -> Option { +/// Get the usage of cpu in millicores from cgroups filesystem. +/// +/// - Return `None` if it's not in the cgroups v2 environment or fails to read the cpu usage. 
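+/// - The returned value is the cumulative `usage_usec` counter (in microseconds) from `/sys/fs/cgroup/cpu.stat`; `calculate_cpu_usage` converts its delta into millicores.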
+pub fn get_cpu_usage_from_cgroups() -> Option { // In certain bare-metal environments, the `/sys/fs/cgroup/cpu.stat` file may be present and reflect system-wide CPU usage rather than container-specific metrics. // To ensure accurate collection of container-level CPU usage, verify the existence of the `/sys/fs/cgroup/memory.current` file. // The presence of this file typically indicates execution within a containerized environment, thereby validating the relevance of the collected CPU usage data. @@ -142,6 +145,22 @@ fn get_cpu_usage() -> Option { fields[1].trim().parse::().ok() } +// Calculate the cpu usage in millicores from cgroups filesystem. +// +// - Return `0` if the current cpu usage is equal to the last cpu usage or the interval is 0. +pub(crate) fn calculate_cpu_usage( + current_cpu_usage_usecs: i64, + last_cpu_usage_usecs: i64, + interval_milliseconds: i64, +) -> i64 { + let diff = current_cpu_usage_usecs - last_cpu_usage_usecs; + if diff > 0 && interval_milliseconds > 0 { + ((diff as f64 / interval_milliseconds as f64).round() as i64).max(1) + } else { + 0 + } +} + // Check whether the cgroup is v2. // - Return `true` if the cgroup is v2, otherwise return `false`. // - Return `None` if the detection fails or not on linux. @@ -230,7 +249,7 @@ impl Collector for CgroupsMetricsCollector { } fn collect(&self) -> Vec { - if let Some(cpu_usage) = get_cpu_usage() { + if let Some(cpu_usage) = get_cpu_usage_from_cgroups() { self.cpu_usage.set(cpu_usage); } diff --git a/src/common/stat/src/lib.rs b/src/common/stat/src/lib.rs index 2c6cbea3f1..544b9439c8 100644 --- a/src/common/stat/src/lib.rs +++ b/src/common/stat/src/lib.rs @@ -13,66 +13,7 @@ // limitations under the License. mod cgroups; +mod resource; pub use cgroups::*; -use common_base::readable_size::ReadableSize; -use sysinfo::System; - -/// Get the total CPU in millicores. -pub fn get_total_cpu_millicores() -> i64 { - // Get CPU limit from cgroups filesystem. - if let Some(cgroup_cpu_limit) = get_cpu_limit_from_cgroups() { - cgroup_cpu_limit - } else { - // Get total CPU cores from host system. - num_cpus::get() as i64 * 1000 - } -} - -/// Get the total memory in bytes. -pub fn get_total_memory_bytes() -> i64 { - // Get memory limit from cgroups filesystem. - if let Some(cgroup_memory_limit) = get_memory_limit_from_cgroups() { - cgroup_memory_limit - } else { - // Get total memory from host system. - if sysinfo::IS_SUPPORTED_SYSTEM { - let mut sys_info = System::new(); - sys_info.refresh_memory(); - sys_info.total_memory() as i64 - } else { - // If the system is not supported, return -1. - -1 - } - } -} - -/// Get the total CPU cores. The result will be rounded to the nearest integer. -/// For example, if the total CPU is 1.5 cores(1500 millicores), the result will be 2. -pub fn get_total_cpu_cores() -> usize { - ((get_total_cpu_millicores() as f64) / 1000.0).round() as usize -} - -/// Get the total memory in readable size. 
-pub fn get_total_memory_readable() -> Option { - if get_total_memory_bytes() > 0 { - Some(ReadableSize(get_total_memory_bytes() as u64)) - } else { - None - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_get_total_cpu_cores() { - assert!(get_total_cpu_cores() > 0); - } - - #[test] - fn test_get_total_memory_readable() { - assert!(get_total_memory_readable().unwrap() > ReadableSize::mb(0)); - } -} +pub use resource::*; diff --git a/src/common/stat/src/resource.rs b/src/common/stat/src/resource.rs new file mode 100644 index 0000000000..babfa54a19 --- /dev/null +++ b/src/common/stat/src/resource.rs @@ -0,0 +1,187 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; +use std::sync::atomic::{AtomicI64, Ordering}; +use std::time::Duration; + +use common_base::readable_size::ReadableSize; +use common_runtime::JoinHandle; +use common_telemetry::info; +use sysinfo::System; +use tokio::time::sleep; + +use crate::cgroups::calculate_cpu_usage; +use crate::{ + get_cpu_limit_from_cgroups, get_cpu_usage_from_cgroups, get_memory_limit_from_cgroups, + get_memory_usage_from_cgroups, +}; + +/// Get the total CPU in millicores. If the CPU limit is unset, it will return the total CPU cores from host system. +pub fn get_total_cpu_millicores() -> i64 { + // Get CPU limit from cgroups filesystem. + if let Some(cgroup_cpu_limit) = get_cpu_limit_from_cgroups() { + cgroup_cpu_limit + } else { + // Get total CPU cores from host system. + num_cpus::get() as i64 * 1000 + } +} + +/// Get the total memory in bytes. If the memory limit is unset, it will return the total memory from host system. +/// If the system is not supported to get the total host memory, it will return 0. +pub fn get_total_memory_bytes() -> i64 { + // Get memory limit from cgroups filesystem. + if let Some(cgroup_memory_limit) = get_memory_limit_from_cgroups() { + cgroup_memory_limit + } else { + // Get total memory from host system. + if sysinfo::IS_SUPPORTED_SYSTEM { + let mut sys_info = System::new(); + sys_info.refresh_memory(); + sys_info.total_memory() as i64 + } else { + // If the system is not supported, return 0 + 0 + } + } +} + +/// Get the total CPU cores. The result will be rounded to the nearest integer. +/// For example, if the total CPU is 1.5 cores(1500 millicores), the result will be 2. +pub fn get_total_cpu_cores() -> usize { + ((get_total_cpu_millicores() as f64) / 1000.0).round() as usize +} + +/// Get the total memory in readable size. +pub fn get_total_memory_readable() -> Option { + if get_total_memory_bytes() > 0 { + Some(ReadableSize(get_total_memory_bytes() as u64)) + } else { + None + } +} + +/// A reference to a `ResourceStat` implementation. +pub type ResourceStatRef = Arc; + +/// A trait for getting resource statistics. +pub trait ResourceStat { + /// Get the total CPU in millicores. + fn get_total_cpu_millicores(&self) -> i64; + /// Get the total memory in bytes. 
+    fn get_total_memory_bytes(&self) -> i64;
+    /// Get the CPU usage in millicores.
+    fn get_cpu_usage_millicores(&self) -> i64;
+    /// Get the memory usage in bytes.
+    fn get_memory_usage_bytes(&self) -> i64;
+}
+
+/// An implementation of the `ResourceStat` trait.
+pub struct ResourceStatImpl {
+    cpu_usage_millicores: Arc<AtomicI64>,
+    last_cpu_usage_usecs: Arc<AtomicI64>,
+    calculate_interval: Duration,
+    handler: Option<JoinHandle<()>>,
+}
+
+impl Default for ResourceStatImpl {
+    fn default() -> Self {
+        Self {
+            cpu_usage_millicores: Arc::new(AtomicI64::new(0)),
+            last_cpu_usage_usecs: Arc::new(AtomicI64::new(0)),
+            calculate_interval: Duration::from_secs(5),
+            handler: None,
+        }
+    }
+}
+
+impl ResourceStatImpl {
+    /// Start collecting CPU usage periodically. It calculates the CPU usage in millicores from the rate of change of the `usage_usec` counter in `/sys/fs/cgroup/cpu.stat`.
+    /// It ONLY works in cgroup v2 environment.
+    pub fn start_collect_cpu_usage(&mut self) {
+        if self.handler.is_some() {
+            return;
+        }
+
+        let cpu_usage_millicores = self.cpu_usage_millicores.clone();
+        let last_cpu_usage_usecs = self.last_cpu_usage_usecs.clone();
+        let calculate_interval = self.calculate_interval;
+
+        let handler = common_runtime::spawn_global(async move {
+            info!(
+                "Starting to collect CPU usage every {} seconds",
+                calculate_interval.as_secs()
+            );
+            loop {
+                let current_cpu_usage_usecs = get_cpu_usage_from_cgroups();
+                if let Some(current_cpu_usage_usecs) = current_cpu_usage_usecs {
+                    // Skip the first time to collect CPU usage.
+                    if last_cpu_usage_usecs.load(Ordering::Relaxed) == 0 {
+                        last_cpu_usage_usecs.store(current_cpu_usage_usecs, Ordering::Relaxed);
+                        continue;
+                    }
+                    let cpu_usage = calculate_cpu_usage(
+                        current_cpu_usage_usecs,
+                        last_cpu_usage_usecs.load(Ordering::Relaxed),
+                        calculate_interval.as_millis() as i64,
+                    );
+                    cpu_usage_millicores.store(cpu_usage, Ordering::Relaxed);
+                    last_cpu_usage_usecs.store(current_cpu_usage_usecs, Ordering::Relaxed);
+                }
+                sleep(calculate_interval).await;
+            }
+        });
+
+        self.handler = Some(handler);
+    }
+}
+
+impl ResourceStat for ResourceStatImpl {
+    /// Get the total CPU in millicores.
+    fn get_total_cpu_millicores(&self) -> i64 {
+        get_total_cpu_millicores()
+    }
+
+    /// Get the total memory in bytes.
+    fn get_total_memory_bytes(&self) -> i64 {
+        get_total_memory_bytes()
+    }
+
+    /// Get the CPU usage in millicores.
+    fn get_cpu_usage_millicores(&self) -> i64 {
+        self.cpu_usage_millicores.load(Ordering::Relaxed)
+    }
+
+    /// Get the memory usage in bytes.
+    /// It ONLY works in cgroup v2 environment.
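+    /// Returns `0` when the usage cannot be read from the cgroups filesystem.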
+ fn get_memory_usage_bytes(&self) -> i64 { + get_memory_usage_from_cgroups().unwrap_or_default() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_total_cpu_cores() { + assert!(get_total_cpu_cores() > 0); + } + + #[test] + fn test_get_total_memory_readable() { + assert!(get_total_memory_readable().unwrap() > ReadableSize::mb(0)); + } +} diff --git a/src/datanode/Cargo.toml b/src/datanode/Cargo.toml index 3dcffd0ac9..265ede339e 100644 --- a/src/datanode/Cargo.toml +++ b/src/datanode/Cargo.toml @@ -30,6 +30,7 @@ common-procedure.workspace = true common-query.workspace = true common-recordbatch.workspace = true common-runtime.workspace = true +common-stat.workspace = true common-telemetry.workspace = true common-time.workspace = true common-version.workspace = true diff --git a/src/datanode/src/datanode.rs b/src/datanode/src/datanode.rs index ed8b41f0c7..b9b8edcdba 100644 --- a/src/datanode/src/datanode.rs +++ b/src/datanode/src/datanode.rs @@ -27,6 +27,7 @@ use common_meta::key::runtime_switch::RuntimeSwitchManager; use common_meta::key::{SchemaMetadataManager, SchemaMetadataManagerRef}; use common_meta::kv_backend::KvBackendRef; pub use common_procedure::options::ProcedureConfig; +use common_stat::ResourceStatImpl; use common_telemetry::{error, info, warn}; use common_wal::config::DatanodeWalConfig; use common_wal::config::kafka::DatanodeKafkaConfig; @@ -282,6 +283,9 @@ impl DatanodeBuilder { open_all_regions.await?; } + let mut resource_stat = ResourceStatImpl::default(); + resource_stat.start_collect_cpu_usage(); + let heartbeat_task = if let Some(meta_client) = meta_client { Some( HeartbeatTask::try_new( @@ -290,6 +294,7 @@ impl DatanodeBuilder { meta_client, cache_registry, self.plugins.clone(), + Arc::new(resource_stat), ) .await?, ) diff --git a/src/datanode/src/heartbeat.rs b/src/datanode/src/heartbeat.rs index 9c059e5698..607e031b43 100644 --- a/src/datanode/src/heartbeat.rs +++ b/src/datanode/src/heartbeat.rs @@ -20,7 +20,6 @@ use std::time::Duration; use api::v1::meta::heartbeat_request::NodeWorkloads; use api::v1::meta::{DatanodeWorkloads, HeartbeatRequest, NodeInfo, Peer, RegionRole, RegionStat}; use common_base::Plugins; -use common_config::utils::ResourceSpec; use common_meta::cache_invalidator::CacheInvalidatorRef; use common_meta::datanode::REGION_STATISTIC_KEY; use common_meta::distributed_time_constants::META_KEEP_ALIVE_INTERVAL_SECS; @@ -31,6 +30,7 @@ use common_meta::heartbeat::handler::{ }; use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MailboxRef}; use common_meta::heartbeat::utils::outgoing_message_to_mailbox_message; +use common_stat::ResourceStatRef; use common_telemetry::{debug, error, info, trace, warn}; use common_workload::DatanodeWorkloadType; use meta_client::MetaClientRef; @@ -63,7 +63,7 @@ pub struct HeartbeatTask { interval: u64, resp_handler_executor: HeartbeatResponseHandlerExecutorRef, region_alive_keeper: Arc, - resource_spec: ResourceSpec, + resource_stat: ResourceStatRef, } impl Drop for HeartbeatTask { @@ -80,6 +80,7 @@ impl HeartbeatTask { meta_client: MetaClientRef, cache_invalidator: CacheInvalidatorRef, plugins: Plugins, + resource_stat: ResourceStatRef, ) -> Result { let countdown_task_handler_ext = plugins.get::(); let region_alive_keeper = Arc::new(RegionAliveKeeper::new( @@ -109,7 +110,7 @@ impl HeartbeatTask { interval: opts.heartbeat.interval.as_millis() as u64, resp_handler_executor, region_alive_keeper, - resource_spec: Default::default(), + resource_stat, }) } @@ -186,6 +187,7 @@ impl HeartbeatTask { 
.context(error::HandleHeartbeatResponseSnafu) } + #[allow(deprecated)] /// Start heartbeat task, spawn background task. pub async fn start( &self, @@ -237,8 +239,9 @@ impl HeartbeatTask { self.region_alive_keeper.start(Some(event_receiver)).await?; let mut last_sent = Instant::now(); - let cpus = self.resource_spec.cpus as u32; - let memory_bytes = self.resource_spec.memory.unwrap_or_default().as_bytes(); + let total_cpu_millicores = self.resource_stat.get_total_cpu_millicores(); + let total_memory_bytes = self.resource_stat.get_total_memory_bytes(); + let resource_stat = self.resource_stat.clone(); common_runtime::spawn_hb(async move { let sleep = tokio::time::sleep(Duration::from_millis(0)); @@ -252,8 +255,13 @@ impl HeartbeatTask { version: build_info.version.to_string(), git_commit: build_info.commit_short.to_string(), start_time_ms: node_epoch, - cpus, - memory_bytes, + total_cpu_millicores, + total_memory_bytes, + cpu_usage_millicores: 0, + memory_usage_bytes: 0, + // TODO(zyy17): Remove these deprecated fields when the deprecated fields are removed from the proto. + cpus: total_cpu_millicores as u32, + memory_bytes: total_memory_bytes as u64, hostname: hostname::get() .unwrap_or_default() .to_string_lossy() @@ -297,12 +305,18 @@ impl HeartbeatTask { let topic_stats = region_server_clone.topic_stats(); let now = Instant::now(); let duration_since_epoch = (now - epoch).as_millis() as u64; - let req = HeartbeatRequest { + let mut req = HeartbeatRequest { region_stats, topic_stats, duration_since_epoch, ..heartbeat_request.clone() }; + + if let Some(info) = req.info.as_mut() { + info.cpu_usage_millicores = resource_stat.get_cpu_usage_millicores(); + info.memory_usage_bytes = resource_stat.get_memory_usage_bytes(); + } + sleep.as_mut().reset(now + Duration::from_millis(interval)); Some(req) } diff --git a/src/flow/src/heartbeat.rs b/src/flow/src/heartbeat.rs index cc42668f5a..89b37860c5 100644 --- a/src/flow/src/heartbeat.rs +++ b/src/flow/src/heartbeat.rs @@ -18,7 +18,6 @@ use std::sync::Arc; use std::sync::atomic::{AtomicBool, Ordering}; use api::v1::meta::{HeartbeatRequest, Peer}; -use common_config::utils::ResourceSpec; use common_error::ext::BoxedError; use common_meta::heartbeat::handler::{ HeartbeatResponseHandlerContext, HeartbeatResponseHandlerExecutorRef, @@ -26,6 +25,7 @@ use common_meta::heartbeat::handler::{ use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MailboxRef, OutgoingMessage}; use common_meta::heartbeat::utils::outgoing_message_to_mailbox_message; use common_meta::key::flow::flow_state::FlowStat; +use common_stat::ResourceStatRef; use common_telemetry::{debug, error, info, warn}; use greptime_proto::v1::meta::NodeInfo; use meta_client::client::{HeartbeatSender, HeartbeatStream, MetaClient}; @@ -69,7 +69,7 @@ pub struct HeartbeatTask { resp_handler_executor: HeartbeatResponseHandlerExecutorRef, running: Arc, query_stat_size: Option, - resource_spec: ResourceSpec, + resource_stat: ResourceStatRef, } impl HeartbeatTask { @@ -77,11 +77,13 @@ impl HeartbeatTask { self.query_stat_size = Some(query_stat_size); self } + pub fn new( opts: &FlownodeOptions, meta_client: Arc, heartbeat_opts: HeartbeatOptions, resp_handler_executor: HeartbeatResponseHandlerExecutorRef, + resource_stat: ResourceStatRef, ) -> Self { Self { node_id: opts.node_id.unwrap_or(0), @@ -93,7 +95,7 @@ impl HeartbeatTask { resp_handler_executor, running: Arc::new(AtomicBool::new(false)), query_stat_size: None, - resource_spec: Default::default(), + resource_stat, } } @@ -146,6 +148,8 @@ impl 
HeartbeatTask { heartbeat_request: &HeartbeatRequest, message: Option, latest_report: &Option, + cpu_usage: i64, + memory_usage: i64, ) -> Option { let mailbox_message = match message.map(outgoing_message_to_mailbox_message) { Some(Ok(message)) => Some(message), @@ -170,21 +174,38 @@ impl HeartbeatTask { .collect(), }); - Some(HeartbeatRequest { + let mut heartbeat_request = HeartbeatRequest { mailbox_message, flow_stat, ..heartbeat_request.clone() - }) + }; + + if let Some(info) = heartbeat_request.info.as_mut() { + info.cpu_usage_millicores = cpu_usage; + info.memory_usage_bytes = memory_usage; + } + + Some(heartbeat_request) } - fn build_node_info(start_time_ms: u64, cpus: u32, memory_bytes: u64) -> Option { + #[allow(deprecated)] + fn build_node_info( + start_time_ms: u64, + total_cpu_millicores: i64, + total_memory_bytes: i64, + ) -> Option { let build_info = common_version::build_info(); Some(NodeInfo { version: build_info.version.to_string(), git_commit: build_info.commit_short.to_string(), start_time_ms, - cpus, - memory_bytes, + total_cpu_millicores, + total_memory_bytes, + cpu_usage_millicores: 0, + memory_usage_bytes: 0, + // TODO(zyy17): Remove these deprecated fields when the deprecated fields are removed from the proto. + cpus: total_cpu_millicores as u32, + memory_bytes: total_memory_bytes as u64, hostname: hostname::get() .unwrap_or_default() .to_string_lossy() @@ -203,9 +224,9 @@ impl HeartbeatTask { id: self.node_id, addr: self.peer_addr.clone(), }); - let cpus = self.resource_spec.cpus as u32; - let memory_bytes = self.resource_spec.memory.unwrap_or_default().as_bytes(); - + let total_cpu_millicores = self.resource_stat.get_total_cpu_millicores(); + let total_memory_bytes = self.resource_stat.get_total_memory_bytes(); + let resource_stat = self.resource_stat.clone(); let query_stat_size = self.query_stat_size.clone(); common_runtime::spawn_hb(async move { @@ -218,7 +239,7 @@ impl HeartbeatTask { let heartbeat_request = HeartbeatRequest { peer: self_peer, node_epoch, - info: Self::build_node_info(node_epoch, cpus, memory_bytes), + info: Self::build_node_info(node_epoch, total_cpu_millicores, total_memory_bytes), ..Default::default() }; @@ -226,7 +247,7 @@ impl HeartbeatTask { let req = tokio::select! 
{ message = outgoing_rx.recv() => { if let Some(message) = message { - Self::new_heartbeat_request(&heartbeat_request, Some(message), &latest_report) + Self::new_heartbeat_request(&heartbeat_request, Some(message), &latest_report, 0, 0) } else { warn!("Sender has been dropped, exiting the heartbeat loop"); // Receives None that means Sender was dropped, we need to break the current loop @@ -234,7 +255,7 @@ impl HeartbeatTask { } } _ = interval.tick() => { - Self::new_heartbeat_request(&heartbeat_request, None, &latest_report) + Self::new_heartbeat_request(&heartbeat_request, None, &latest_report, resource_stat.get_cpu_usage_millicores(), resource_stat.get_memory_usage_bytes()) } }; diff --git a/src/frontend/Cargo.toml b/src/frontend/Cargo.toml index b90e4f5eb2..24d9c8c5ff 100644 --- a/src/frontend/Cargo.toml +++ b/src/frontend/Cargo.toml @@ -37,6 +37,7 @@ common-procedure.workspace = true common-query.workspace = true common-recordbatch.workspace = true common-runtime.workspace = true +common-stat.workspace = true common-telemetry.workspace = true common-time.workspace = true common-version.workspace = true diff --git a/src/frontend/src/heartbeat.rs b/src/frontend/src/heartbeat.rs index 76fdc3305b..95645ad1ca 100644 --- a/src/frontend/src/heartbeat.rs +++ b/src/frontend/src/heartbeat.rs @@ -18,12 +18,12 @@ mod tests; use std::sync::Arc; use api::v1::meta::{HeartbeatRequest, NodeInfo, Peer}; -use common_config::utils::ResourceSpec; use common_meta::heartbeat::handler::{ HeartbeatResponseHandlerContext, HeartbeatResponseHandlerExecutorRef, }; use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MailboxRef, OutgoingMessage}; use common_meta::heartbeat::utils::outgoing_message_to_mailbox_message; +use common_stat::ResourceStatRef; use common_telemetry::{debug, error, info, warn}; use meta_client::client::{HeartbeatSender, HeartbeatStream, MetaClient}; use servers::addrs; @@ -47,7 +47,7 @@ pub struct HeartbeatTask { retry_interval: Duration, resp_handler_executor: HeartbeatResponseHandlerExecutorRef, start_time_ms: u64, - resource_spec: ResourceSpec, + resource_stat: ResourceStatRef, } impl HeartbeatTask { @@ -56,6 +56,7 @@ impl HeartbeatTask { meta_client: Arc, heartbeat_opts: HeartbeatOptions, resp_handler_executor: HeartbeatResponseHandlerExecutorRef, + resource_stat: ResourceStatRef, ) -> Self { HeartbeatTask { // if internal grpc is configured, use its address as the peer address @@ -71,7 +72,7 @@ impl HeartbeatTask { retry_interval: heartbeat_opts.retry_interval, resp_handler_executor, start_time_ms: common_time::util::current_time_millis() as u64, - resource_spec: Default::default(), + resource_stat, } } @@ -133,6 +134,8 @@ impl HeartbeatTask { fn new_heartbeat_request( heartbeat_request: &HeartbeatRequest, message: Option, + cpu_usage: i64, + memory_usage: i64, ) -> Option { let mailbox_message = match message.map(outgoing_message_to_mailbox_message) { Some(Ok(message)) => Some(message), @@ -143,21 +146,38 @@ impl HeartbeatTask { None => None, }; - Some(HeartbeatRequest { + let mut heartbeat_request = HeartbeatRequest { mailbox_message, ..heartbeat_request.clone() - }) + }; + + if let Some(info) = heartbeat_request.info.as_mut() { + info.memory_usage_bytes = memory_usage; + info.cpu_usage_millicores = cpu_usage; + } + + Some(heartbeat_request) } - fn build_node_info(start_time_ms: u64, cpus: u32, memory_bytes: u64) -> Option { + #[allow(deprecated)] + fn build_node_info( + start_time_ms: u64, + total_cpu_millicores: i64, + total_memory_bytes: i64, + ) -> Option { let build_info = 
common_version::build_info(); Some(NodeInfo { version: build_info.version.to_string(), git_commit: build_info.commit_short.to_string(), start_time_ms, - cpus, - memory_bytes, + total_cpu_millicores, + total_memory_bytes, + cpu_usage_millicores: 0, + memory_usage_bytes: 0, + // TODO(zyy17): Remove these deprecated fields when the deprecated fields are removed from the proto. + cpus: total_cpu_millicores as u32, + memory_bytes: total_memory_bytes as u64, hostname: hostname::get() .unwrap_or_default() .to_string_lossy() @@ -177,16 +197,20 @@ impl HeartbeatTask { id: 0, addr: self.peer_addr.clone(), }); - let cpus = self.resource_spec.cpus as u32; - let memory_bytes = self.resource_spec.memory.unwrap_or_default().as_bytes(); - + let total_cpu_millicores = self.resource_stat.get_total_cpu_millicores(); + let total_memory_bytes = self.resource_stat.get_total_memory_bytes(); + let resource_stat = self.resource_stat.clone(); common_runtime::spawn_hb(async move { let sleep = tokio::time::sleep(Duration::from_millis(0)); tokio::pin!(sleep); let heartbeat_request = HeartbeatRequest { peer: self_peer, - info: Self::build_node_info(start_time_ms, cpus, memory_bytes), + info: Self::build_node_info( + start_time_ms, + total_cpu_millicores, + total_memory_bytes, + ), ..Default::default() }; @@ -194,7 +218,7 @@ impl HeartbeatTask { let req = tokio::select! { message = outgoing_rx.recv() => { if let Some(message) = message { - Self::new_heartbeat_request(&heartbeat_request, Some(message)) + Self::new_heartbeat_request(&heartbeat_request, Some(message), 0, 0) } else { warn!("Sender has been dropped, exiting the heartbeat loop"); // Receives None that means Sender was dropped, we need to break the current loop @@ -202,8 +226,8 @@ impl HeartbeatTask { } } _ = &mut sleep => { - sleep.as_mut().reset(Instant::now() + report_interval); - Self::new_heartbeat_request(&heartbeat_request, None) + sleep.as_mut().reset(Instant::now() + report_interval); + Self::new_heartbeat_request(&heartbeat_request, None, resource_stat.get_cpu_usage_millicores(), resource_stat.get_memory_usage_bytes()) } }; diff --git a/src/meta-client/src/client.rs b/src/meta-client/src/client.rs index 2a66c1570a..d819251597 100644 --- a/src/meta-client/src/client.rs +++ b/src/meta-client/src/client.rs @@ -24,7 +24,9 @@ mod util; use std::fmt::Debug; use std::sync::Arc; -use api::v1::meta::{ProcedureDetailResponse, ReconcileRequest, ReconcileResponse, Role}; +use api::v1::meta::{ + MetasrvNodeInfo, ProcedureDetailResponse, ReconcileRequest, ReconcileResponse, Role, +}; pub use ask_leader::{AskLeader, LeaderProvider, LeaderProviderRef}; use cluster::Client as ClusterClient; pub use cluster::ClusterKvBackend; @@ -371,7 +373,8 @@ impl ClusterInfo for MetaClient { let mut nodes = if get_metasrv_nodes { let last_activity_ts = -1; // Metasrv does not provide this information. 
- let (leader, followers) = cluster_client.get_metasrv_peers().await?; + let (leader, followers): (Option, Vec) = + cluster_client.get_metasrv_peers().await?; followers .into_iter() .map(|node| { @@ -383,8 +386,10 @@ impl ClusterInfo for MetaClient { version: node_info.version, git_commit: node_info.git_commit, start_time_ms: node_info.start_time_ms, - cpus: node_info.cpus, - memory_bytes: node_info.memory_bytes, + total_cpu_millicores: node_info.total_cpu_millicores, + total_memory_bytes: node_info.total_memory_bytes, + cpu_usage_millicores: node_info.cpu_usage_millicores, + memory_usage_bytes: node_info.memory_usage_bytes, hostname: node_info.hostname, } } else { @@ -396,8 +401,10 @@ impl ClusterInfo for MetaClient { version: node.version, git_commit: node.git_commit, start_time_ms: node.start_time_ms, - cpus: node.cpus, - memory_bytes: node.memory_bytes, + total_cpu_millicores: node.cpus as i64, + total_memory_bytes: node.memory_bytes as i64, + cpu_usage_millicores: 0, + memory_usage_bytes: 0, hostname: "".to_string(), } } @@ -411,8 +418,10 @@ impl ClusterInfo for MetaClient { version: node_info.version, git_commit: node_info.git_commit, start_time_ms: node_info.start_time_ms, - cpus: node_info.cpus, - memory_bytes: node_info.memory_bytes, + total_cpu_millicores: node_info.total_cpu_millicores, + total_memory_bytes: node_info.total_memory_bytes, + cpu_usage_millicores: node_info.cpu_usage_millicores, + memory_usage_bytes: node_info.memory_usage_bytes, hostname: node_info.hostname, } } else { @@ -424,8 +433,10 @@ impl ClusterInfo for MetaClient { version: node.version, git_commit: node.git_commit, start_time_ms: node.start_time_ms, - cpus: node.cpus, - memory_bytes: node.memory_bytes, + total_cpu_millicores: node.cpus as i64, + total_memory_bytes: node.memory_bytes as i64, + cpu_usage_millicores: 0, + memory_usage_bytes: 0, hostname: "".to_string(), } } diff --git a/src/meta-srv/Cargo.toml b/src/meta-srv/Cargo.toml index 90a4fdc17b..bd2075501c 100644 --- a/src/meta-srv/Cargo.toml +++ b/src/meta-srv/Cargo.toml @@ -39,6 +39,7 @@ common-meta.workspace = true common-options.workspace = true common-procedure.workspace = true common-runtime.workspace = true +common-stat.workspace = true common-telemetry.workspace = true common-time.workspace = true common-version.workspace = true diff --git a/src/meta-srv/src/discovery/lease.rs b/src/meta-srv/src/discovery/lease.rs index 46b92c0f1a..9d9e0d6c23 100644 --- a/src/meta-srv/src/discovery/lease.rs +++ b/src/meta-srv/src/discovery/lease.rs @@ -243,8 +243,10 @@ mod tests { version: "1.0.0".to_string(), git_commit: "1234567890".to_string(), start_time_ms: current_time_millis() as u64, - cpus: 0, - memory_bytes: 0, + total_cpu_millicores: 0, + total_memory_bytes: 0, + cpu_usage_millicores: 0, + memory_usage_bytes: 0, hostname: "test_hostname".to_string(), }; @@ -269,8 +271,10 @@ mod tests { version: "1.0.0".to_string(), git_commit: "1234567890".to_string(), start_time_ms: current_time_millis() as u64, - cpus: 0, - memory_bytes: 0, + total_cpu_millicores: 0, + total_memory_bytes: 0, + cpu_usage_millicores: 0, + memory_usage_bytes: 0, hostname: "test_hostname".to_string(), }; @@ -307,8 +311,10 @@ mod tests { version: "1.0.0".to_string(), git_commit: "1234567890".to_string(), start_time_ms: last_activity_ts as u64, - cpus: 0, - memory_bytes: 0, + total_cpu_millicores: 0, + total_memory_bytes: 0, + cpu_usage_millicores: 0, + memory_usage_bytes: 0, hostname: "test_hostname".to_string(), }; diff --git a/src/meta-srv/src/election/rds/mysql.rs 
b/src/meta-srv/src/election/rds/mysql.rs index a0890969f8..014923c7c3 100644 --- a/src/meta-srv/src/election/rds/mysql.rs +++ b/src/meta-srv/src/election/rds/mysql.rs @@ -1161,8 +1161,10 @@ mod tests { version: "test_version".to_string(), git_commit: "test_git_commit".to_string(), start_time_ms: 0, - cpus: 0, - memory_bytes: 0, + total_cpu_millicores: 0, + total_memory_bytes: 0, + cpu_usage_millicores: 0, + memory_usage_bytes: 0, hostname: "test_hostname".to_string(), }; mysql_election.register_candidate(&node_info).await.unwrap(); diff --git a/src/meta-srv/src/election/rds/postgres.rs b/src/meta-srv/src/election/rds/postgres.rs index 14b2bbb409..beab74dac4 100644 --- a/src/meta-srv/src/election/rds/postgres.rs +++ b/src/meta-srv/src/election/rds/postgres.rs @@ -1000,8 +1000,10 @@ mod tests { version: "test_version".to_string(), git_commit: "test_git_commit".to_string(), start_time_ms: 0, - cpus: 0, - memory_bytes: 0, + total_cpu_millicores: 0, + total_memory_bytes: 0, + cpu_usage_millicores: 0, + memory_usage_bytes: 0, hostname: "test_hostname".to_string(), }; pg_election.register_candidate(&node_info).await.unwrap(); diff --git a/src/meta-srv/src/handler/collect_cluster_info_handler.rs b/src/meta-srv/src/handler/collect_cluster_info_handler.rs index f144f3edc5..c96229f9cf 100644 --- a/src/meta-srv/src/handler/collect_cluster_info_handler.rs +++ b/src/meta-srv/src/handler/collect_cluster_info_handler.rs @@ -52,8 +52,10 @@ impl HeartbeatHandler for CollectFrontendClusterInfoHandler { version: info.version, git_commit: info.git_commit, start_time_ms: info.start_time_ms, - cpus: info.cpus, - memory_bytes: info.memory_bytes, + total_cpu_millicores: info.total_cpu_millicores, + total_memory_bytes: info.total_memory_bytes, + cpu_usage_millicores: info.cpu_usage_millicores, + memory_usage_bytes: info.memory_usage_bytes, hostname: info.hostname, }; @@ -88,8 +90,10 @@ impl HeartbeatHandler for CollectFlownodeClusterInfoHandler { version: info.version, git_commit: info.git_commit, start_time_ms: info.start_time_ms, - cpus: info.cpus, - memory_bytes: info.memory_bytes, + total_cpu_millicores: info.total_cpu_millicores, + total_memory_bytes: info.total_memory_bytes, + cpu_usage_millicores: info.cpu_usage_millicores, + memory_usage_bytes: info.memory_usage_bytes, hostname: info.hostname, }; @@ -142,8 +146,10 @@ impl HeartbeatHandler for CollectDatanodeClusterInfoHandler { version: info.version, git_commit: info.git_commit, start_time_ms: info.start_time_ms, - cpus: info.cpus, - memory_bytes: info.memory_bytes, + total_cpu_millicores: info.total_cpu_millicores, + total_memory_bytes: info.total_memory_bytes, + cpu_usage_millicores: info.cpu_usage_millicores, + memory_usage_bytes: info.memory_usage_bytes, hostname: info.hostname, }; diff --git a/src/meta-srv/src/metasrv.rs b/src/meta-srv/src/metasrv.rs index 4c2c7fcf53..aeaea1337b 100644 --- a/src/meta-srv/src/metasrv.rs +++ b/src/meta-srv/src/metasrv.rs @@ -22,7 +22,6 @@ use std::time::Duration; use clap::ValueEnum; use common_base::Plugins; use common_base::readable_size::ReadableSize; -use common_config::utils::ResourceSpec; use common_config::{Configurable, DEFAULT_DATA_HOME}; use common_event_recorder::EventRecorderOptions; use common_greptimedb_telemetry::GreptimeDBTelemetryTask; @@ -47,6 +46,7 @@ use common_options::datanode::DatanodeClientOptions; use common_options::memory::MemoryOptions; use common_procedure::ProcedureManagerRef; use common_procedure::options::ProcedureConfig; +use common_stat::ResourceStatRef; use 
common_telemetry::logging::{LoggingOptions, TracingOptions}; use common_telemetry::{error, info, warn}; use common_wal::config::MetasrvWalConfig; @@ -372,12 +372,16 @@ pub struct MetasrvNodeInfo { pub git_commit: String, // The node start timestamp in milliseconds pub start_time_ms: u64, - // The node cpus + // The node total cpu millicores #[serde(default)] - pub cpus: u32, - // The node memory bytes + pub total_cpu_millicores: i64, #[serde(default)] - pub memory_bytes: u64, + // The node total memory bytes + pub total_memory_bytes: i64, + /// The node build cpu usage millicores + pub cpu_usage_millicores: i64, + /// The node build memory usage bytes + pub memory_usage_bytes: i64, // The node hostname #[serde(default)] pub hostname: String, @@ -397,15 +401,19 @@ impl From for api::v1::meta::MetasrvNodeInfo { version: node_info.version.clone(), git_commit: node_info.git_commit.clone(), start_time_ms: node_info.start_time_ms, - cpus: node_info.cpus, - memory_bytes: node_info.memory_bytes, + cpus: node_info.total_cpu_millicores as u32, + memory_bytes: node_info.total_memory_bytes as u64, // The canonical location for node information. info: Some(api::v1::meta::NodeInfo { version: node_info.version, git_commit: node_info.git_commit, start_time_ms: node_info.start_time_ms, - cpus: node_info.cpus, - memory_bytes: node_info.memory_bytes, + total_cpu_millicores: node_info.total_cpu_millicores, + total_memory_bytes: node_info.total_memory_bytes, + cpu_usage_millicores: node_info.cpu_usage_millicores, + memory_usage_bytes: node_info.memory_usage_bytes, + cpus: node_info.total_cpu_millicores as u32, + memory_bytes: node_info.total_memory_bytes as u64, hostname: node_info.hostname, }), } @@ -517,7 +525,7 @@ pub struct Metasrv { region_flush_ticker: Option, table_id_sequence: SequenceRef, reconciliation_manager: ReconciliationManagerRef, - resource_spec: ResourceSpec, + resource_stat: ResourceStatRef, plugins: Plugins, } @@ -699,8 +707,8 @@ impl Metasrv { self.start_time_ms } - pub fn resource_spec(&self) -> &ResourceSpec { - &self.resource_spec + pub fn resource_stat(&self) -> &ResourceStatRef { + &self.resource_stat } pub fn node_info(&self) -> MetasrvNodeInfo { @@ -710,8 +718,10 @@ impl Metasrv { version: build_info.version.to_string(), git_commit: build_info.commit_short.to_string(), start_time_ms: self.start_time_ms(), - cpus: self.resource_spec().cpus as u32, - memory_bytes: self.resource_spec().memory.unwrap_or_default().as_bytes(), + total_cpu_millicores: self.resource_stat.get_total_cpu_millicores(), + total_memory_bytes: self.resource_stat.get_total_memory_bytes(), + cpu_usage_millicores: self.resource_stat.get_cpu_usage_millicores(), + memory_usage_bytes: self.resource_stat.get_memory_usage_bytes(), hostname: hostname::get() .unwrap_or_default() .to_string_lossy() diff --git a/src/meta-srv/src/metasrv/builder.rs b/src/meta-srv/src/metasrv/builder.rs index 9cc0b8cc72..0bcc914e27 100644 --- a/src/meta-srv/src/metasrv/builder.rs +++ b/src/meta-srv/src/metasrv/builder.rs @@ -46,6 +46,7 @@ use common_meta::stats::topic::TopicStatsRegistry; use common_meta::wal_options_allocator::{build_kafka_client, build_wal_options_allocator}; use common_procedure::ProcedureManagerRef; use common_procedure::local::{LocalManager, ManagerConfig}; +use common_stat::ResourceStatImpl; use common_telemetry::{info, warn}; use snafu::{ResultExt, ensure}; use store_api::storage::MAX_REGION_SEQ; @@ -517,6 +518,9 @@ impl MetasrvBuilder { .try_start() .context(error::InitReconciliationManagerSnafu)?; + let mut 
resource_stat = ResourceStatImpl::default(); + resource_stat.start_collect_cpu_usage(); + Ok(Metasrv { state, started: Arc::new(AtomicBool::new(false)), @@ -556,7 +560,7 @@ impl MetasrvBuilder { table_id_sequence, reconciliation_manager, topic_stats_registry, - resource_spec: Default::default(), + resource_stat: Arc::new(resource_stat), }) } } diff --git a/src/meta-srv/src/service/cluster.rs b/src/meta-srv/src/service/cluster.rs index e39337c374..5c0ae4c71f 100644 --- a/src/meta-srv/src/service/cluster.rs +++ b/src/meta-srv/src/service/cluster.rs @@ -97,8 +97,10 @@ impl Metasrv { version: build_info.version.to_string(), git_commit: build_info.commit_short.to_string(), start_time_ms: self.start_time_ms(), - cpus: self.resource_spec().cpus as u32, - memory_bytes: self.resource_spec().memory.unwrap_or_default().as_bytes(), + total_cpu_millicores: self.resource_stat().get_total_cpu_millicores(), + total_memory_bytes: self.resource_stat().get_total_memory_bytes(), + cpu_usage_millicores: self.resource_stat().get_cpu_usage_millicores(), + memory_usage_bytes: self.resource_stat().get_memory_usage_bytes(), hostname: hostname::get() .unwrap_or_default() .to_string_lossy() diff --git a/src/standalone/src/information_extension.rs b/src/standalone/src/information_extension.rs index b15ab74a98..852da25e65 100644 --- a/src/standalone/src/information_extension.rs +++ b/src/standalone/src/information_extension.rs @@ -24,6 +24,7 @@ use common_meta::key::flow::flow_state::FlowStat; use common_meta::peer::Peer; use common_procedure::{ProcedureInfo, ProcedureManagerRef}; use common_query::request::QueryRequest; +use common_stat::{ResourceStatImpl, ResourceStatRef}; use datanode::region_server::RegionServer; use flow::StreamingEngine; use snafu::ResultExt; @@ -35,15 +36,19 @@ pub struct StandaloneInformationExtension { procedure_manager: ProcedureManagerRef, start_time_ms: u64, flow_streaming_engine: RwLock>>, + resource_stat: ResourceStatRef, } impl StandaloneInformationExtension { pub fn new(region_server: RegionServer, procedure_manager: ProcedureManagerRef) -> Self { + let mut resource_stat = ResourceStatImpl::default(); + resource_stat.start_collect_cpu_usage(); Self { region_server, procedure_manager, start_time_ms: common_time::util::current_time_millis() as u64, flow_streaming_engine: RwLock::new(None), + resource_stat: Arc::new(resource_stat), } } @@ -75,8 +80,10 @@ impl InformationExtension for StandaloneInformationExtension { // Use `self.start_time_ms` instead. // It's not precise but enough. 
start_time_ms: self.start_time_ms, - cpus: common_stat::get_total_cpu_millicores() as u32, - memory_bytes: common_stat::get_total_memory_bytes() as u64, + total_cpu_millicores: self.resource_stat.get_total_cpu_millicores(), + total_memory_bytes: self.resource_stat.get_total_memory_bytes(), + cpu_usage_millicores: self.resource_stat.get_cpu_usage_millicores(), + memory_usage_bytes: self.resource_stat.get_memory_usage_bytes(), hostname: hostname::get() .unwrap_or_default() .to_string_lossy() diff --git a/tests-integration/Cargo.toml b/tests-integration/Cargo.toml index 13e4cc3115..91cb0f5ad2 100644 --- a/tests-integration/Cargo.toml +++ b/tests-integration/Cargo.toml @@ -35,6 +35,7 @@ common-procedure.workspace = true common-query.workspace = true common-recordbatch.workspace = true common-runtime.workspace = true +common-stat.workspace = true common-telemetry.workspace = true common-test-util.workspace = true common-time.workspace = true diff --git a/tests-integration/src/cluster.rs b/tests-integration/src/cluster.rs index 6be94cbcd4..19c2ce4134 100644 --- a/tests-integration/src/cluster.rs +++ b/tests-integration/src/cluster.rs @@ -44,6 +44,7 @@ use common_meta::kv_backend::memory::MemoryKvBackend; use common_meta::peer::Peer; use common_runtime::Builder as RuntimeBuilder; use common_runtime::runtime::BuilderBuild; +use common_stat::ResourceStatImpl; use common_test_util::temp_dir::create_temp_dir; use common_wal::config::{DatanodeWalConfig, MetasrvWalConfig}; use datanode::config::DatanodeOptions; @@ -411,11 +412,15 @@ impl GreptimeDbClusterBuilder { let fe_opts = self.build_frontend_options(); + let mut resource_stat = ResourceStatImpl::default(); + resource_stat.start_collect_cpu_usage(); + let heartbeat_task = HeartbeatTask::new( &fe_opts, meta_client.clone(), HeartbeatOptions::default(), Arc::new(handlers_executor), + Arc::new(resource_stat), ); let instance = FrontendBuilder::new( diff --git a/tests/cases/distributed/information_schema/cluster_info.result b/tests/cases/distributed/information_schema/cluster_info.result index 63d02f4355..4ab8f6808d 100644 --- a/tests/cases/distributed/information_schema/cluster_info.result +++ b/tests/cases/distributed/information_schema/cluster_info.result @@ -11,8 +11,10 @@ DESC TABLE CLUSTER_INFO; | peer_type | String | | NO | | FIELD | | peer_addr | String | | YES | | FIELD | | peer_hostname | String | | YES | | FIELD | -| total_cpu_millicores | UInt32 | | NO | | FIELD | -| total_memory_bytes | UInt64 | | NO | | FIELD | +| total_cpu_millicores | Int64 | | NO | | FIELD | +| total_memory_bytes | Int64 | | NO | | FIELD | +| cpu_usage_millicores | Int64 | | NO | | FIELD | +| memory_usage_bytes | Int64 | | NO | | FIELD | | version | String | | NO | | FIELD | | git_commit | String | | NO | | FIELD | | start_time | TimestampMillisecond | | YES | | FIELD | diff --git a/tests/cases/standalone/common/system/information_schema.result b/tests/cases/standalone/common/system/information_schema.result index eef56b91b2..1cb53ccfe3 100644 --- a/tests/cases/standalone/common/system/information_schema.result +++ b/tests/cases/standalone/common/system/information_schema.result @@ -72,18 +72,20 @@ select * from information_schema.columns order by table_schema, table_name, colu | greptime | information_schema | check_constraints | constraint_catalog | 1 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | check_constraints | constraint_name | 3 | 2147483647 | 2147483647 
| | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | check_constraints | constraint_schema | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | -| greptime | information_schema | cluster_info | active_time | 11 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | -| greptime | information_schema | cluster_info | git_commit | 8 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | -| greptime | information_schema | cluster_info | node_status | 12 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | +| greptime | information_schema | cluster_info | active_time | 13 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | +| greptime | information_schema | cluster_info | cpu_usage_millicores | 7 | | | 19 | 0 | | | | | | select,insert | | Int64 | bigint | FIELD | | No | bigint | | | +| greptime | information_schema | cluster_info | git_commit | 10 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | +| greptime | information_schema | cluster_info | memory_usage_bytes | 8 | | | 19 | 0 | | | | | | select,insert | | Int64 | bigint | FIELD | | No | bigint | | | +| greptime | information_schema | cluster_info | node_status | 14 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | | greptime | information_schema | cluster_info | peer_addr | 3 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | | greptime | information_schema | cluster_info | peer_hostname | 4 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | | greptime | information_schema | cluster_info | peer_id | 1 | | | 19 | 0 | | | | | | select,insert | | Int64 | bigint | FIELD | | No | bigint | | | | greptime | information_schema | cluster_info | peer_type | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | -| greptime | information_schema | cluster_info | start_time | 9 | | | | | 3 | | | | | select,insert | | TimestampMillisecond | timestamp(3) | FIELD | | Yes | timestamp(3) | | | -| greptime | information_schema | cluster_info | total_cpu_millicores | 5 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | | -| greptime | information_schema | cluster_info | total_memory_bytes | 6 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | No | bigint unsigned | | | -| greptime | information_schema | cluster_info | uptime | 10 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | -| greptime | information_schema | cluster_info | version | 7 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | +| greptime | information_schema | cluster_info | start_time | 11 | | | | | 3 | | | | | select,insert | | TimestampMillisecond | timestamp(3) | FIELD | | Yes | timestamp(3) | | | +| greptime | information_schema | 
cluster_info | total_cpu_millicores | 5 | | | 19 | 0 | | | | | | select,insert | | Int64 | bigint | FIELD | | No | bigint | | | +| greptime | information_schema | cluster_info | total_memory_bytes | 6 | | | 19 | 0 | | | | | | select,insert | | Int64 | bigint | FIELD | | No | bigint | | | +| greptime | information_schema | cluster_info | uptime | 12 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | +| greptime | information_schema | cluster_info | version | 9 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | collation_character_set_applicability | character_set_name | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | collation_character_set_applicability | collation_name | 1 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | collations | character_set_name | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | diff --git a/tests/cases/standalone/information_schema/cluster_info.result b/tests/cases/standalone/information_schema/cluster_info.result index 8542984028..bc9520ba6a 100644 --- a/tests/cases/standalone/information_schema/cluster_info.result +++ b/tests/cases/standalone/information_schema/cluster_info.result @@ -11,8 +11,10 @@ DESC TABLE CLUSTER_INFO; | peer_type | String | | NO | | FIELD | | peer_addr | String | | YES | | FIELD | | peer_hostname | String | | YES | | FIELD | -| total_cpu_millicores | UInt32 | | NO | | FIELD | -| total_memory_bytes | UInt64 | | NO | | FIELD | +| total_cpu_millicores | Int64 | | NO | | FIELD | +| total_memory_bytes | Int64 | | NO | | FIELD | +| cpu_usage_millicores | Int64 | | NO | | FIELD | +| memory_usage_bytes | Int64 | | NO | | FIELD | | version | String | | NO | | FIELD | | git_commit | String | | NO | | FIELD | | start_time | TimestampMillisecond | | YES | | FIELD | From 03a29c6591e97de34196069d5c196e501016d3e1 Mon Sep 17 00:00:00 2001 From: Sicong Hu Date: Fri, 24 Oct 2025 11:24:13 +0800 Subject: [PATCH 09/14] fix: correct test_index_build_type_compact (#7137) Signed-off-by: SNC123 --- src/mito2/src/engine/index_build_test.rs | 17 +++--- src/mito2/src/engine/listener.rs | 63 +++++++++++++++----- src/mito2/src/sst/index.rs | 25 ++++++++ src/mito2/src/worker.rs | 11 +++- src/mito2/src/worker/handle_rebuild_index.rs | 6 +- 5 files changed, 91 insertions(+), 31 deletions(-) diff --git a/src/mito2/src/engine/index_build_test.rs b/src/mito2/src/engine/index_build_test.rs index 6fe27929e5..404aa3ad01 100644 --- a/src/mito2/src/engine/index_build_test.rs +++ b/src/mito2/src/engine/index_build_test.rs @@ -32,11 +32,6 @@ use crate::test_util::{ CreateRequestBuilder, TestEnv, build_rows, flush_region, put_rows, reopen_region, rows_schema, }; -// wait listener receives enough success count. 
-async fn wait_finish(listener: &IndexBuildListener, times: usize) { - listener.wait_finish(times).await; -} - fn async_build_mode_config(is_create_on_flush: bool) -> MitoConfig { let mut config = MitoConfig::default(); config.index.build_mode = IndexBuildMode::Async; @@ -84,7 +79,7 @@ fn assert_listener_counts( expected_success_count: usize, ) { assert_eq!(listener.begin_count(), expected_begin_count); - assert_eq!(listener.success_count(), expected_success_count); + assert_eq!(listener.finish_count(), expected_success_count); } #[tokio::test] @@ -155,7 +150,7 @@ async fn test_index_build_type_flush() { flush_region(&engine, region_id, None).await; // After 2 index build task are finished, 2 index files should exist. - wait_finish(&listener, 2).await; + listener.wait_finish(2).await; let scanner = engine .scanner(region_id, ScanRequest::default()) .await @@ -204,6 +199,8 @@ async fn test_index_build_type_compact() { put_and_flush(&engine, region_id, &column_schemas, 15..25).await; put_and_flush(&engine, region_id, &column_schemas, 40..50).await; + // all index build tasks begin means flush tasks are all finished. + listener.wait_begin(4).await; // Before compaction is triggered, files should be 4, and not all index files are built. let scanner = engine .scanner(region_id, ScanRequest::default()) @@ -216,8 +213,8 @@ async fn test_index_build_type_compact() { // This explicit compaction call serves to make the process deterministic for the test. compact(&engine, region_id).await; + listener.wait_begin(5).await; // 4 flush + 1 compaction begin // Before compaction is triggered, files should be 2, and not all index files are built. - listener.clear_success_count(); let scanner = engine .scanner(region_id, ScanRequest::default()) .await @@ -226,7 +223,7 @@ async fn test_index_build_type_compact() { assert!(num_of_index_files(&engine, &scanner, region_id).await < 2); // Wait a while to make sure index build tasks are finished. - wait_finish(&listener, 2).await; + listener.wait_stop(5).await; // 4 flush + 1 compaction = some abort + some finish let scanner = engine .scanner(region_id, ScanRequest::default()) .await @@ -292,7 +289,7 @@ async fn test_index_build_type_schema_change() { .handle_request(region_id, RegionRequest::Alter(set_index_request)) .await .unwrap(); - wait_finish(&listener, 1).await; + listener.wait_finish(1).await; let scanner = engine .scanner(region_id, ScanRequest::default()) .await diff --git a/src/mito2/src/engine/listener.rs b/src/mito2/src/engine/listener.rs index 317c3cdfd0..ebc20ac280 100644 --- a/src/mito2/src/engine/listener.rs +++ b/src/mito2/src/engine/listener.rs @@ -75,10 +75,13 @@ pub trait EventListener: Send + Sync { async fn on_notify_region_change_result_begin(&self, _region_id: RegionId) {} /// Notifies the listener that the index build task is executed successfully. - async fn on_index_build_success(&self, _region_file_id: RegionFileId) {} + async fn on_index_build_finish(&self, _region_file_id: RegionFileId) {} /// Notifies the listener that the index build task is started. async fn on_index_build_begin(&self, _region_file_id: RegionFileId) {} + + /// Notifies the listener that the index build task is aborted. 
+ async fn on_index_build_abort(&self, _region_file_id: RegionFileId) {} } pub type EventListenerRef = Arc; @@ -309,45 +312,75 @@ impl EventListener for NotifyRegionChangeResultListener { #[derive(Default)] pub struct IndexBuildListener { - notify: Notify, - success_count: AtomicUsize, - start_count: AtomicUsize, + begin_count: AtomicUsize, + begin_notify: Notify, + finish_count: AtomicUsize, + finish_notify: Notify, + abort_count: AtomicUsize, + abort_notify: Notify, + // stop means finished or aborted + stop_notify: Notify, } impl IndexBuildListener { /// Wait until index build is done for `times` times. pub async fn wait_finish(&self, times: usize) { - while self.success_count.load(Ordering::Relaxed) < times { - self.notify.notified().await; + while self.finish_count.load(Ordering::Relaxed) < times { + self.finish_notify.notified().await; + } + } + + /// Wait until index build is stopped for `times` times. + pub async fn wait_stop(&self, times: usize) { + while self.finish_count.load(Ordering::Relaxed) + self.abort_count.load(Ordering::Relaxed) + < times + { + self.stop_notify.notified().await; + } + } + + /// Wait until index build is begun for `times` times. + pub async fn wait_begin(&self, times: usize) { + while self.begin_count.load(Ordering::Relaxed) < times { + self.begin_notify.notified().await; } } /// Clears the success count. - pub fn clear_success_count(&self) { - self.success_count.store(0, Ordering::Relaxed); + pub fn clear_finish_count(&self) { + self.finish_count.store(0, Ordering::Relaxed); } /// Returns the success count. - pub fn success_count(&self) -> usize { - self.success_count.load(Ordering::Relaxed) + pub fn finish_count(&self) -> usize { + self.finish_count.load(Ordering::Relaxed) } /// Returns the start count. pub fn begin_count(&self) -> usize { - self.start_count.load(Ordering::Relaxed) + self.begin_count.load(Ordering::Relaxed) } } #[async_trait] impl EventListener for IndexBuildListener { - async fn on_index_build_success(&self, region_file_id: RegionFileId) { + async fn on_index_build_finish(&self, region_file_id: RegionFileId) { info!("Region {} index build successfully", region_file_id); - self.success_count.fetch_add(1, Ordering::Relaxed); - self.notify.notify_one(); + self.finish_count.fetch_add(1, Ordering::Relaxed); + self.finish_notify.notify_one(); + self.stop_notify.notify_one(); } async fn on_index_build_begin(&self, region_file_id: RegionFileId) { info!("Region {} index build begin", region_file_id); - self.start_count.fetch_add(1, Ordering::Relaxed); + self.begin_count.fetch_add(1, Ordering::Relaxed); + self.begin_notify.notify_one(); + } + + async fn on_index_build_abort(&self, region_file_id: RegionFileId) { + info!("Region {} index build aborted", region_file_id); + self.abort_count.fetch_add(1, Ordering::Relaxed); + self.abort_notify.notify_one(); + self.stop_notify.notify_one(); } } diff --git a/src/mito2/src/sst/index.rs b/src/mito2/src/sst/index.rs index 8ad7f6ef01..cc8469332a 100644 --- a/src/mito2/src/sst/index.rs +++ b/src/mito2/src/sst/index.rs @@ -62,6 +62,7 @@ use crate::sst::index::inverted_index::creator::InvertedIndexer; use crate::sst::parquet::SstInfo; use crate::sst::parquet::flat_format::primary_key_column_index; use crate::sst::parquet::format::PrimaryKeyArray; +use crate::worker::WorkerListener; pub(crate) const TYPE_INVERTED_INDEX: &str = "inverted_index"; pub(crate) const TYPE_FULLTEXT_INDEX: &str = "fulltext_index"; @@ -451,6 +452,7 @@ pub struct IndexBuildTask { pub file_meta: FileMeta, pub reason: IndexBuildType, pub 
access_layer: AccessLayerRef, + pub(crate) listener: WorkerListener, pub(crate) manifest_ctx: ManifestContextRef, pub write_cache: Option, pub file_purger: FilePurgerRef, @@ -486,6 +488,12 @@ impl IndexBuildTask { } async fn do_index_build(&mut self, version_control: VersionControlRef) { + self.listener + .on_index_build_begin(RegionFileId::new( + self.file_meta.region_id, + self.file_meta.file_id, + )) + .await; match self.index_build(version_control).await { Ok(outcome) => self.on_success(outcome).await, Err(e) => { @@ -540,6 +548,12 @@ impl IndexBuildTask { if !self.check_sst_file_exists(&version_control).await { // Calls abort to clean up index files. indexer.abort().await; + self.listener + .on_index_build_abort(RegionFileId::new( + self.file_meta.region_id, + self.file_meta.file_id, + )) + .await; return Ok(IndexBuildOutcome::Aborted(format!( "SST file not found during index build, region: {}, file_id: {}", self.file_meta.region_id, self.file_meta.file_id @@ -575,6 +589,12 @@ impl IndexBuildTask { if !self.check_sst_file_exists(&version_control).await { // Calls abort to clean up index files. indexer.abort().await; + self.listener + .on_index_build_abort(RegionFileId::new( + self.file_meta.region_id, + self.file_meta.file_id, + )) + .await; return Ok(IndexBuildOutcome::Aborted(format!( "SST file not found during index build, region: {}, file_id: {}", self.file_meta.region_id, self.file_meta.file_id @@ -1192,6 +1212,7 @@ mod tests { }, reason: IndexBuildType::Flush, access_layer: env.access_layer.clone(), + listener: WorkerListener::default(), manifest_ctx, write_cache: None, file_purger, @@ -1242,6 +1263,7 @@ mod tests { file_meta: file_meta.clone(), reason: IndexBuildType::Flush, access_layer: env.access_layer.clone(), + listener: WorkerListener::default(), manifest_ctx, write_cache: None, file_purger, @@ -1309,6 +1331,7 @@ mod tests { file_meta: file_meta.clone(), reason: IndexBuildType::Flush, access_layer: env.access_layer.clone(), + listener: WorkerListener::default(), manifest_ctx, write_cache: None, file_purger, @@ -1405,6 +1428,7 @@ mod tests { file_meta: file_meta.clone(), reason: IndexBuildType::Flush, access_layer: env.access_layer.clone(), + listener: WorkerListener::default(), manifest_ctx, write_cache: None, file_purger, @@ -1485,6 +1509,7 @@ mod tests { file_meta: file_meta.clone(), reason: IndexBuildType::Flush, access_layer: env.access_layer.clone(), + listener: WorkerListener::default(), manifest_ctx, write_cache: Some(write_cache.clone()), file_purger, diff --git a/src/mito2/src/worker.rs b/src/mito2/src/worker.rs index 87c25cd964..322141fd1b 100644 --- a/src/mito2/src/worker.rs +++ b/src/mito2/src/worker.rs @@ -1220,10 +1220,10 @@ impl WorkerListener { } } - pub(crate) async fn on_index_build_success(&self, _region_file_id: RegionFileId) { + pub(crate) async fn on_index_build_finish(&self, _region_file_id: RegionFileId) { #[cfg(any(test, feature = "test"))] if let Some(listener) = &self.listener { - listener.on_index_build_success(_region_file_id).await; + listener.on_index_build_finish(_region_file_id).await; } } @@ -1233,6 +1233,13 @@ impl WorkerListener { listener.on_index_build_begin(_region_file_id).await; } } + + pub(crate) async fn on_index_build_abort(&self, _region_file_id: RegionFileId) { + #[cfg(any(test, feature = "test"))] + if let Some(listener) = &self.listener { + listener.on_index_build_abort(_region_file_id).await; + } + } } #[cfg(test)] diff --git a/src/mito2/src/worker/handle_rebuild_index.rs b/src/mito2/src/worker/handle_rebuild_index.rs index 
71f9bc206f..38ca07f1a9 100644 --- a/src/mito2/src/worker/handle_rebuild_index.rs +++ b/src/mito2/src/worker/handle_rebuild_index.rs @@ -71,6 +71,7 @@ impl RegionWorkerLoop { file_meta: file.meta_ref().clone(), reason: build_type, access_layer: access_layer.clone(), + listener: self.listener.clone(), manifest_ctx: region.manifest_ctx.clone(), write_cache: self.cache_manager.write_cache().cloned(), file_purger: file.file_purger(), @@ -172,9 +173,6 @@ impl RegionWorkerLoop { let _ = self .index_build_scheduler .schedule_build(®ion.version_control, task); - self.listener - .on_index_build_begin(RegionFileId::new(region_id, file_handle.meta_ref().file_id)) - .await; } // Wait for all index build tasks to finish and notify the caller. common_runtime::spawn_global(async move { @@ -212,7 +210,7 @@ impl RegionWorkerLoop { ); for file_meta in &request.edit.files_to_add { self.listener - .on_index_build_success(RegionFileId::new(region_id, file_meta.file_id)) + .on_index_build_finish(RegionFileId::new(region_id, file_meta.file_id)) .await; } } From 6ad23bc9b41a368d7396b46838c6f5570b5539a3 Mon Sep 17 00:00:00 2001 From: LFC <990479+MichaelScofield@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:28:04 +0800 Subject: [PATCH 10/14] refactor: convert to postgres values directly from arrow (#7131) * refactor: convert to pg values directly from arrow Signed-off-by: luofucong * resolve PR comments Signed-off-by: luofucong --------- Signed-off-by: luofucong --- src/servers/src/postgres/handler.rs | 25 +- src/servers/src/postgres/types.rs | 1135 +++++++++++++++++---------- 2 files changed, 714 insertions(+), 446 deletions(-) diff --git a/src/servers/src/postgres/handler.rs b/src/servers/src/postgres/handler.rs index 9561b9605e..daccf9dc26 100644 --- a/src/servers/src/postgres/handler.rs +++ b/src/servers/src/postgres/handler.rs @@ -28,7 +28,7 @@ use futures::{Sink, SinkExt, Stream, StreamExt, future, stream}; use pgwire::api::portal::{Format, Portal}; use pgwire::api::query::{ExtendedQueryHandler, SimpleQueryHandler}; use pgwire::api::results::{ - DataRowEncoder, DescribePortalResponse, DescribeStatementResponse, QueryResponse, Response, Tag, + DescribePortalResponse, DescribeStatementResponse, QueryResponse, Response, Tag, }; use pgwire::api::stmt::{QueryParser, StoredStatement}; use pgwire::api::{ClientInfo, ErrorHandler, Type}; @@ -160,25 +160,16 @@ where let pg_schema = Arc::new(schema_to_pg(schema.as_ref(), field_format).map_err(convert_err)?); let pg_schema_ref = pg_schema.clone(); let data_row_stream = recordbatches_stream - .map(|record_batch_result| match record_batch_result { - Ok(rb) => stream::iter( - // collect rows from a single recordbatch into vector to avoid - // borrowing it - rb.rows().map(Ok).collect::>(), - ) + .map(move |result| match result { + Ok(record_batch) => stream::iter(RecordBatchRowIterator::new( + query_ctx.clone(), + pg_schema_ref.clone(), + record_batch, + )) .boxed(), Err(e) => stream::once(future::err(convert_err(e))).boxed(), }) - .flatten() // flatten into stream> - .map(move |row| { - row.and_then(|row| { - let mut encoder = DataRowEncoder::new(pg_schema_ref.clone()); - for (value, column) in row.into_iter().zip(schema.column_schemas()) { - encode_value(&query_ctx, value, &mut encoder, &column.data_type)?; - } - encoder.finish() - }) - }); + .flatten(); Ok(Response::Query(QueryResponse::new( pg_schema, diff --git a/src/servers/src/postgres/types.rs b/src/servers/src/postgres/types.rs index 9c32ee2fdd..b8251a5d95 100644 --- a/src/servers/src/postgres/types.rs +++ 
b/src/servers/src/postgres/types.rs @@ -18,23 +18,38 @@ mod error; mod interval; use std::collections::HashMap; -use std::ops::Deref; use std::sync::Arc; +use arrow::array::{Array, ArrayRef, AsArray}; +use arrow::datatypes::{ + Date32Type, Date64Type, Decimal128Type, DurationMicrosecondType, DurationMillisecondType, + DurationNanosecondType, DurationSecondType, Float32Type, Float64Type, Int8Type, Int16Type, + Int32Type, Int64Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalYearMonthType, + Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType, + TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, + TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, UInt64Type, +}; +use arrow_schema::{DataType, IntervalUnit, TimeUnit}; use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime}; -use common_time::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth}; +use common_decimal::Decimal128; +use common_recordbatch::RecordBatch; +use common_time::time::Time; +use common_time::{ + Date, Duration, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp, +}; use datafusion_common::ScalarValue; use datafusion_expr::LogicalPlan; use datatypes::arrow::datatypes::DataType as ArrowDataType; use datatypes::json::JsonStructureSettings; use datatypes::prelude::{ConcreteDataType, Value}; -use datatypes::schema::Schema; -use datatypes::types::{IntervalType, JsonFormat, TimestampType, jsonb_to_string}; -use datatypes::value::{ListValue, StructValue}; +use datatypes::schema::{ColumnSchema, Schema, SchemaRef}; +use datatypes::types::{IntervalType, TimestampType, jsonb_to_string}; +use datatypes::value::StructValue; use pgwire::api::Type; use pgwire::api::portal::{Format, Portal}; use pgwire::api::results::{DataRowEncoder, FieldInfo}; use pgwire::error::{PgWireError, PgWireResult}; +use pgwire::messages::data::DataRow; use session::context::QueryContextRef; use session::session_config::PGByteaOutputValue; use snafu::ResultExt; @@ -88,182 +103,158 @@ fn encode_struct( fn encode_array( query_ctx: &QueryContextRef, - value_list: ListValue, + array: ArrayRef, builder: &mut DataRowEncoder, ) -> PgWireResult<()> { - match value_list.datatype().as_ref() { - ConcreteDataType::Boolean(_) => { - let array = value_list - .items() + macro_rules! 
encode_primitive_array { + ($array: ident, $data_type: ty, $lower_type: ty, $upper_type: ty) => {{ + let array = $array.iter().collect::>>(); + if array .iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::Boolean(v) => Ok(Some(*v)), - _ => Err(convert_err(Error::Internal { - err_msg: format!("Invalid list item type, find {v:?}, expected bool",), - })), - }) - .collect::>>>()?; + .all(|x| x.is_none_or(|i| i <= <$lower_type>::MAX as $data_type)) + { + builder.encode_field( + &array + .into_iter() + .map(|x| x.map(|i| i as $lower_type)) + .collect::>>(), + ) + } else { + builder.encode_field( + &array + .into_iter() + .map(|x| x.map(|i| i as $upper_type)) + .collect::>>(), + ) + } + }}; + } + + match array.data_type() { + DataType::Boolean => { + let array = array.as_boolean(); + let array = array.iter().collect::>(); builder.encode_field(&array) } - ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => { - let array = value_list - .items() - .iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::Int8(v) => Ok(Some(*v)), - Value::UInt8(v) => Ok(Some(*v as i8)), - _ => Err(convert_err(Error::Internal { - err_msg: format!( - "Invalid list item type, find {v:?}, expected int8 or uint8", - ), - })), - }) - .collect::>>>()?; + DataType::Int8 => { + let array = array.as_primitive::(); + let array = array.iter().collect::>(); builder.encode_field(&array) } - ConcreteDataType::Int16(_) | ConcreteDataType::UInt16(_) => { - let array = value_list - .items() - .iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::Int16(v) => Ok(Some(*v)), - Value::UInt16(v) => Ok(Some(*v as i16)), - _ => Err(convert_err(Error::Internal { - err_msg: format!( - "Invalid list item type, find {v:?}, expected int16 or uint16", - ), - })), - }) - .collect::>>>()?; + DataType::Int16 => { + let array = array.as_primitive::(); + let array = array.iter().collect::>(); builder.encode_field(&array) } - ConcreteDataType::Int32(_) | ConcreteDataType::UInt32(_) => { - let array = value_list - .items() - .iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::Int32(v) => Ok(Some(*v)), - Value::UInt32(v) => Ok(Some(*v as i32)), - _ => Err(convert_err(Error::Internal { - err_msg: format!( - "Invalid list item type, find {v:?}, expected int32 or uint32", - ), - })), - }) - .collect::>>>()?; + DataType::Int32 => { + let array = array.as_primitive::(); + let array = array.iter().collect::>(); builder.encode_field(&array) } - ConcreteDataType::Int64(_) | ConcreteDataType::UInt64(_) => { - let array = value_list - .items() - .iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::Int64(v) => Ok(Some(*v)), - Value::UInt64(v) => Ok(Some(*v as i64)), - _ => Err(convert_err(Error::Internal { - err_msg: format!( - "Invalid list item type, find {v:?}, expected int64 or uint64", - ), - })), - }) - .collect::>>>()?; + DataType::Int64 => { + let array = array.as_primitive::(); + let array = array.iter().collect::>(); builder.encode_field(&array) } - ConcreteDataType::Float32(_) => { - let array = value_list - .items() - .iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::Float32(v) => Ok(Some(v.0)), - _ => Err(convert_err(Error::Internal { - err_msg: format!("Invalid list item type, find {v:?}, expected float32",), - })), - }) - .collect::>>>()?; + DataType::UInt8 => { + let array = array.as_primitive::(); + encode_primitive_array!(array, u8, i8, i16) + } + DataType::UInt16 => { + let array = array.as_primitive::(); + encode_primitive_array!(array, u16, i16, i32) + } + 
DataType::UInt32 => { + let array = array.as_primitive::(); + encode_primitive_array!(array, u32, i32, i64) + } + DataType::UInt64 => { + let array = array.as_primitive::(); + let array = array.iter().collect::>(); + if array.iter().all(|x| x.is_none_or(|i| i <= i64::MAX as u64)) { + builder.encode_field( + &array + .into_iter() + .map(|x| x.map(|i| i as i64)) + .collect::>>(), + ) + } else { + builder.encode_field( + &array + .into_iter() + .map(|x| x.map(|i| i.to_string())) + .collect::>(), + ) + } + } + DataType::Float32 => { + let array = array.as_primitive::(); + let array = array.iter().collect::>(); builder.encode_field(&array) } - ConcreteDataType::Float64(_) => { - let array = value_list - .items() - .iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::Float64(v) => Ok(Some(v.0)), - _ => Err(convert_err(Error::Internal { - err_msg: format!("Invalid list item type, find {v:?}, expected float64",), - })), - }) - .collect::>>>()?; + DataType::Float64 => { + let array = array.as_primitive::(); + let array = array.iter().collect::>(); builder.encode_field(&array) } - ConcreteDataType::Binary(_) | ConcreteDataType::Vector(_) => { + DataType::Binary => { let bytea_output = query_ctx.configuration_parameter().postgres_bytea_output(); + let array = array.as_binary::(); match *bytea_output { PGByteaOutputValue::ESCAPE => { - let array = value_list - .items() + let array = array .iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::Binary(v) => Ok(Some(EscapeOutputBytea(v.deref()))), - - _ => Err(convert_err(Error::Internal { - err_msg: format!( - "Invalid list item type, find {v:?}, expected binary", - ), - })), - }) - .collect::>>>()?; + .map(|v| v.map(EscapeOutputBytea)) + .collect::>(); builder.encode_field(&array) } PGByteaOutputValue::HEX => { - let array = value_list - .items() + let array = array .iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::Binary(v) => Ok(Some(HexOutputBytea(v.deref()))), - - _ => Err(convert_err(Error::Internal { - err_msg: format!( - "Invalid list item type, find {v:?}, expected binary", - ), - })), - }) - .collect::>>>()?; + .map(|v| v.map(HexOutputBytea)) + .collect::>(); builder.encode_field(&array) } } } - &ConcreteDataType::String(_) => { - let array = value_list - .items() - .iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::String(v) => Ok(Some(v.as_utf8())), - _ => Err(convert_err(Error::Internal { - err_msg: format!("Invalid list item type, find {v:?}, expected string",), - })), - }) - .collect::>>>()?; + DataType::Utf8 => { + let array = array.as_string::(); + let array = array.into_iter().collect::>(); builder.encode_field(&array) } - ConcreteDataType::Date(_) => { - let array = value_list - .items() - .iter() + DataType::LargeUtf8 => { + let array = array.as_string::(); + let array = array.into_iter().collect::>(); + builder.encode_field(&array) + } + DataType::Utf8View => { + let array = array.as_string_view(); + let array = array.into_iter().collect::>(); + builder.encode_field(&array) + } + DataType::Date32 | DataType::Date64 => { + let iter: Box>> = + if matches!(array.data_type(), DataType::Date32) { + let array = array.as_primitive::(); + Box::new(array.into_iter()) + } else { + let array = array.as_primitive::(); + // `Date64` values are milliseconds representation of `Date32` values, according + // to its specification. So we convert them to `Date32` values to process the + // `Date64` array unified with `Date32` array. 
+ Box::new( + array + .into_iter() + .map(|x| x.map(|i| (i / 86_400_000) as i32)), + ) + }; + let array = iter + .into_iter() .map(|v| match v { - Value::Null => Ok(None), - Value::Date(v) => { - if let Some(date) = v.to_chrono_date() { + None => Ok(None), + Some(v) => { + if let Some(date) = Date::new(v).to_chrono_date() { let (style, order) = *query_ctx.configuration_parameter().pg_datetime_style(); Ok(Some(StylingDate(date, style, order))) @@ -273,20 +264,36 @@ fn encode_array( })) } } - _ => Err(convert_err(Error::Internal { - err_msg: format!("Invalid list item type, find {v:?}, expected date",), - })), }) .collect::>>>()?; builder.encode_field(&array) } - ConcreteDataType::Timestamp(_) => { - let array = value_list - .items() - .iter() + DataType::Timestamp(time_unit, _) => { + let array = match time_unit { + TimeUnit::Second => { + let array = array.as_primitive::(); + array.into_iter().collect::>() + } + TimeUnit::Millisecond => { + let array = array.as_primitive::(); + array.into_iter().collect::>() + } + TimeUnit::Microsecond => { + let array = array.as_primitive::(); + array.into_iter().collect::>() + } + TimeUnit::Nanosecond => { + let array = array.as_primitive::(); + array.into_iter().collect::>() + } + }; + let time_unit = time_unit.into(); + let array = array + .into_iter() .map(|v| match v { - Value::Null => Ok(None), - Value::Timestamp(v) => { + None => Ok(None), + Some(v) => { + let v = Timestamp::new(v, time_unit); if let Some(datetime) = v.to_chrono_datetime_with_timezone(Some(&query_ctx.timezone())) { @@ -299,183 +306,404 @@ fn encode_array( })) } } - _ => Err(convert_err(Error::Internal { - err_msg: format!("Invalid list item type, find {v:?}, expected timestamp",), - })), }) .collect::>>>()?; builder.encode_field(&array) } - ConcreteDataType::Time(_) => { - let array = value_list - .items() - .iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::Time(v) => Ok(v.to_chrono_time()), - _ => Err(convert_err(Error::Internal { - err_msg: format!("Invalid list item type, find {v:?}, expected time",), - })), - }) - .collect::>>>()?; + DataType::Time32(time_unit) | DataType::Time64(time_unit) => { + let iter: Box>> = match time_unit { + TimeUnit::Second => { + let array = array.as_primitive::(); + Box::new( + array + .into_iter() + .map(|v| v.map(|i| Time::new_second(i as i64))), + ) + } + TimeUnit::Millisecond => { + let array = array.as_primitive::(); + Box::new( + array + .into_iter() + .map(|v| v.map(|i| Time::new_millisecond(i as i64))), + ) + } + TimeUnit::Microsecond => { + let array = array.as_primitive::(); + Box::new(array.into_iter().map(|v| v.map(Time::new_microsecond))) + } + TimeUnit::Nanosecond => { + let array = array.as_primitive::(); + Box::new(array.into_iter().map(|v| v.map(Time::new_nanosecond))) + } + }; + let array = iter + .into_iter() + .map(|v| v.and_then(|v| v.to_chrono_time())) + .collect::>>(); builder.encode_field(&array) } - ConcreteDataType::Interval(_) => { - let array = value_list - .items() - .iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::IntervalYearMonth(v) => Ok(Some(PgInterval::from(*v))), - Value::IntervalDayTime(v) => Ok(Some(PgInterval::from(*v))), - Value::IntervalMonthDayNano(v) => Ok(Some(PgInterval::from(*v))), - _ => Err(convert_err(Error::Internal { - err_msg: format!("Invalid list item type, find {v:?}, expected interval",), - })), - }) - .collect::>>>()?; + DataType::Interval(interval_unit) => { + let array = match interval_unit { + IntervalUnit::YearMonth => { + let array = array.as_primitive::(); 
+ array + .into_iter() + .map(|v| v.map(|i| PgInterval::from(IntervalYearMonth::from(i)))) + .collect::>() + } + IntervalUnit::DayTime => { + let array = array.as_primitive::(); + array + .into_iter() + .map(|v| v.map(|i| PgInterval::from(IntervalDayTime::from(i)))) + .collect::>() + } + IntervalUnit::MonthDayNano => { + let array = array.as_primitive::(); + array + .into_iter() + .map(|v| v.map(|i| PgInterval::from(IntervalMonthDayNano::from(i)))) + .collect::>() + } + }; builder.encode_field(&array) } - ConcreteDataType::Decimal128(_) => { - let array = value_list - .items() - .iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::Decimal128(v) => Ok(Some(v.to_string())), - _ => Err(convert_err(Error::Internal { - err_msg: format!("Invalid list item type, find {v:?}, expected decimal",), - })), - }) - .collect::>>>()?; + DataType::Decimal128(precision, scale) => { + let array = array.as_primitive::(); + let array = array + .into_iter() + .map(|v| v.map(|i| Decimal128::new(i, *precision, *scale).to_string())) + .collect::>(); builder.encode_field(&array) } - ConcreteDataType::Json(j) => match &j.format { - JsonFormat::Jsonb => { - let array = value_list - .take_items() - .into_iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::Binary(v) => { - let s = jsonb_to_string(&v).map_err(convert_err)?; - Ok(Some(s)) - } - - _ => Err(convert_err(Error::Internal { - err_msg: format!("Invalid list item type, find {v:?}, expected json",), - })), - }) - .collect::>>>()?; - builder.encode_field(&array) - } - JsonFormat::Native(_) => { - let array = value_list - .take_items() - .into_iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::Json(inner) => serde_json::Value::try_from(*inner) - .map(Some) - .map_err(|e| PgWireError::ApiError(Box::new(e))), - _ => Err(convert_err(Error::Internal { - err_msg: format!("Invalid list item type, find {v:?}, expected json",), - })), - }) - .collect::>>>()?; - builder.encode_field(&array) - } - }, _ => Err(convert_err(Error::Internal { err_msg: format!( "cannot write array type {:?} in postgres protocol: unimplemented", - value_list.datatype() + array.data_type() ), })), } } -pub(super) fn encode_value( - query_ctx: &QueryContextRef, - value: Value, - builder: &mut DataRowEncoder, - datatype: &ConcreteDataType, -) -> PgWireResult<()> { - match value { - Value::Null => builder.encode_field(&None::<&i8>), - Value::Boolean(v) => builder.encode_field(&v), - Value::UInt8(v) => builder.encode_field(&(v as i8)), - Value::UInt16(v) => builder.encode_field(&(v as i16)), - Value::UInt32(v) => builder.encode_field(&v), - Value::UInt64(v) => builder.encode_field(&(v as i64)), - Value::Int8(v) => builder.encode_field(&v), - Value::Int16(v) => builder.encode_field(&v), - Value::Int32(v) => builder.encode_field(&v), - Value::Int64(v) => builder.encode_field(&v), - Value::Float32(v) => builder.encode_field(&v.0), - Value::Float64(v) => builder.encode_field(&v.0), - Value::String(v) => builder.encode_field(&v.as_utf8()), - Value::Binary(v) => match datatype { - ConcreteDataType::Json(_j) => { - let s = jsonb_to_string(v.as_ref()).map_err(convert_err)?; - builder.encode_field(&s) +pub(crate) struct RecordBatchRowIterator { + query_ctx: QueryContextRef, + pg_schema: Arc>, + schema: SchemaRef, + record_batch: arrow::record_batch::RecordBatch, + i: usize, +} + +impl Iterator for RecordBatchRowIterator { + type Item = PgWireResult; + + fn next(&mut self) -> Option { + if self.i < self.record_batch.num_rows() { + let mut encoder = 
DataRowEncoder::new(self.pg_schema.clone()); + if let Err(e) = self.encode_row(self.i, &mut encoder) { + return Some(Err(e)); } - _ => { - let bytea_output = query_ctx.configuration_parameter().postgres_bytea_output(); - match *bytea_output { - PGByteaOutputValue::ESCAPE => { - builder.encode_field(&EscapeOutputBytea(v.deref())) + self.i += 1; + Some(encoder.finish()) + } else { + None + } + } +} + +impl RecordBatchRowIterator { + pub(crate) fn new( + query_ctx: QueryContextRef, + pg_schema: Arc>, + record_batch: RecordBatch, + ) -> Self { + let schema = record_batch.schema.clone(); + let record_batch = record_batch.into_df_record_batch(); + Self { + query_ctx, + pg_schema, + schema, + record_batch, + i: 0, + } + } + + fn encode_row(&mut self, i: usize, encoder: &mut DataRowEncoder) -> PgWireResult<()> { + for (j, column) in self.record_batch.columns().iter().enumerate() { + if column.is_null(i) { + encoder.encode_field(&None::<&i8>)?; + continue; + } + + match column.data_type() { + DataType::Null => { + encoder.encode_field(&None::<&i8>)?; + } + DataType::Boolean => { + let array = column.as_boolean(); + encoder.encode_field(&array.value(i))?; + } + DataType::UInt8 => { + let array = column.as_primitive::(); + let value = array.value(i); + if value <= i8::MAX as u8 { + encoder.encode_field(&(value as i8))?; + } else { + encoder.encode_field(&(value as i16))?; } - PGByteaOutputValue::HEX => builder.encode_field(&HexOutputBytea(v.deref())), + } + DataType::UInt16 => { + let array = column.as_primitive::(); + let value = array.value(i); + if value <= i16::MAX as u16 { + encoder.encode_field(&(value as i16))?; + } else { + encoder.encode_field(&(value as i32))?; + } + } + DataType::UInt32 => { + let array = column.as_primitive::(); + let value = array.value(i); + if value <= i32::MAX as u32 { + encoder.encode_field(&(value as i32))?; + } else { + encoder.encode_field(&(value as i64))?; + } + } + DataType::UInt64 => { + let array = column.as_primitive::(); + let value = array.value(i); + if value <= i64::MAX as u64 { + encoder.encode_field(&(value as i64))?; + } else { + encoder.encode_field(&value.to_string())?; + } + } + DataType::Int8 => { + let array = column.as_primitive::(); + encoder.encode_field(&array.value(i))?; + } + DataType::Int16 => { + let array = column.as_primitive::(); + encoder.encode_field(&array.value(i))?; + } + DataType::Int32 => { + let array = column.as_primitive::(); + encoder.encode_field(&array.value(i))?; + } + DataType::Int64 => { + let array = column.as_primitive::(); + encoder.encode_field(&array.value(i))?; + } + DataType::Float32 => { + let array = column.as_primitive::(); + encoder.encode_field(&array.value(i))?; + } + DataType::Float64 => { + let array = column.as_primitive::(); + encoder.encode_field(&array.value(i))?; + } + DataType::Utf8 => { + let array = column.as_string::(); + let value = array.value(i); + encoder.encode_field(&value)?; + } + DataType::Utf8View => { + let array = column.as_string_view(); + let value = array.value(i); + encoder.encode_field(&value)?; + } + DataType::LargeUtf8 => { + let array = column.as_string::(); + let value = array.value(i); + encoder.encode_field(&value)?; + } + DataType::Binary => { + let array = column.as_binary::(); + let v = array.value(i); + encode_bytes( + &self.schema.column_schemas()[j], + v, + encoder, + &self.query_ctx, + )?; + } + DataType::BinaryView => { + let array = column.as_binary_view(); + let v = array.value(i); + encode_bytes( + &self.schema.column_schemas()[j], + v, + encoder, + 
&self.query_ctx, + )?; + } + DataType::LargeBinary => { + let array = column.as_binary::(); + let v = array.value(i); + encode_bytes( + &self.schema.column_schemas()[j], + v, + encoder, + &self.query_ctx, + )?; + } + DataType::Date32 | DataType::Date64 => { + let v = if matches!(column.data_type(), DataType::Date32) { + let array = column.as_primitive::(); + array.value(i) + } else { + let array = column.as_primitive::(); + // `Date64` values are milliseconds representation of `Date32` values, + // according to its specification. So we convert the `Date64` value here to + // the `Date32` value to process them unified. + (array.value(i) / 86_400_000) as i32 + }; + let v = Date::new(v); + let date = v.to_chrono_date().map(|v| { + let (style, order) = + *self.query_ctx.configuration_parameter().pg_datetime_style(); + StylingDate(v, style, order) + }); + encoder.encode_field(&date)?; + } + DataType::Timestamp(time_unit, _) => { + let v = match time_unit { + TimeUnit::Second => { + let array = column.as_primitive::(); + array.value(i) + } + TimeUnit::Millisecond => { + let array = column.as_primitive::(); + array.value(i) + } + TimeUnit::Microsecond => { + let array = column.as_primitive::(); + array.value(i) + } + TimeUnit::Nanosecond => { + let array = column.as_primitive::(); + array.value(i) + } + }; + let v = Timestamp::new(v, time_unit.into()); + let datetime = v + .to_chrono_datetime_with_timezone(Some(&self.query_ctx.timezone())) + .map(|v| { + let (style, order) = + *self.query_ctx.configuration_parameter().pg_datetime_style(); + StylingDateTime(v, style, order) + }); + encoder.encode_field(&datetime)?; + } + DataType::Interval(interval_unit) => match interval_unit { + IntervalUnit::YearMonth => { + let array = column.as_primitive::(); + let v: IntervalYearMonth = array.value(i).into(); + encoder.encode_field(&PgInterval::from(v))?; + } + IntervalUnit::DayTime => { + let array = column.as_primitive::(); + let v: IntervalDayTime = array.value(i).into(); + encoder.encode_field(&PgInterval::from(v))?; + } + IntervalUnit::MonthDayNano => { + let array = column.as_primitive::(); + let v: IntervalMonthDayNano = array.value(i).into(); + encoder.encode_field(&PgInterval::from(v))?; + } + }, + DataType::Duration(time_unit) => { + let v = match time_unit { + TimeUnit::Second => { + let array = column.as_primitive::(); + array.value(i) + } + TimeUnit::Millisecond => { + let array = column.as_primitive::(); + array.value(i) + } + TimeUnit::Microsecond => { + let array = column.as_primitive::(); + array.value(i) + } + TimeUnit::Nanosecond => { + let array = column.as_primitive::(); + array.value(i) + } + }; + let d = Duration::new(v, time_unit.into()); + match PgInterval::try_from(d) { + Ok(i) => encoder.encode_field(&i)?, + Err(e) => { + return Err(convert_err(Error::Internal { + err_msg: e.to_string(), + })); + } + } + } + DataType::List(_) => { + let array = column.as_list::(); + let items = array.value(i); + encode_array(&self.query_ctx, items, encoder)?; + } + DataType::Struct(_) => { + encode_struct(&self.query_ctx, Default::default(), encoder)?; + } + DataType::Time32(time_unit) | DataType::Time64(time_unit) => { + let v = match time_unit { + TimeUnit::Second => { + let array = column.as_primitive::(); + Time::new_second(array.value(i) as i64) + } + TimeUnit::Millisecond => { + let array = column.as_primitive::(); + Time::new_millisecond(array.value(i) as i64) + } + TimeUnit::Microsecond => { + let array = column.as_primitive::(); + Time::new_microsecond(array.value(i)) + } + 
TimeUnit::Nanosecond => { + let array = column.as_primitive::(); + Time::new_nanosecond(array.value(i)) + } + }; + encoder.encode_field(&v.to_chrono_time())?; + } + DataType::Decimal128(precision, scale) => { + let array = column.as_primitive::(); + let v = Decimal128::new(array.value(i), *precision, *scale); + encoder.encode_field(&v.to_string())?; + } + _ => { + return Err(convert_err(Error::Internal { + err_msg: format!( + "cannot convert datatype {} to postgres", + column.data_type() + ), + })); } } - }, - Value::Date(v) => { - if let Some(date) = v.to_chrono_date() { - let (style, order) = *query_ctx.configuration_parameter().pg_datetime_style(); - builder.encode_field(&StylingDate(date, style, order)) - } else { - Err(convert_err(Error::Internal { - err_msg: format!("Failed to convert date to postgres type {v:?}",), - })) - } } - Value::Timestamp(v) => { - if let Some(datetime) = v.to_chrono_datetime_with_timezone(Some(&query_ctx.timezone())) - { - let (style, order) = *query_ctx.configuration_parameter().pg_datetime_style(); - builder.encode_field(&StylingDateTime(datetime, style, order)) - } else { - Err(convert_err(Error::Internal { - err_msg: format!("Failed to convert date to postgres type {v:?}",), - })) - } - } - Value::Time(v) => { - if let Some(time) = v.to_chrono_time() { - builder.encode_field(&time) - } else { - Err(convert_err(Error::Internal { - err_msg: format!("Failed to convert time to postgres type {v:?}",), - })) - } - } - Value::IntervalYearMonth(v) => builder.encode_field(&PgInterval::from(v)), - Value::IntervalDayTime(v) => builder.encode_field(&PgInterval::from(v)), - Value::IntervalMonthDayNano(v) => builder.encode_field(&PgInterval::from(v)), - Value::Decimal128(v) => builder.encode_field(&v.to_string()), - Value::Duration(d) => match PgInterval::try_from(d) { - Ok(i) => builder.encode_field(&i), - Err(e) => Err(convert_err(Error::Internal { - err_msg: e.to_string(), - })), - }, - Value::List(values) => encode_array(query_ctx, values, builder), - Value::Struct(values) => encode_struct(query_ctx, values, builder), - Value::Json(inner) => { - let json_value = serde_json::Value::try_from(*inner) - .map_err(|e| PgWireError::ApiError(Box::new(e)))?; - builder.encode_field(&json_value) + Ok(()) + } +} + +fn encode_bytes( + schema: &ColumnSchema, + v: &[u8], + encoder: &mut DataRowEncoder, + query_ctx: &QueryContextRef, +) -> PgWireResult<()> { + if let ConcreteDataType::Json(_) = &schema.data_type { + let s = jsonb_to_string(v).map_err(convert_err)?; + encoder.encode_field(&s) + } else { + let bytea_output = query_ctx.configuration_parameter().postgres_bytea_output(); + match *bytea_output { + PGByteaOutputValue::ESCAPE => encoder.encode_field(&EscapeOutputBytea(v)), + PGByteaOutputValue::HEX => encoder.encode_field(&HexOutputBytea(v)), } } } @@ -1082,11 +1310,17 @@ pub(super) fn param_types_to_pg_types( mod test { use std::sync::Arc; - use common_time::Timestamp; - use common_time::interval::IntervalUnit; - use common_time::timestamp::TimeUnit; + use arrow::array::{ + Float64Builder, Int64Builder, ListBuilder, StringBuilder, TimestampSecondBuilder, + }; + use arrow_schema::Field; use datatypes::schema::{ColumnSchema, Schema}; - use datatypes::value::ListValue; + use datatypes::vectors::{ + BinaryVector, BooleanVector, DateVector, Float32Vector, Float64Vector, Int8Vector, + Int16Vector, Int32Vector, Int64Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector, + IntervalYearMonthVector, ListVector, NullVector, StringVector, TimeSecondVector, + 
TimestampSecondVector, UInt8Vector, UInt16Vector, UInt32Vector, UInt64Vector, VectorRef, + }; use pgwire::api::Type; use pgwire::api::results::{FieldFormat, FieldInfo}; use session::context::QueryContextBuilder; @@ -1194,12 +1428,8 @@ mod test { FieldInfo::new("uint32s".into(), None, None, Type::INT4, FieldFormat::Text), FieldInfo::new("uint64s".into(), None, None, Type::INT8, FieldFormat::Text), FieldInfo::new("int8s".into(), None, None, Type::CHAR, FieldFormat::Text), - FieldInfo::new("int8s".into(), None, None, Type::CHAR, FieldFormat::Text), - FieldInfo::new("int16s".into(), None, None, Type::INT2, FieldFormat::Text), FieldInfo::new("int16s".into(), None, None, Type::INT2, FieldFormat::Text), FieldInfo::new("int32s".into(), None, None, Type::INT4, FieldFormat::Text), - FieldInfo::new("int32s".into(), None, None, Type::INT4, FieldFormat::Text), - FieldInfo::new("int64s".into(), None, None, Type::INT8, FieldFormat::Text), FieldInfo::new("int64s".into(), None, None, Type::INT8, FieldFormat::Text), FieldInfo::new( "float32s".into(), @@ -1208,34 +1438,6 @@ mod test { Type::FLOAT4, FieldFormat::Text, ), - FieldInfo::new( - "float32s".into(), - None, - None, - Type::FLOAT4, - FieldFormat::Text, - ), - FieldInfo::new( - "float32s".into(), - None, - None, - Type::FLOAT4, - FieldFormat::Text, - ), - FieldInfo::new( - "float64s".into(), - None, - None, - Type::FLOAT8, - FieldFormat::Text, - ), - FieldInfo::new( - "float64s".into(), - None, - None, - Type::FLOAT8, - FieldFormat::Text, - ), FieldInfo::new( "float64s".into(), None, @@ -1317,95 +1519,170 @@ mod test { ), ]; - let datatypes = vec![ - ConcreteDataType::null_datatype(), - ConcreteDataType::boolean_datatype(), - ConcreteDataType::uint8_datatype(), - ConcreteDataType::uint16_datatype(), - ConcreteDataType::uint32_datatype(), - ConcreteDataType::uint64_datatype(), - ConcreteDataType::int8_datatype(), - ConcreteDataType::int8_datatype(), - ConcreteDataType::int16_datatype(), - ConcreteDataType::int16_datatype(), - ConcreteDataType::int32_datatype(), - ConcreteDataType::int32_datatype(), - ConcreteDataType::int64_datatype(), - ConcreteDataType::int64_datatype(), - ConcreteDataType::float32_datatype(), - ConcreteDataType::float32_datatype(), - ConcreteDataType::float32_datatype(), - ConcreteDataType::float64_datatype(), - ConcreteDataType::float64_datatype(), - ConcreteDataType::float64_datatype(), - ConcreteDataType::string_datatype(), - ConcreteDataType::binary_datatype(), - ConcreteDataType::date_datatype(), - ConcreteDataType::time_datatype(TimeUnit::Second), - ConcreteDataType::timestamp_datatype(TimeUnit::Second), - ConcreteDataType::interval_datatype(IntervalUnit::YearMonth), - ConcreteDataType::interval_datatype(IntervalUnit::DayTime), - ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano), - ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int64_datatype())), - ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::float64_datatype())), - ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::string_datatype())), - ConcreteDataType::list_datatype( - Arc::new(ConcreteDataType::timestamp_second_datatype()), + let arrow_schema = arrow_schema::Schema::new(vec![ + Field::new("x", DataType::Null, true), + Field::new("x", DataType::Boolean, true), + Field::new("x", DataType::UInt8, true), + Field::new("x", DataType::UInt16, true), + Field::new("x", DataType::UInt32, true), + Field::new("x", DataType::UInt64, true), + Field::new("x", DataType::Int8, true), + Field::new("x", DataType::Int16, true), + 
Field::new("x", DataType::Int32, true), + Field::new("x", DataType::Int64, true), + Field::new("x", DataType::Float32, true), + Field::new("x", DataType::Float64, true), + Field::new("x", DataType::Utf8, true), + Field::new("x", DataType::Binary, true), + Field::new("x", DataType::Date32, true), + Field::new("x", DataType::Time32(TimeUnit::Second), true), + Field::new("x", DataType::Timestamp(TimeUnit::Second, None), true), + Field::new("x", DataType::Interval(IntervalUnit::YearMonth), true), + Field::new("x", DataType::Interval(IntervalUnit::DayTime), true), + Field::new("x", DataType::Interval(IntervalUnit::MonthDayNano), true), + Field::new( + "x", + DataType::List(Arc::new(Field::new("item", DataType::Int64, true))), + true, ), - ]; + Field::new( + "x", + DataType::List(Arc::new(Field::new("item", DataType::Float64, true))), + true, + ), + Field::new( + "x", + DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), + true, + ), + Field::new( + "x", + DataType::List(Arc::new(Field::new( + "item", + DataType::Timestamp(TimeUnit::Second, None), + true, + ))), + true, + ), + ]); + + let mut builder = ListBuilder::new(Int64Builder::new()); + builder.append_value([Some(1i64), None, Some(2)]); + builder.append_null(); + builder.append_value([Some(-1i64), None, Some(-2)]); + let i64_list_array = builder.finish(); + + let mut builder = ListBuilder::new(Float64Builder::new()); + builder.append_value([Some(1.0f64), None, Some(2.0)]); + builder.append_null(); + builder.append_value([Some(-1.0f64), None, Some(-2.0)]); + let f64_list_array = builder.finish(); + + let mut builder = ListBuilder::new(StringBuilder::new()); + builder.append_value([Some("a"), None, Some("b")]); + builder.append_null(); + builder.append_value([Some("c"), None, Some("d")]); + let string_list_array = builder.finish(); + + let mut builder = ListBuilder::new(TimestampSecondBuilder::new()); + builder.append_value([Some(1i64), None, Some(2)]); + builder.append_null(); + builder.append_value([Some(3i64), None, Some(4)]); + let timestamp_list_array = builder.finish(); + let values = vec![ - Value::Null, - Value::Boolean(true), - Value::UInt8(u8::MAX), - Value::UInt16(u16::MAX), - Value::UInt32(u32::MAX), - Value::UInt64(u64::MAX), - Value::Int8(i8::MAX), - Value::Int8(i8::MIN), - Value::Int16(i16::MAX), - Value::Int16(i16::MIN), - Value::Int32(i32::MAX), - Value::Int32(i32::MIN), - Value::Int64(i64::MAX), - Value::Int64(i64::MIN), - Value::Float32(f32::MAX.into()), - Value::Float32(f32::MIN.into()), - Value::Float32(0f32.into()), - Value::Float64(f64::MAX.into()), - Value::Float64(f64::MIN.into()), - Value::Float64(0f64.into()), - Value::String("greptime".into()), - Value::Binary("greptime".as_bytes().into()), - Value::Date(1001i32.into()), - Value::Time(1001i64.into()), - Value::Timestamp(1000001i64.into()), - Value::IntervalYearMonth(IntervalYearMonth::new(1)), - Value::IntervalDayTime(IntervalDayTime::new(1, 10)), - Value::IntervalMonthDayNano(IntervalMonthDayNano::new(1, 1, 10)), - Value::List(ListValue::new( - vec![Value::Int64(1i64)], - Arc::new(ConcreteDataType::int64_datatype()), - )), - Value::List(ListValue::new( - vec![Value::Float64(1.0f64.into())], - Arc::new(ConcreteDataType::float64_datatype()), - )), - Value::List(ListValue::new( - vec![Value::String("tom".into())], - Arc::new(ConcreteDataType::string_datatype()), - )), - Value::List(ListValue::new( - vec![Value::Timestamp(Timestamp::new(1i64, TimeUnit::Second))], - Arc::new(ConcreteDataType::timestamp_second_datatype()), - )), + 
Arc::new(NullVector::new(3)) as VectorRef, + Arc::new(BooleanVector::from(vec![Some(true), Some(false), None])), + Arc::new(UInt8Vector::from(vec![Some(u8::MAX), Some(u8::MIN), None])), + Arc::new(UInt16Vector::from(vec![ + Some(u16::MAX), + Some(u16::MIN), + None, + ])), + Arc::new(UInt32Vector::from(vec![ + Some(u32::MAX), + Some(u32::MIN), + None, + ])), + Arc::new(UInt64Vector::from(vec![ + Some(u64::MAX), + Some(u64::MIN), + None, + ])), + Arc::new(Int8Vector::from(vec![Some(i8::MAX), Some(i8::MIN), None])), + Arc::new(Int16Vector::from(vec![ + Some(i16::MAX), + Some(i16::MIN), + None, + ])), + Arc::new(Int32Vector::from(vec![ + Some(i32::MAX), + Some(i32::MIN), + None, + ])), + Arc::new(Int64Vector::from(vec![ + Some(i64::MAX), + Some(i64::MIN), + None, + ])), + Arc::new(Float32Vector::from(vec![ + None, + Some(f32::MAX), + Some(f32::MIN), + ])), + Arc::new(Float64Vector::from(vec![ + None, + Some(f64::MAX), + Some(f64::MIN), + ])), + Arc::new(StringVector::from(vec![ + None, + Some("hello"), + Some("greptime"), + ])), + Arc::new(BinaryVector::from(vec![ + None, + Some("hello".as_bytes().to_vec()), + Some("world".as_bytes().to_vec()), + ])), + Arc::new(DateVector::from(vec![Some(1001), None, Some(1)])), + Arc::new(TimeSecondVector::from(vec![Some(1001), None, Some(1)])), + Arc::new(TimestampSecondVector::from(vec![ + Some(1000001), + None, + Some(1), + ])), + Arc::new(IntervalYearMonthVector::from(vec![Some(1), None, Some(2)])), + Arc::new(IntervalDayTimeVector::from(vec![ + Some(arrow::datatypes::IntervalDayTime::new(1, 1)), + None, + Some(arrow::datatypes::IntervalDayTime::new(2, 2)), + ])), + Arc::new(IntervalMonthDayNanoVector::from(vec![ + Some(arrow::datatypes::IntervalMonthDayNano::new(1, 1, 10)), + None, + Some(arrow::datatypes::IntervalMonthDayNano::new(2, 2, 20)), + ])), + Arc::new(ListVector::from(i64_list_array)), + Arc::new(ListVector::from(f64_list_array)), + Arc::new(ListVector::from(string_list_array)), + Arc::new(ListVector::from(timestamp_list_array)), ]; + let record_batch = + RecordBatch::new(Arc::new(arrow_schema.try_into().unwrap()), values).unwrap(); + let query_context = QueryContextBuilder::default() .configuration_parameter(Default::default()) .build() .into(); - let mut builder = DataRowEncoder::new(Arc::new(schema)); - for (value, datatype) in values.into_iter().zip(datatypes) { - encode_value(&query_context, value, &mut builder, &datatype).unwrap(); + let schema = Arc::new(schema); + + let rows = RecordBatchRowIterator::new(query_context, schema.clone(), record_batch) + .filter_map(|x| x.ok()) + .collect::>(); + assert_eq!(rows.len(), 3); + for row in rows { + assert_eq!(row.field_count, schema.len() as i16); } } From b78ee1743cf7121e4144864c679475e81b8bc996 Mon Sep 17 00:00:00 2001 From: Ning Sun Date: Fri, 24 Oct 2025 11:36:07 +0800 Subject: [PATCH 11/14] feat: add a missing pg_catalog function current_database (#7138) feat: add a missing function current_database --- src/common/function/src/system/pg_catalog.rs | 27 ++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/common/function/src/system/pg_catalog.rs b/src/common/function/src/system/pg_catalog.rs index c768aae248..b66e208ea9 100644 --- a/src/common/function/src/system/pg_catalog.rs +++ b/src/common/function/src/system/pg_catalog.rs @@ -32,10 +32,36 @@ use crate::system::define_nullary_udf; const CURRENT_SCHEMA_FUNCTION_NAME: &str = "current_schema"; const CURRENT_SCHEMAS_FUNCTION_NAME: &str = "current_schemas"; const SESSION_USER_FUNCTION_NAME: &str = "session_user"; +const 
CURRENT_DATABASE_FUNCTION_NAME: &str = "current_database"; define_nullary_udf!(CurrentSchemaFunction); define_nullary_udf!(CurrentSchemasFunction); define_nullary_udf!(SessionUserFunction); +define_nullary_udf!(CurrentDatabaseFunction); + +impl Function for CurrentDatabaseFunction { + fn name(&self) -> &str { + CURRENT_DATABASE_FUNCTION_NAME + } + + fn return_type(&self, _: &[DataType]) -> datafusion_common::Result { + Ok(DataType::Utf8View) + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn invoke_with_args( + &self, + args: ScalarFunctionArgs, + ) -> datafusion_common::Result { + let func_ctx = find_function_context(&args)?; + let db = func_ctx.query_ctx.current_catalog().to_string(); + + Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(Some(db)))) + } +} // Though "current_schema" can be aliased to "database", to not cause any breaking changes, // we are not doing it: not until https://github.com/apache/datafusion/issues/17469 is resolved. @@ -141,6 +167,7 @@ impl PGCatalogFunction { registry.register_scalar(CurrentSchemaFunction::default()); registry.register_scalar(CurrentSchemasFunction::default()); registry.register_scalar(SessionUserFunction::default()); + registry.register_scalar(CurrentDatabaseFunction::default()); registry.register(pg_catalog::format_type::create_format_type_udf()); registry.register(pg_catalog::create_pg_get_partkeydef_udf()); registry.register(pg_catalog::has_privilege_udf::create_has_privilege_udf( From 4c70b4c31d5abd4ffc47c6acc27d1e7546c4a6cd Mon Sep 17 00:00:00 2001 From: Yingwen Date: Fri, 24 Oct 2025 13:53:48 +0800 Subject: [PATCH 12/14] feat: store estimated series num in file meta (#7126) * feat: add num_series to FileMeta Signed-off-by: evenyag * feat: add SeriesEstimator to collect num_series Signed-off-by: evenyag * fix: set num_series in compactor Signed-off-by: evenyag * chore: print num_series in Debug for FileMeta Signed-off-by: evenyag * style: fmt code Signed-off-by: evenyag * style: fix clippy Signed-off-by: evenyag * fix: increase series count when next ts <= last Signed-off-by: evenyag * test: add tests for SeriesEstimator Signed-off-by: evenyag * feat: add num_series to ssts_manifest table Signed-off-by: evenyag * test: update sqlness tests Signed-off-by: evenyag * test: fix metric engine list entry test Signed-off-by: evenyag --------- Signed-off-by: evenyag --- src/metric-engine/src/engine/flush.rs | 12 +- src/mito2/src/compaction/compactor.rs | 1 + src/mito2/src/compaction/test_util.rs | 1 + src/mito2/src/engine/basic_test.rs | 12 +- src/mito2/src/flush.rs | 1 + src/mito2/src/manifest/tests/checkpoint.rs | 2 + src/mito2/src/memtable/bulk/part.rs | 10 +- src/mito2/src/region.rs | 1 + src/mito2/src/remap_manifest.rs | 1 + src/mito2/src/sst.rs | 428 ++++++++++++++++++ src/mito2/src/sst/file.rs | 7 + src/mito2/src/sst/file_purger.rs | 2 + src/mito2/src/sst/file_ref.rs | 1 + src/mito2/src/sst/parquet.rs | 3 + src/mito2/src/sst/parquet/writer.rs | 12 +- src/mito2/src/test_util/sst_util.rs | 1 + src/mito2/src/test_util/version_util.rs | 2 + src/store-api/src/sst_entry.rs | 27 +- .../common/information_schema/ssts.result | 33 +- .../common/system/information_schema.result | 13 +- 20 files changed, 527 insertions(+), 43 deletions(-) diff --git a/src/metric-engine/src/engine/flush.rs b/src/metric-engine/src/engine/flush.rs index 23899cbb05..c82862583d 100644 --- a/src/metric-engine/src/engine/flush.rs +++ b/src/metric-engine/src/engine/flush.rs @@ -127,12 +127,12 @@ mod tests { assert_eq!( debug_format, r#" -ManifestSstEntry { 
table_dir: "test_metric_region/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", level: 0, file_path: "test_metric_region/11_0000000001/data/.parquet", file_size: 3173, index_file_path: Some("test_metric_region/11_0000000001/data/index/.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(20), origin_region_id: 47244640257(11, 1), node_id: None, visible: true } -ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "", level: 0, file_path: "test_metric_region/11_0000000002/data/.parquet", file_size: 3173, index_file_path: Some("test_metric_region/11_0000000002/data/index/.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true } -ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417473(11, 16777217), table_id: 11, region_number: 16777217, region_group: 1, region_sequence: 1, file_id: "", level: 0, file_path: "test_metric_region/11_0000000001/metadata/.parquet", file_size: 3505, index_file_path: None, index_file_size: None, num_rows: 8, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(8), origin_region_id: 47261417473(11, 16777217), node_id: None, visible: true } -ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417474(11, 16777218), table_id: 11, region_number: 16777218, region_group: 1, region_sequence: 2, file_id: "", level: 0, file_path: "test_metric_region/11_0000000002/metadata/.parquet", file_size: 3489, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 47261417474(11, 16777218), node_id: None, visible: true } -ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "", level: 0, file_path: "test_metric_region/22_0000000042/data/.parquet", file_size: 3173, index_file_path: Some("test_metric_region/22_0000000042/data/index/.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true } -ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94506057770(22, 16777258), table_id: 22, region_number: 16777258, region_group: 1, region_sequence: 42, file_id: "", level: 0, file_path: "test_metric_region/22_0000000042/metadata/.parquet", file_size: 3489, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 94506057770(22, 16777258), node_id: None, visible: true }"# +ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", level: 0, file_path: "test_metric_region/11_0000000001/data/.parquet", file_size: 3173, index_file_path: Some("test_metric_region/11_0000000001/data/index/.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(20), 
origin_region_id: 47244640257(11, 1), node_id: None, visible: true } +ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "", level: 0, file_path: "test_metric_region/11_0000000002/data/.parquet", file_size: 3173, index_file_path: Some("test_metric_region/11_0000000002/data/index/.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true } +ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417473(11, 16777217), table_id: 11, region_number: 16777217, region_group: 1, region_sequence: 1, file_id: "", level: 0, file_path: "test_metric_region/11_0000000001/metadata/.parquet", file_size: 3505, index_file_path: None, index_file_size: None, num_rows: 8, num_row_groups: 1, num_series: Some(8), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(8), origin_region_id: 47261417473(11, 16777217), node_id: None, visible: true } +ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417474(11, 16777218), table_id: 11, region_number: 16777218, region_group: 1, region_sequence: 2, file_id: "", level: 0, file_path: "test_metric_region/11_0000000002/metadata/.parquet", file_size: 3489, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, num_series: Some(4), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 47261417474(11, 16777218), node_id: None, visible: true } +ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "", level: 0, file_path: "test_metric_region/22_0000000042/data/.parquet", file_size: 3173, index_file_path: Some("test_metric_region/22_0000000042/data/index/.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true } +ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94506057770(22, 16777258), table_id: 22, region_number: 16777258, region_group: 1, region_sequence: 42, file_id: "", level: 0, file_path: "test_metric_region/22_0000000042/metadata/.parquet", file_size: 3489, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, num_series: Some(4), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 94506057770(22, 16777258), node_id: None, visible: true }"# ); // list from storage let storage_entries = mito diff --git a/src/mito2/src/compaction/compactor.rs b/src/mito2/src/compaction/compactor.rs index ba267f4a48..2b871947c0 100644 --- a/src/mito2/src/compaction/compactor.rs +++ b/src/mito2/src/compaction/compactor.rs @@ -433,6 +433,7 @@ impl Compactor for DefaultCompactor { num_row_groups: sst_info.num_row_groups, sequence: max_sequence, partition_expr: partition_expr.clone(), + num_series: sst_info.num_series, }) .collect::>(); let output_file_names = diff --git a/src/mito2/src/compaction/test_util.rs b/src/mito2/src/compaction/test_util.rs index b785d36bcb..3dc212ff4d 100644 --- a/src/mito2/src/compaction/test_util.rs +++ b/src/mito2/src/compaction/test_util.rs @@ -78,6 +78,7 @@ pub fn new_file_handle_with_size_and_sequence( index_file_size: 0, num_rows: 0, 
num_row_groups: 0, + num_series: 0, sequence: NonZeroU64::new(sequence), partition_expr: None, }, diff --git a/src/mito2/src/engine/basic_test.rs b/src/mito2/src/engine/basic_test.rs index 39f2366659..ca62f384c7 100644 --- a/src/mito2/src/engine/basic_test.rs +++ b/src/mito2/src/engine/basic_test.rs @@ -859,9 +859,9 @@ async fn test_cache_null_primary_key_with_format(flat_format: bool) { #[tokio::test] async fn test_list_ssts() { test_list_ssts_with_format(false, r#" -ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", level: 0, file_path: "test/11_0000000001/.parquet", file_size: 2531, index_file_path: Some("test/11_0000000001/index/.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true } -ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "", level: 0, file_path: "test/11_0000000002/.parquet", file_size: 2531, index_file_path: Some("test/11_0000000002/index/.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true } -ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "", level: 0, file_path: "test/22_0000000042/.parquet", file_size: 2531, index_file_path: Some("test/22_0000000042/index/.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"# ,r#" +ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", level: 0, file_path: "test/11_0000000001/.parquet", file_size: 2531, index_file_path: Some("test/11_0000000001/index/.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true } +ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "", level: 0, file_path: "test/11_0000000002/.parquet", file_size: 2531, index_file_path: Some("test/11_0000000002/index/.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true } +ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "", level: 0, file_path: "test/22_0000000042/.parquet", file_size: 2531, index_file_path: Some("test/22_0000000042/index/.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"# ,r#" StorageSstEntry { file_path: "test/11_0000000001/.parquet", file_size: None, last_modified_ms: None, 
node_id: None } StorageSstEntry { file_path: "test/11_0000000001/index/.puffin", file_size: None, last_modified_ms: None, node_id: None } StorageSstEntry { file_path: "test/11_0000000002/.parquet", file_size: None, last_modified_ms: None, node_id: None } @@ -869,9 +869,9 @@ StorageSstEntry { file_path: "test/11_0000000002/index/.puffin", file_s StorageSstEntry { file_path: "test/22_0000000042/.parquet", file_size: None, last_modified_ms: None, node_id: None } StorageSstEntry { file_path: "test/22_0000000042/index/.puffin", file_size: None, last_modified_ms: None, node_id: None }"#).await; test_list_ssts_with_format(true, r#" -ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", level: 0, file_path: "test/11_0000000001/.parquet", file_size: 2855, index_file_path: Some("test/11_0000000001/index/.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true } -ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "", level: 0, file_path: "test/11_0000000002/.parquet", file_size: 2855, index_file_path: Some("test/11_0000000002/index/.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true } -ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "", level: 0, file_path: "test/22_0000000042/.parquet", file_size: 2855, index_file_path: Some("test/22_0000000042/index/.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"#, r#" +ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", level: 0, file_path: "test/11_0000000001/.parquet", file_size: 2855, index_file_path: Some("test/11_0000000001/index/.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true } +ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "", level: 0, file_path: "test/11_0000000002/.parquet", file_size: 2855, index_file_path: Some("test/11_0000000002/index/.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true } +ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "", level: 0, file_path: "test/22_0000000042/.parquet", file_size: 2855, index_file_path: Some("test/22_0000000042/index/.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 
94489280554(22, 42), node_id: None, visible: true }"#, r#" StorageSstEntry { file_path: "test/11_0000000001/.parquet", file_size: None, last_modified_ms: None, node_id: None } StorageSstEntry { file_path: "test/11_0000000001/index/.puffin", file_size: None, last_modified_ms: None, node_id: None } StorageSstEntry { file_path: "test/11_0000000002/.parquet", file_size: None, last_modified_ms: None, node_id: None } diff --git a/src/mito2/src/flush.rs b/src/mito2/src/flush.rs index eb5e605ce1..ddad947f8a 100644 --- a/src/mito2/src/flush.rs +++ b/src/mito2/src/flush.rs @@ -641,6 +641,7 @@ impl RegionFlushTask { num_row_groups: sst_info.num_row_groups, sequence: NonZeroU64::new(max_sequence), partition_expr, + num_series: sst_info.num_series, } } diff --git a/src/mito2/src/manifest/tests/checkpoint.rs b/src/mito2/src/manifest/tests/checkpoint.rs index e10d3aad46..a99a7878ad 100644 --- a/src/mito2/src/manifest/tests/checkpoint.rs +++ b/src/mito2/src/manifest/tests/checkpoint.rs @@ -269,6 +269,7 @@ async fn checkpoint_with_different_compression_types() { num_row_groups: 0, sequence: None, partition_expr: None, + num_series: 0, }; let action = RegionMetaActionList::new(vec![RegionMetaAction::Edit(RegionEdit { files_to_add: vec![file_meta], @@ -334,6 +335,7 @@ fn generate_action_lists(num: usize) -> (Vec, Vec) num_row_groups: 0, sequence: None, partition_expr: None, + num_series: 0, }; let action = RegionMetaActionList::new(vec![RegionMetaAction::Edit(RegionEdit { files_to_add: vec![file_meta], diff --git a/src/mito2/src/memtable/bulk/part.rs b/src/mito2/src/memtable/bulk/part.rs index 4eb2655755..21ac141cff 100644 --- a/src/mito2/src/memtable/bulk/part.rs +++ b/src/mito2/src/memtable/bulk/part.rs @@ -69,7 +69,7 @@ use crate::sst::parquet::flat_format::primary_key_column_index; use crate::sst::parquet::format::{PrimaryKeyArray, PrimaryKeyArrayBuilder, ReadFormat}; use crate::sst::parquet::helper::parse_parquet_metadata; use crate::sst::parquet::{PARQUET_METADATA_KEY, SstInfo}; -use crate::sst::to_sst_arrow_schema; +use crate::sst::{SeriesEstimator, to_sst_arrow_schema}; const INIT_DICT_VALUE_CAPACITY: usize = 8; @@ -563,6 +563,7 @@ impl EncodedBulkPart { num_row_groups: self.metadata.parquet_metadata.num_row_groups() as u64, file_metadata: Some(self.metadata.parquet_metadata.clone()), index_metadata: IndexOutput::default(), + num_series: self.metadata.num_series, } } @@ -602,6 +603,8 @@ pub struct BulkPartMeta { pub parquet_metadata: Arc, /// Part region schema. pub region_metadata: RegionMetadataRef, + /// Number of series. + pub num_series: u64, } /// Metrics for encoding a part. 
@@ -669,6 +672,7 @@ impl BulkPartEncoder { let mut writer = ArrowWriter::try_new(&mut buf, arrow_schema, self.writer_props.clone()) .context(EncodeMemtableSnafu)?; let mut total_rows = 0; + let mut series_estimator = SeriesEstimator::default(); // Process each batch from the iterator let mut iter_start = Instant::now(); @@ -679,6 +683,7 @@ impl BulkPartEncoder { continue; } + series_estimator.update_flat(&batch); metrics.raw_size += record_batch_estimated_size(&batch); let write_start = Instant::now(); writer.write(&batch).context(EncodeMemtableSnafu)?; @@ -701,6 +706,7 @@ impl BulkPartEncoder { let buf = Bytes::from(buf); let parquet_metadata = Arc::new(parse_parquet_metadata(file_metadata)?); + let num_series = series_estimator.finish(); Ok(Some(EncodedBulkPart { data: buf, @@ -710,6 +716,7 @@ impl BulkPartEncoder { min_timestamp, parquet_metadata, region_metadata: self.metadata.clone(), + num_series, }, })) } @@ -742,6 +749,7 @@ impl BulkPartEncoder { min_timestamp: part.min_timestamp, parquet_metadata, region_metadata: self.metadata.clone(), + num_series: part.estimated_series_count() as u64, }, })) } diff --git a/src/mito2/src/region.rs b/src/mito2/src/region.rs index aac7090174..ee49da763e 100644 --- a/src/mito2/src/region.rs +++ b/src/mito2/src/region.rs @@ -608,6 +608,7 @@ impl MitoRegion { index_file_size, num_rows: meta.num_rows, num_row_groups: meta.num_row_groups, + num_series: Some(meta.num_series), min_ts: meta.time_range.0, max_ts: meta.time_range.1, sequence: meta.sequence.map(|s| s.get()), diff --git a/src/mito2/src/remap_manifest.rs b/src/mito2/src/remap_manifest.rs index 6800a4bf4d..a10159401b 100644 --- a/src/mito2/src/remap_manifest.rs +++ b/src/mito2/src/remap_manifest.rs @@ -431,6 +431,7 @@ mod tests { num_row_groups: 1, sequence: NonZeroU64::new(1), partition_expr, + num_series: 1, } } diff --git a/src/mito2/src/sst.rs b/src/mito2/src/sst.rs index 1d94e74eaa..f3f51bdc08 100644 --- a/src/mito2/src/sst.rs +++ b/src/mito2/src/sst.rs @@ -21,7 +21,9 @@ use common_base::readable_size::ReadableSize; use datatypes::arrow::datatypes::{ DataType as ArrowDataType, Field, FieldRef, Fields, Schema, SchemaRef, }; +use datatypes::arrow::record_batch::RecordBatch; use datatypes::prelude::ConcreteDataType; +use datatypes::timestamp::timestamp_array_to_primitive; use serde::{Deserialize, Serialize}; use store_api::codec::PrimaryKeyEncoding; use store_api::metadata::RegionMetadata; @@ -29,6 +31,9 @@ use store_api::storage::consts::{ OP_TYPE_COLUMN_NAME, PRIMARY_KEY_COLUMN_NAME, SEQUENCE_COLUMN_NAME, }; +use crate::read::Batch; +use crate::sst::parquet::flat_format::time_index_column_index; + pub mod file; pub mod file_purger; pub mod file_ref; @@ -241,3 +246,426 @@ fn plain_internal_fields() -> [FieldRef; 2] { Arc::new(Field::new(OP_TYPE_COLUMN_NAME, ArrowDataType::UInt8, false)), ] } + +/// Gets the estimated number of series from record batches. +/// +/// This struct tracks the last timestamp value to detect series boundaries +/// by observing when timestamps decrease (indicating a new series). +#[derive(Default)] +pub(crate) struct SeriesEstimator { + /// The last timestamp value seen + last_timestamp: Option, + /// The estimated number of series + series_count: u64, +} + +impl SeriesEstimator { + /// Updates the estimator with a new Batch. + /// + /// Since each Batch contains only one series, this increments the series count + /// and updates the last timestamp. 
+    pub(crate) fn update(&mut self, batch: &Batch) {
+        let Some(last_ts) = batch.last_timestamp() else {
+            return;
+        };
+
+        // Checks if there's a boundary between the last batch and this batch
+        if let Some(prev_last_ts) = self.last_timestamp {
+            // If the first timestamp of this batch is less than or equal to the
+            // last timestamp we've seen, it indicates a new series
+            if let Some(first_ts) = batch.first_timestamp()
+                && first_ts.value() <= prev_last_ts
+            {
+                self.series_count += 1;
+            }
+        } else {
+            // First batch, counts as first series
+            self.series_count = 1;
+        }
+
+        // Updates the last timestamp
+        self.last_timestamp = Some(last_ts.value());
+    }
+
+    /// Updates the estimator with a new record batch in flat format.
+    ///
+    /// This method examines the time index column to detect series boundaries.
+    pub(crate) fn update_flat(&mut self, record_batch: &RecordBatch) {
+        let batch_rows = record_batch.num_rows();
+        if batch_rows == 0 {
+            return;
+        }
+
+        let time_index_pos = time_index_column_index(record_batch.num_columns());
+        let timestamps = record_batch.column(time_index_pos);
+        let Some((ts_values, _unit)) = timestamp_array_to_primitive(timestamps) else {
+            return;
+        };
+        let values = ts_values.values();
+
+        // Checks if there's a boundary between the last batch and this batch
+        if let Some(last_ts) = self.last_timestamp {
+            if values[0] <= last_ts {
+                self.series_count += 1;
+            }
+        } else {
+            // First batch, counts as first series
+            self.series_count = 1;
+        }
+
+        // Counts series boundaries within this batch.
+        for i in 0..batch_rows - 1 {
+            // We treat an equal timestamp as a new series, which is different from
+            // how we split batches.
+            if values[i] >= values[i + 1] {
+                self.series_count += 1;
+            }
+        }
+
+        // Updates the last timestamp
+        self.last_timestamp = Some(values[batch_rows - 1]);
+    }
+
+    /// Returns the estimated number of series.
+ pub(crate) fn finish(&mut self) -> u64 { + self.last_timestamp = None; + let count = self.series_count; + self.series_count = 0; + + count + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use api::v1::OpType; + use datatypes::arrow::array::{ + BinaryArray, DictionaryArray, TimestampMillisecondArray, UInt8Array, UInt8Builder, + UInt32Array, UInt64Array, + }; + use datatypes::arrow::datatypes::{DataType as ArrowDataType, Field, Schema, TimeUnit}; + use datatypes::arrow::record_batch::RecordBatch; + + use super::*; + use crate::read::{Batch, BatchBuilder}; + + fn new_batch( + primary_key: &[u8], + timestamps: &[i64], + sequences: &[u64], + op_types: &[OpType], + ) -> Batch { + let timestamps = Arc::new(TimestampMillisecondArray::from(timestamps.to_vec())); + let sequences = Arc::new(UInt64Array::from(sequences.to_vec())); + let mut op_type_builder = UInt8Builder::with_capacity(op_types.len()); + for op_type in op_types { + op_type_builder.append_value(*op_type as u8); + } + let op_types = Arc::new(UInt8Array::from( + op_types.iter().map(|op| *op as u8).collect::>(), + )); + + let mut builder = BatchBuilder::new(primary_key.to_vec()); + builder + .timestamps_array(timestamps) + .unwrap() + .sequences_array(sequences) + .unwrap() + .op_types_array(op_types) + .unwrap(); + builder.build().unwrap() + } + + fn new_flat_record_batch(timestamps: &[i64]) -> RecordBatch { + // Flat format has: [fields..., time_index, __primary_key, __sequence, __op_type] + let num_cols = 4; // time_index + 3 internal columns + let time_index_pos = time_index_column_index(num_cols); + assert_eq!(time_index_pos, 0); // For 4 columns, time index should be at position 0 + + let time_array = Arc::new(TimestampMillisecondArray::from(timestamps.to_vec())); + let pk_array = Arc::new(DictionaryArray::new( + UInt32Array::from(vec![0; timestamps.len()]), + Arc::new(BinaryArray::from(vec![b"test".as_slice()])), + )); + let seq_array = Arc::new(UInt64Array::from(vec![1; timestamps.len()])); + let op_array = Arc::new(UInt8Array::from(vec![1; timestamps.len()])); + + let schema = Arc::new(Schema::new(vec![ + Field::new( + "time", + ArrowDataType::Timestamp(TimeUnit::Millisecond, None), + false, + ), + Field::new_dictionary( + "__primary_key", + ArrowDataType::UInt32, + ArrowDataType::Binary, + false, + ), + Field::new("__sequence", ArrowDataType::UInt64, false), + Field::new("__op_type", ArrowDataType::UInt8, false), + ])); + + RecordBatch::try_new(schema, vec![time_array, pk_array, seq_array, op_array]).unwrap() + } + + #[test] + fn test_series_estimator_empty_batch() { + let mut estimator = SeriesEstimator::default(); + let batch = new_batch(b"test", &[], &[], &[]); + estimator.update(&batch); + assert_eq!(0, estimator.finish()); + } + + #[test] + fn test_series_estimator_single_batch() { + let mut estimator = SeriesEstimator::default(); + let batch = new_batch( + b"test", + &[1, 2, 3], + &[1, 2, 3], + &[OpType::Put, OpType::Put, OpType::Put], + ); + estimator.update(&batch); + assert_eq!(1, estimator.finish()); + } + + #[test] + fn test_series_estimator_multiple_batches_same_series() { + let mut estimator = SeriesEstimator::default(); + + // First batch with timestamps 1, 2, 3 + let batch1 = new_batch( + b"test", + &[1, 2, 3], + &[1, 2, 3], + &[OpType::Put, OpType::Put, OpType::Put], + ); + estimator.update(&batch1); + + // Second batch with timestamps 4, 5, 6 (continuation) + let batch2 = new_batch( + b"test", + &[4, 5, 6], + &[4, 5, 6], + &[OpType::Put, OpType::Put, OpType::Put], + ); + estimator.update(&batch2); 
+ + assert_eq!(1, estimator.finish()); + } + + #[test] + fn test_series_estimator_new_series_detected() { + let mut estimator = SeriesEstimator::default(); + + // First batch with timestamps 1, 2, 3 + let batch1 = new_batch( + b"pk0", + &[1, 2, 3], + &[1, 2, 3], + &[OpType::Put, OpType::Put, OpType::Put], + ); + estimator.update(&batch1); + + // Second batch with timestamps 2, 3, 4 (timestamp goes back, new series) + let batch2 = new_batch( + b"pk1", + &[2, 3, 4], + &[4, 5, 6], + &[OpType::Put, OpType::Put, OpType::Put], + ); + estimator.update(&batch2); + + assert_eq!(2, estimator.finish()); + } + + #[test] + fn test_series_estimator_equal_timestamp_boundary() { + let mut estimator = SeriesEstimator::default(); + + // First batch ending at timestamp 5 + let batch1 = new_batch( + b"test", + &[1, 2, 5], + &[1, 2, 3], + &[OpType::Put, OpType::Put, OpType::Put], + ); + estimator.update(&batch1); + + // Second batch starting at timestamp 5 (equal, indicates new series) + let batch2 = new_batch( + b"test", + &[5, 6, 7], + &[4, 5, 6], + &[OpType::Put, OpType::Put, OpType::Put], + ); + estimator.update(&batch2); + + assert_eq!(2, estimator.finish()); + } + + #[test] + fn test_series_estimator_finish_resets_state() { + let mut estimator = SeriesEstimator::default(); + + let batch1 = new_batch( + b"test", + &[1, 2, 3], + &[1, 2, 3], + &[OpType::Put, OpType::Put, OpType::Put], + ); + estimator.update(&batch1); + + assert_eq!(1, estimator.finish()); + + // After finish, state should be reset + let batch2 = new_batch( + b"test", + &[4, 5, 6], + &[4, 5, 6], + &[OpType::Put, OpType::Put, OpType::Put], + ); + estimator.update(&batch2); + + assert_eq!(1, estimator.finish()); + } + + #[test] + fn test_series_estimator_flat_empty_batch() { + let mut estimator = SeriesEstimator::default(); + let record_batch = new_flat_record_batch(&[]); + estimator.update_flat(&record_batch); + assert_eq!(0, estimator.finish()); + } + + #[test] + fn test_series_estimator_flat_single_batch() { + let mut estimator = SeriesEstimator::default(); + let record_batch = new_flat_record_batch(&[1, 2, 3]); + estimator.update_flat(&record_batch); + assert_eq!(1, estimator.finish()); + } + + #[test] + fn test_series_estimator_flat_series_boundary_within_batch() { + let mut estimator = SeriesEstimator::default(); + // Timestamps decrease from 3 to 2, indicating a series boundary + let record_batch = new_flat_record_batch(&[1, 2, 3, 2, 4, 5]); + estimator.update_flat(&record_batch); + // Should detect boundary at position 3 (3 >= 2) + assert_eq!(2, estimator.finish()); + } + + #[test] + fn test_series_estimator_flat_multiple_boundaries_within_batch() { + let mut estimator = SeriesEstimator::default(); + // Multiple series boundaries: 5>=4, 6>=3 + let record_batch = new_flat_record_batch(&[1, 2, 5, 4, 6, 3, 7]); + estimator.update_flat(&record_batch); + assert_eq!(3, estimator.finish()); + } + + #[test] + fn test_series_estimator_flat_equal_timestamps() { + let mut estimator = SeriesEstimator::default(); + // Equal timestamps are considered as new series + let record_batch = new_flat_record_batch(&[1, 2, 2, 3, 3, 3, 4]); + estimator.update_flat(&record_batch); + // Boundaries at: 2>=2, 3>=3, 3>=3 + assert_eq!(4, estimator.finish()); + } + + #[test] + fn test_series_estimator_flat_multiple_batches_continuation() { + let mut estimator = SeriesEstimator::default(); + + // First batch: timestamps 1, 2, 3 + let batch1 = new_flat_record_batch(&[1, 2, 3]); + estimator.update_flat(&batch1); + + // Second batch: timestamps 4, 5, 6 (continuation) + 
let batch2 = new_flat_record_batch(&[4, 5, 6]); + estimator.update_flat(&batch2); + + assert_eq!(1, estimator.finish()); + } + + #[test] + fn test_series_estimator_flat_multiple_batches_new_series() { + let mut estimator = SeriesEstimator::default(); + + // First batch: timestamps 1, 2, 3 + let batch1 = new_flat_record_batch(&[1, 2, 3]); + estimator.update_flat(&batch1); + + // Second batch: timestamps 2, 3, 4 (goes back to 2, new series) + let batch2 = new_flat_record_batch(&[2, 3, 4]); + estimator.update_flat(&batch2); + + assert_eq!(2, estimator.finish()); + } + + #[test] + fn test_series_estimator_flat_boundary_at_batch_edge_equal() { + let mut estimator = SeriesEstimator::default(); + + // First batch ending at 5 + let batch1 = new_flat_record_batch(&[1, 2, 5]); + estimator.update_flat(&batch1); + + // Second batch starting at 5 (equal timestamp, new series) + let batch2 = new_flat_record_batch(&[5, 6, 7]); + estimator.update_flat(&batch2); + + assert_eq!(2, estimator.finish()); + } + + #[test] + fn test_series_estimator_flat_mixed_batches() { + let mut estimator = SeriesEstimator::default(); + + // Batch 1: single series [10, 20, 30] + let batch1 = new_flat_record_batch(&[10, 20, 30]); + estimator.update_flat(&batch1); + + // Batch 2: starts new series [5, 15], boundary within batch [15, 10, 25] + let batch2 = new_flat_record_batch(&[5, 15, 10, 25]); + estimator.update_flat(&batch2); + + // Batch 3: continues from 25 to [30, 35] + let batch3 = new_flat_record_batch(&[30, 35]); + estimator.update_flat(&batch3); + + // Expected: 1 (batch1) + 1 (batch2 start) + 1 (within batch2) = 3 + assert_eq!(3, estimator.finish()); + } + + #[test] + fn test_series_estimator_flat_descending_timestamps() { + let mut estimator = SeriesEstimator::default(); + // Strictly descending timestamps - each pair creates a boundary + let record_batch = new_flat_record_batch(&[10, 9, 8, 7, 6]); + estimator.update_flat(&record_batch); + // Boundaries: 10>=9, 9>=8, 8>=7, 7>=6 = 4 boundaries + 1 initial = 5 series + assert_eq!(5, estimator.finish()); + } + + #[test] + fn test_series_estimator_flat_finish_resets_state() { + let mut estimator = SeriesEstimator::default(); + + let batch1 = new_flat_record_batch(&[1, 2, 3]); + estimator.update_flat(&batch1); + + assert_eq!(1, estimator.finish()); + + // After finish, state should be reset + let batch2 = new_flat_record_batch(&[4, 5, 6]); + estimator.update_flat(&batch2); + + assert_eq!(1, estimator.finish()); + } +} diff --git a/src/mito2/src/sst/file.rs b/src/mito2/src/sst/file.rs index 4ddde55746..ae255e9407 100644 --- a/src/mito2/src/sst/file.rs +++ b/src/mito2/src/sst/file.rs @@ -175,6 +175,10 @@ pub struct FileMeta { deserialize_with = "deserialize_partition_expr" )] pub partition_expr: Option, + /// Number of series in the file. + /// + /// The number is 0 if the series number is not available. 
+ pub num_series: u64, } impl Debug for FileMeta { @@ -210,6 +214,7 @@ impl Debug for FileMeta { } }) .field("partition_expr", &self.partition_expr) + .field("num_series", &self.num_series) .finish() } } @@ -458,6 +463,7 @@ mod tests { num_row_groups: 0, sequence: None, partition_expr: None, + num_series: 0, } } @@ -503,6 +509,7 @@ mod tests { num_row_groups: 0, sequence: None, partition_expr: Some(partition_expr.clone()), + num_series: 0, }; // Test serialization/deserialization diff --git a/src/mito2/src/sst/file_purger.rs b/src/mito2/src/sst/file_purger.rs index 7bd0e6b515..c5197ea2fb 100644 --- a/src/mito2/src/sst/file_purger.rs +++ b/src/mito2/src/sst/file_purger.rs @@ -236,6 +236,7 @@ mod tests { num_row_groups: 0, sequence: None, partition_expr: None, + num_series: 0, }, file_purger, ); @@ -302,6 +303,7 @@ mod tests { num_row_groups: 1, sequence: NonZeroU64::new(4096), partition_expr: None, + num_series: 0, }, file_purger, ); diff --git a/src/mito2/src/sst/file_ref.rs b/src/mito2/src/sst/file_ref.rs index c8b86ed0fd..de071b3f04 100644 --- a/src/mito2/src/sst/file_ref.rs +++ b/src/mito2/src/sst/file_ref.rs @@ -259,6 +259,7 @@ mod tests { num_row_groups: 1, sequence: NonZeroU64::new(4096), partition_expr: None, + num_series: 0, }; file_ref_mgr.add_file(&file_meta); diff --git a/src/mito2/src/sst/parquet.rs b/src/mito2/src/sst/parquet.rs index 9b56ffd4ae..83cd17acc8 100644 --- a/src/mito2/src/sst/parquet.rs +++ b/src/mito2/src/sst/parquet.rs @@ -84,6 +84,8 @@ pub struct SstInfo { pub file_metadata: Option>, /// Index Meta Data pub index_metadata: IndexOutput, + /// Number of series + pub num_series: u64, } #[cfg(test)] @@ -766,6 +768,7 @@ mod tests { .expect("partition expression should be valid JSON"), None => None, }, + num_series: 0, }, Arc::new(NoopFilePurger), ); diff --git a/src/mito2/src/sst/parquet/writer.rs b/src/mito2/src/sst/parquet/writer.rs index 01e1e95a9c..d52615690f 100644 --- a/src/mito2/src/sst/parquet/writer.rs +++ b/src/mito2/src/sst/parquet/writer.rs @@ -57,7 +57,9 @@ use crate::sst::parquet::flat_format::{FlatWriteFormat, time_index_column_index} use crate::sst::parquet::format::PrimaryKeyWriteFormat; use crate::sst::parquet::helper::parse_parquet_metadata; use crate::sst::parquet::{PARQUET_METADATA_KEY, SstInfo, WriteOptions}; -use crate::sst::{DEFAULT_WRITE_BUFFER_SIZE, DEFAULT_WRITE_CONCURRENCY, FlatSchemaOptions}; +use crate::sst::{ + DEFAULT_WRITE_BUFFER_SIZE, DEFAULT_WRITE_CONCURRENCY, FlatSchemaOptions, SeriesEstimator, +}; /// Parquet SST writer. pub struct ParquetWriter { @@ -176,7 +178,7 @@ where ) -> Result<()> { // maybe_init_writer will re-create a new file. if let Some(mut current_writer) = mem::take(&mut self.writer) { - let stats = mem::take(stats); + let mut stats = mem::take(stats); // At least one row has been written. assert!(stats.num_rows > 0); @@ -211,6 +213,7 @@ where // convert FileMetaData to ParquetMetaData let parquet_metadata = parse_parquet_metadata(file_meta)?; + let num_series = stats.series_estimator.finish(); ssts.push(SstInfo { file_id: self.current_file, time_range, @@ -219,6 +222,7 @@ where num_row_groups: parquet_metadata.num_row_groups() as u64, file_metadata: Some(Arc::new(parquet_metadata)), index_metadata: index_output, + num_series, }); self.current_file = FileId::random(); self.bytes_written.store(0, Ordering::Relaxed) @@ -496,6 +500,8 @@ struct SourceStats { num_rows: usize, /// Time range of fetched batches. time_range: Option<(Timestamp, Timestamp)>, + /// Series estimator for computing num_series. 
+ series_estimator: SeriesEstimator, } impl SourceStats { @@ -505,6 +511,7 @@ impl SourceStats { } self.num_rows += batch.num_rows(); + self.series_estimator.update(batch); // Safety: batch is not empty. let (min_in_batch, max_in_batch) = ( batch.first_timestamp().unwrap(), @@ -524,6 +531,7 @@ impl SourceStats { } self.num_rows += record_batch.num_rows(); + self.series_estimator.update_flat(record_batch); // Get the timestamp column by index let time_index_col_idx = time_index_column_index(record_batch.num_columns()); diff --git a/src/mito2/src/test_util/sst_util.rs b/src/mito2/src/test_util/sst_util.rs index 5eacf06bd5..fc29ca0826 100644 --- a/src/mito2/src/test_util/sst_util.rs +++ b/src/mito2/src/test_util/sst_util.rs @@ -127,6 +127,7 @@ pub fn sst_file_handle_with_file_id(file_id: FileId, start_ms: i64, end_ms: i64) index_file_size: 0, num_rows: 0, num_row_groups: 0, + num_series: 0, sequence: None, partition_expr: None, }, diff --git a/src/mito2/src/test_util/version_util.rs b/src/mito2/src/test_util/version_util.rs index 86cc11eaf5..30da6677e3 100644 --- a/src/mito2/src/test_util/version_util.rs +++ b/src/mito2/src/test_util/version_util.rs @@ -105,6 +105,7 @@ impl VersionControlBuilder { index_file_size: 0, num_rows: 0, num_row_groups: 0, + num_series: 0, sequence: NonZeroU64::new(start_ms as u64), partition_expr: match &self.metadata.partition_expr { Some(json_str) => partition::expr::PartitionExpr::from_json_str(json_str) @@ -193,6 +194,7 @@ pub(crate) fn apply_edit( index_file_size: 0, num_rows: 0, num_row_groups: 0, + num_series: 0, sequence: NonZeroU64::new(*start_ms as u64), partition_expr: match &version_control.current().version.metadata.partition_expr { Some(json_str) => partition::expr::PartitionExpr::from_json_str(json_str) diff --git a/src/store-api/src/sst_entry.rs b/src/store-api/src/sst_entry.rs index 8330af7b2e..52295bdb59 100644 --- a/src/store-api/src/sst_entry.rs +++ b/src/store-api/src/sst_entry.rs @@ -61,6 +61,8 @@ pub struct ManifestSstEntry { pub num_rows: u64, /// Number of row groups in the SST. pub num_row_groups: u64, + /// Number of series in the SST. + pub num_series: Option, /// Min timestamp. pub min_ts: Timestamp, /// Max timestamp. 
@@ -94,6 +96,7 @@ impl ManifestSstEntry { ColumnSchema::new("index_file_size", Ty::uint64_datatype(), true), ColumnSchema::new("num_rows", Ty::uint64_datatype(), false), ColumnSchema::new("num_row_groups", Ty::uint64_datatype(), false), + ColumnSchema::new("num_series", Ty::uint64_datatype(), true), ColumnSchema::new("min_ts", Ty::timestamp_nanosecond_datatype(), true), ColumnSchema::new("max_ts", Ty::timestamp_nanosecond_datatype(), true), ColumnSchema::new("sequence", Ty::uint64_datatype(), true), @@ -120,6 +123,7 @@ impl ManifestSstEntry { let index_file_sizes = entries.iter().map(|e| e.index_file_size); let num_rows = entries.iter().map(|e| e.num_rows); let num_row_groups = entries.iter().map(|e| e.num_row_groups); + let num_series = entries.iter().map(|e| e.num_series); let min_ts = entries.iter().map(|e| { e.min_ts .convert_to(TimeUnit::Nanosecond) @@ -150,6 +154,7 @@ impl ManifestSstEntry { Arc::new(UInt64Array::from_iter(index_file_sizes)), Arc::new(UInt64Array::from_iter_values(num_rows)), Arc::new(UInt64Array::from_iter_values(num_row_groups)), + Arc::new(UInt64Array::from_iter(num_series)), Arc::new(TimestampNanosecondArray::from_iter(min_ts)), Arc::new(TimestampNanosecondArray::from_iter(max_ts)), Arc::new(UInt64Array::from_iter(sequences)), @@ -434,6 +439,7 @@ mod tests { index_file_size: None, num_rows: 10, num_row_groups: 2, + num_series: Some(5), min_ts: Timestamp::new_millisecond(1000), // 1s -> 1_000_000_000ns max_ts: Timestamp::new_second(2), // 2s -> 2_000_000_000ns sequence: None, @@ -456,6 +462,7 @@ mod tests { index_file_size: Some(11), num_rows: 20, num_row_groups: 4, + num_series: None, min_ts: Timestamp::new_nanosecond(5), // 5ns max_ts: Timestamp::new_microsecond(2000), // 2ms -> 2_000_000ns sequence: Some(9), @@ -590,16 +597,24 @@ mod tests { assert_eq!(2, num_row_groups.value(0)); assert_eq!(4, num_row_groups.value(1)); - let min_ts = batch + let num_series = batch .column(14) .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(5, num_series.value(0)); + assert!(num_series.is_null(1)); + + let min_ts = batch + .column(15) + .as_any() .downcast_ref::() .unwrap(); assert_eq!(1_000_000_000, min_ts.value(0)); assert_eq!(5, min_ts.value(1)); let max_ts = batch - .column(15) + .column(16) .as_any() .downcast_ref::() .unwrap(); @@ -607,7 +622,7 @@ mod tests { assert_eq!(2_000_000, max_ts.value(1)); let sequences = batch - .column(16) + .column(17) .as_any() .downcast_ref::() .unwrap(); @@ -615,7 +630,7 @@ mod tests { assert_eq!(9, sequences.value(1)); let origin_region_ids = batch - .column(17) + .column(18) .as_any() .downcast_ref::() .unwrap(); @@ -623,7 +638,7 @@ mod tests { assert_eq!(region_id2.as_u64(), origin_region_ids.value(1)); let node_ids = batch - .column(18) + .column(19) .as_any() .downcast_ref::() .unwrap(); @@ -631,7 +646,7 @@ mod tests { assert!(node_ids.is_null(1)); let visible = batch - .column(19) + .column(20) .as_any() .downcast_ref::() .unwrap(); diff --git a/tests/cases/standalone/common/information_schema/ssts.result b/tests/cases/standalone/common/information_schema/ssts.result index 2c28e6e63c..d546efbdfb 100644 --- a/tests/cases/standalone/common/information_schema/ssts.result +++ b/tests/cases/standalone/common/information_schema/ssts.result @@ -17,6 +17,7 @@ DESC TABLE information_schema.ssts_manifest; | index_file_size | UInt64 | | YES | | FIELD | | num_rows | UInt64 | | NO | | FIELD | | num_row_groups | UInt64 | | NO | | FIELD | +| num_series | UInt64 | | YES | | FIELD | | min_ts | TimestampNanosecond | | YES | | FIELD | | max_ts | 
TimestampNanosecond | | YES | | FIELD | | sequence | UInt64 | | YES | | FIELD | @@ -95,13 +96,13 @@ ADMIN FLUSH_TABLE('sst_case'); -- SQLNESS REPLACE (/public/\d+) /public/ SELECT * FROM information_schema.ssts_manifest order by file_path; -+----------------------------+---------------+----------+---------------+--------------+-----------------+--------------------------------------+-------+----------------------------------------------------------------------------------------+-----------+---------------------------------------------------------------------------------------------+-----------------+----------+----------------+-------------------------+-------------------------+----------+------------------+---------+---------+ -| table_dir | region_id | table_id | region_number | region_group | region_sequence | file_id | level | file_path | file_size | index_file_path | index_file_size | num_rows | num_row_groups | min_ts | max_ts | sequence | origin_region_id | node_id | visible | -+----------------------------+---------------+----------+---------------+--------------+-----------------+--------------------------------------+-------+----------------------------------------------------------------------------------------+-----------+---------------------------------------------------------------------------------------------+-----------------+----------+----------------+-------------------------+-------------------------+----------+------------------+---------+---------+ -| data/greptime/public// |||||| || data/greptime/public//_/.parquet || data/greptime/public//_/index/.puffin |||| | |||| true | -| data/greptime/public// |||||| || data/greptime/public//_/.parquet || data/greptime/public//_/index/.puffin |||| | |||| true | -| data/greptime/public// |||||| || data/greptime/public//_/.parquet || data/greptime/public//_/index/.puffin |||| | |||| true | -+----------------------------+---------------+----------+---------------+--------------+-----------------+--------------------------------------+-------+----------------------------------------------------------------------------------------+-----------+---------------------------------------------------------------------------------------------+-----------------+----------+----------------+-------------------------+-------------------------+----------+------------------+---------+---------+ ++----------------------------+---------------+----------+---------------+--------------+-----------------+--------------------------------------+-------+----------------------------------------------------------------------------------------+-----------+---------------------------------------------------------------------------------------------+-----------------+----------+----------------+------------+-------------------------+-------------------------+----------+------------------+---------+---------+ +| table_dir | region_id | table_id | region_number | region_group | region_sequence | file_id | level | file_path | file_size | index_file_path | index_file_size | num_rows | num_row_groups | num_series | min_ts | max_ts | sequence | origin_region_id | node_id | visible | 
++----------------------------+---------------+----------+---------------+--------------+-----------------+--------------------------------------+-------+----------------------------------------------------------------------------------------+-----------+---------------------------------------------------------------------------------------------+-----------------+----------+----------------+------------+-------------------------+-------------------------+----------+------------------+---------+---------+ +| data/greptime/public// |||||| || data/greptime/public//_/.parquet || data/greptime/public//_/index/.puffin ||||| | |||| true | +| data/greptime/public// |||||| || data/greptime/public//_/.parquet || data/greptime/public//_/index/.puffin ||||| | |||| true | +| data/greptime/public// |||||| || data/greptime/public//_/.parquet || data/greptime/public//_/index/.puffin ||||| | |||| true | ++----------------------------+---------------+----------+---------------+--------------+-----------------+--------------------------------------+-------+----------------------------------------------------------------------------------------+-----------+---------------------------------------------------------------------------------------------+-----------------+----------+----------------+------------+-------------------------+-------------------------+----------+------------------+---------+---------+ -- SQLNESS REPLACE (\s+\d+\s+) -- SQLNESS REPLACE ([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}) @@ -163,15 +164,15 @@ ADMIN FLUSH_TABLE('sst_case'); -- SQLNESS REPLACE (/public/\d+) /public/ SELECT * FROM information_schema.ssts_manifest order by file_path; -+----------------------------+---------------+----------+---------------+--------------+-----------------+--------------------------------------+-------+----------------------------------------------------------------------------------------+-----------+---------------------------------------------------------------------------------------------+-----------------+----------+----------------+-------------------------+-------------------------+----------+------------------+---------+---------+ -| table_dir | region_id | table_id | region_number | region_group | region_sequence | file_id | level | file_path | file_size | index_file_path | index_file_size | num_rows | num_row_groups | min_ts | max_ts | sequence | origin_region_id | node_id | visible | -+----------------------------+---------------+----------+---------------+--------------+-----------------+--------------------------------------+-------+----------------------------------------------------------------------------------------+-----------+---------------------------------------------------------------------------------------------+-----------------+----------+----------------+-------------------------+-------------------------+----------+------------------+---------+---------+ -| data/greptime/public// |||||| || data/greptime/public//_/.parquet || data/greptime/public//_/index/.puffin |||| | |||| true | -| data/greptime/public// |||||| || data/greptime/public//_/.parquet || data/greptime/public//_/index/.puffin |||| | |||| true | -| data/greptime/public// |||||| || data/greptime/public//_/.parquet || data/greptime/public//_/index/.puffin |||| | |||| true | -| data/greptime/public// |||||| || data/greptime/public//_/.parquet || data/greptime/public//_/index/.puffin |||| | |||| true | -| data/greptime/public// |||||| || data/greptime/public//_/.parquet || 
data/greptime/public//_/index/.puffin |||| | |||| true | -+----------------------------+---------------+----------+---------------+--------------+-----------------+--------------------------------------+-------+----------------------------------------------------------------------------------------+-----------+---------------------------------------------------------------------------------------------+-----------------+----------+----------------+-------------------------+-------------------------+----------+------------------+---------+---------+ ++----------------------------+---------------+----------+---------------+--------------+-----------------+--------------------------------------+-------+----------------------------------------------------------------------------------------+-----------+---------------------------------------------------------------------------------------------+-----------------+----------+----------------+------------+-------------------------+-------------------------+----------+------------------+---------+---------+ +| table_dir | region_id | table_id | region_number | region_group | region_sequence | file_id | level | file_path | file_size | index_file_path | index_file_size | num_rows | num_row_groups | num_series | min_ts | max_ts | sequence | origin_region_id | node_id | visible | ++----------------------------+---------------+----------+---------------+--------------+-----------------+--------------------------------------+-------+----------------------------------------------------------------------------------------+-----------+---------------------------------------------------------------------------------------------+-----------------+----------+----------------+------------+-------------------------+-------------------------+----------+------------------+---------+---------+ +| data/greptime/public// |||||| || data/greptime/public//_/.parquet || data/greptime/public//_/index/.puffin ||||| | |||| true | +| data/greptime/public// |||||| || data/greptime/public//_/.parquet || data/greptime/public//_/index/.puffin ||||| | |||| true | +| data/greptime/public// |||||| || data/greptime/public//_/.parquet || data/greptime/public//_/index/.puffin ||||| | |||| true | +| data/greptime/public// |||||| || data/greptime/public//_/.parquet || data/greptime/public//_/index/.puffin ||||| | |||| true | +| data/greptime/public// |||||| || data/greptime/public//_/.parquet || data/greptime/public//_/index/.puffin ||||| | |||| true | ++----------------------------+---------------+----------+---------------+--------------+-----------------+--------------------------------------+-------+----------------------------------------------------------------------------------------+-----------+---------------------------------------------------------------------------------------------+-----------------+----------+----------------+------------+-------------------------+-------------------------+----------+------------------+---------+---------+ -- SQLNESS REPLACE (\s+\d+\s+) -- SQLNESS REPLACE ([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}) diff --git a/tests/cases/standalone/common/system/information_schema.result b/tests/cases/standalone/common/system/information_schema.result index 1cb53ccfe3..d211938c2a 100644 --- a/tests/cases/standalone/common/system/information_schema.result +++ b/tests/cases/standalone/common/system/information_schema.result @@ -411,20 +411,21 @@ select * from information_schema.columns order by table_schema, table_name, 
colu | greptime | information_schema | ssts_manifest | index_file_path | 11 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | | greptime | information_schema | ssts_manifest | index_file_size | 12 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | | greptime | information_schema | ssts_manifest | level | 8 | | | 3 | 0 | | | | | | select,insert | | UInt8 | tinyint unsigned | FIELD | | No | tinyint unsigned | | | -| greptime | information_schema | ssts_manifest | max_ts | 16 | | | | | 9 | | | | | select,insert | | TimestampNanosecond | timestamp(9) | FIELD | | Yes | timestamp(9) | | | -| greptime | information_schema | ssts_manifest | min_ts | 15 | | | | | 9 | | | | | select,insert | | TimestampNanosecond | timestamp(9) | FIELD | | Yes | timestamp(9) | | | -| greptime | information_schema | ssts_manifest | node_id | 19 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | +| greptime | information_schema | ssts_manifest | max_ts | 17 | | | | | 9 | | | | | select,insert | | TimestampNanosecond | timestamp(9) | FIELD | | Yes | timestamp(9) | | | +| greptime | information_schema | ssts_manifest | min_ts | 16 | | | | | 9 | | | | | select,insert | | TimestampNanosecond | timestamp(9) | FIELD | | Yes | timestamp(9) | | | +| greptime | information_schema | ssts_manifest | node_id | 20 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | | greptime | information_schema | ssts_manifest | num_row_groups | 14 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | No | bigint unsigned | | | | greptime | information_schema | ssts_manifest | num_rows | 13 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | No | bigint unsigned | | | -| greptime | information_schema | ssts_manifest | origin_region_id | 18 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | No | bigint unsigned | | | +| greptime | information_schema | ssts_manifest | num_series | 15 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | +| greptime | information_schema | ssts_manifest | origin_region_id | 19 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | No | bigint unsigned | | | | greptime | information_schema | ssts_manifest | region_group | 5 | | | 3 | 0 | | | | | | select,insert | | UInt8 | tinyint unsigned | FIELD | | No | tinyint unsigned | | | | greptime | information_schema | ssts_manifest | region_id | 2 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | No | bigint unsigned | | | | greptime | information_schema | ssts_manifest | region_number | 4 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | | | greptime | information_schema | ssts_manifest | region_sequence | 6 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | | -| greptime | information_schema | ssts_manifest | sequence | 17 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | +| greptime | information_schema | ssts_manifest | sequence | 18 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | | greptime | information_schema | ssts_manifest | 
table_dir | 1 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | ssts_manifest | table_id | 3 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | | -| greptime | information_schema | ssts_manifest | visible | 20 | | | | | | | | | | select,insert | | Boolean | boolean | FIELD | | No | boolean | | | +| greptime | information_schema | ssts_manifest | visible | 21 | | | | | | | | | | select,insert | | Boolean | boolean | FIELD | | No | boolean | | | | greptime | information_schema | ssts_storage | file_path | 1 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | ssts_storage | file_size | 2 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | | greptime | information_schema | ssts_storage | last_modified_ms | 3 | | | | | 3 | | | | | select,insert | | TimestampMillisecond | timestamp(3) | FIELD | | Yes | timestamp(3) | | | From 7da2f5ed12c278601a955620715f4dab4aed5271 Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Fri, 24 Oct 2025 17:11:42 +0800 Subject: [PATCH 13/14] refactor: refactor instruction handler and adds support for batch region downgrade operations (#7130) * refactor: refactor instruction handler Signed-off-by: WenyXu * refactor: support batch downgrade region instructions Signed-off-by: WenyXu * fix compat Signed-off-by: WenyXu * fix clippy Signed-off-by: WenyXu * add tests Signed-off-by: WenyXu * chore: add comments Signed-off-by: WenyXu --------- Signed-off-by: WenyXu --- src/common/meta/src/instruction.rs | 159 ++++- src/datanode/src/heartbeat/handler.rs | 111 ++-- .../src/heartbeat/handler/close_region.rs | 86 +-- .../src/heartbeat/handler/downgrade_region.rs | 617 ++++++++++-------- .../src/heartbeat/handler/flush_region.rs | 161 ++--- .../src/heartbeat/handler/open_region.rs | 94 +-- .../src/heartbeat/handler/upgrade_region.rs | 385 +++++------ .../downgrade_leader_region.rs | 26 +- src/meta-srv/src/procedure/test_util.rs | 18 +- 9 files changed, 953 insertions(+), 704 deletions(-) diff --git a/src/common/meta/src/instruction.rs b/src/common/meta/src/instruction.rs index 9a9d955f58..c7bd82d675 100644 --- a/src/common/meta/src/instruction.rs +++ b/src/common/meta/src/instruction.rs @@ -55,6 +55,10 @@ impl Display for RegionIdent { /// The result of downgrade leader region. #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] pub struct DowngradeRegionReply { + /// The [RegionId]. + /// For compatibility, it is defaulted to [RegionId::new(0, 0)]. + #[serde(default)] + pub region_id: RegionId, /// Returns the `last_entry_id` if available. pub last_entry_id: Option, /// Returns the `metadata_last_entry_id` if available (Only available for metric engine). @@ -423,14 +427,60 @@ pub enum Instruction { CloseRegions(Vec), /// Upgrades a region. UpgradeRegion(UpgradeRegion), + #[serde( + deserialize_with = "single_or_multiple_from", + alias = "DowngradeRegion" + )] /// Downgrades a region. - DowngradeRegion(DowngradeRegion), + DowngradeRegions(Vec), /// Invalidates batch cache. InvalidateCaches(Vec), /// Flushes regions. FlushRegions(FlushRegions), } +impl Instruction { + /// Converts the instruction into a vector of [OpenRegion]. 
+ pub fn into_open_regions(self) -> Option> { + match self { + Self::OpenRegions(open_regions) => Some(open_regions), + _ => None, + } + } + + /// Converts the instruction into a vector of [RegionIdent]. + pub fn into_close_regions(self) -> Option> { + match self { + Self::CloseRegions(close_regions) => Some(close_regions), + _ => None, + } + } + + /// Converts the instruction into a [FlushRegions]. + pub fn into_flush_regions(self) -> Option { + match self { + Self::FlushRegions(flush_regions) => Some(flush_regions), + _ => None, + } + } + + /// Converts the instruction into a [DowngradeRegion]. + pub fn into_downgrade_regions(self) -> Option> { + match self { + Self::DowngradeRegions(downgrade_region) => Some(downgrade_region), + _ => None, + } + } + + /// Converts the instruction into a [UpgradeRegion]. + pub fn into_upgrade_regions(self) -> Option { + match self { + Self::UpgradeRegion(upgrade_region) => Some(upgrade_region), + _ => None, + } + } +} + /// The reply of [UpgradeRegion]. #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] pub struct UpgradeRegionReply { @@ -452,6 +502,39 @@ impl Display for UpgradeRegionReply { } } +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] +pub struct DowngradeRegionsReply { + pub replies: Vec, +} + +impl DowngradeRegionsReply { + pub fn new(replies: Vec) -> Self { + Self { replies } + } + + pub fn single(reply: DowngradeRegionReply) -> Self { + Self::new(vec![reply]) + } +} + +#[derive(Deserialize)] +#[serde(untagged)] +enum DowngradeRegionsCompat { + Single(DowngradeRegionReply), + Multiple(DowngradeRegionsReply), +} + +fn downgrade_regions_compat_from<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + let helper = DowngradeRegionsCompat::deserialize(deserializer)?; + Ok(match helper { + DowngradeRegionsCompat::Single(x) => DowngradeRegionsReply::new(vec![x]), + DowngradeRegionsCompat::Multiple(reply) => reply, + }) +} + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] #[serde(tag = "type", rename_all = "snake_case")] pub enum InstructionReply { @@ -460,7 +543,11 @@ pub enum InstructionReply { #[serde(alias = "close_region")] CloseRegions(SimpleReply), UpgradeRegion(UpgradeRegionReply), - DowngradeRegion(DowngradeRegionReply), + #[serde( + alias = "downgrade_region", + deserialize_with = "downgrade_regions_compat_from" + )] + DowngradeRegions(DowngradeRegionsReply), FlushRegions(FlushRegionReply), } @@ -470,8 +557,8 @@ impl Display for InstructionReply { Self::OpenRegions(reply) => write!(f, "InstructionReply::OpenRegions({})", reply), Self::CloseRegions(reply) => write!(f, "InstructionReply::CloseRegions({})", reply), Self::UpgradeRegion(reply) => write!(f, "InstructionReply::UpgradeRegion({})", reply), - Self::DowngradeRegion(reply) => { - write!(f, "InstructionReply::DowngradeRegion({})", reply) + Self::DowngradeRegions(reply) => { + write!(f, "InstructionReply::DowngradeRegions({:?})", reply) } Self::FlushRegions(reply) => write!(f, "InstructionReply::FlushRegions({})", reply), } @@ -493,6 +580,27 @@ impl InstructionReply { _ => panic!("Expected OpenRegions reply"), } } + + pub fn expect_upgrade_region_reply(self) -> UpgradeRegionReply { + match self { + Self::UpgradeRegion(reply) => reply, + _ => panic!("Expected UpgradeRegion reply"), + } + } + + pub fn expect_downgrade_regions_reply(self) -> Vec { + match self { + Self::DowngradeRegions(reply) => reply.replies, + _ => panic!("Expected DowngradeRegion reply"), + } + } + + pub fn expect_flush_regions_reply(self) -> FlushRegionReply 
{ + match self { + Self::FlushRegions(reply) => reply, + _ => panic!("Expected FlushRegions reply"), + } + } } #[cfg(test)] @@ -532,11 +640,27 @@ mod tests { r#"{"CloseRegions":[{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}]}"#, serialized ); + + let downgrade_region = InstructionReply::DowngradeRegions(DowngradeRegionsReply::single( + DowngradeRegionReply { + region_id: RegionId::new(1024, 1), + last_entry_id: None, + metadata_last_entry_id: None, + exists: true, + error: None, + }, + )); + + let serialized = serde_json::to_string(&downgrade_region).unwrap(); + assert_eq!( + r#"{"type":"downgrade_regions","replies":[{"region_id":4398046511105,"last_entry_id":null,"metadata_last_entry_id":null,"exists":true,"error":null}]}"#, + serialized + ) } #[test] fn test_deserialize_instruction() { - let open_region_instruction = r#"{"OpenRegion":[{"region_ident":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false}]}"#; + let open_region_instruction = r#"{"OpenRegion":{"region_ident":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false}}"#; let open_region_instruction: Instruction = serde_json::from_str(open_region_instruction).unwrap(); let open_region = Instruction::OpenRegions(vec![OpenRegion::new( @@ -553,7 +677,7 @@ mod tests { )]); assert_eq!(open_region_instruction, open_region); - let close_region_instruction = r#"{"CloseRegion":[{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}]}"#; + let close_region_instruction = r#"{"CloseRegion":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}}"#; let close_region_instruction: Instruction = serde_json::from_str(close_region_instruction).unwrap(); let close_region = Instruction::CloseRegions(vec![RegionIdent { @@ -564,6 +688,15 @@ mod tests { }]); assert_eq!(close_region_instruction, close_region); + let downgrade_region_instruction = r#"{"DowngradeRegions":{"region_id":4398046511105,"flush_timeout":{"secs":1,"nanos":0}}}"#; + let downgrade_region_instruction: Instruction = + serde_json::from_str(downgrade_region_instruction).unwrap(); + let downgrade_region = Instruction::DowngradeRegions(vec![DowngradeRegion { + region_id: RegionId::new(1024, 1), + flush_timeout: Some(Duration::from_millis(1000)), + }]); + assert_eq!(downgrade_region_instruction, downgrade_region); + let close_region_instruction_reply = r#"{"result":true,"error":null,"type":"close_region"}"#; let close_region_instruction_reply: InstructionReply = @@ -582,6 +715,20 @@ mod tests { error: None, }); assert_eq!(open_region_instruction_reply, open_region_reply); + + let downgrade_region_instruction_reply = r#"{"region_id":4398046511105,"last_entry_id":null,"metadata_last_entry_id":null,"exists":true,"error":null,"type":"downgrade_region"}"#; + let downgrade_region_instruction_reply: InstructionReply = + serde_json::from_str(downgrade_region_instruction_reply).unwrap(); + let downgrade_region_reply = InstructionReply::DowngradeRegions( + DowngradeRegionsReply::single(DowngradeRegionReply { + region_id: RegionId::new(1024, 1), + last_entry_id: None, + metadata_last_entry_id: None, + exists: true, + error: None, + }), + ); + assert_eq!(downgrade_region_instruction_reply, downgrade_region_reply); } #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/src/datanode/src/heartbeat/handler.rs 
b/src/datanode/src/heartbeat/handler.rs index 14a671a14b..71b3181a04 100644 --- a/src/datanode/src/heartbeat/handler.rs +++ b/src/datanode/src/heartbeat/handler.rs @@ -13,16 +13,13 @@ // limitations under the License. use async_trait::async_trait; -use common_meta::RegionIdent; use common_meta::error::{InvalidHeartbeatResponseSnafu, Result as MetaResult}; use common_meta::heartbeat::handler::{ HandleControl, HeartbeatResponseHandler, HeartbeatResponseHandlerContext, }; use common_meta::instruction::{Instruction, InstructionReply}; use common_telemetry::error; -use futures::future::BoxFuture; use snafu::OptionExt; -use store_api::storage::RegionId; mod close_region; mod downgrade_region; @@ -30,10 +27,15 @@ mod flush_region; mod open_region; mod upgrade_region; +use crate::heartbeat::handler::close_region::CloseRegionsHandler; +use crate::heartbeat::handler::downgrade_region::DowngradeRegionsHandler; +use crate::heartbeat::handler::flush_region::FlushRegionsHandler; +use crate::heartbeat::handler::open_region::OpenRegionsHandler; +use crate::heartbeat::handler::upgrade_region::UpgradeRegionsHandler; use crate::heartbeat::task_tracker::TaskTracker; use crate::region_server::RegionServer; -/// Handler for [Instruction::OpenRegion] and [Instruction::CloseRegion]. +/// The handler for [`Instruction`]s. #[derive(Clone)] pub struct RegionHeartbeatResponseHandler { region_server: RegionServer, @@ -43,9 +45,14 @@ pub struct RegionHeartbeatResponseHandler { open_region_parallelism: usize, } -/// Handler of the instruction. -pub type InstructionHandler = - Box BoxFuture<'static, Option> + Send>; +#[async_trait::async_trait] +pub trait InstructionHandler: Send + Sync { + async fn handle( + &self, + ctx: &HandlerContext, + instruction: Instruction, + ) -> Option; +} #[derive(Clone)] pub struct HandlerContext { @@ -56,10 +63,6 @@ pub struct HandlerContext { } impl HandlerContext { - fn region_ident_to_region_id(region_ident: &RegionIdent) -> RegionId { - RegionId::new(region_ident.table_id, region_ident.region_number) - } - #[cfg(test)] pub fn new_for_test(region_server: RegionServer) -> Self { Self { @@ -90,31 +93,16 @@ impl RegionHeartbeatResponseHandler { self } - /// Builds the [InstructionHandler]. 
- fn build_handler(&self, instruction: Instruction) -> MetaResult { + fn build_handler(&self, instruction: &Instruction) -> MetaResult> { match instruction { - Instruction::OpenRegions(open_regions) => { - let open_region_parallelism = self.open_region_parallelism; - Ok(Box::new(move |handler_context| { - handler_context - .handle_open_regions_instruction(open_regions, open_region_parallelism) - })) - } - Instruction::CloseRegions(close_regions) => Ok(Box::new(move |handler_context| { - handler_context.handle_close_regions_instruction(close_regions) - })), - Instruction::DowngradeRegion(downgrade_region) => { - Ok(Box::new(move |handler_context| { - handler_context.handle_downgrade_region_instruction(downgrade_region) - })) - } - Instruction::UpgradeRegion(upgrade_region) => Ok(Box::new(move |handler_context| { - handler_context.handle_upgrade_region_instruction(upgrade_region) + Instruction::CloseRegions(_) => Ok(Box::new(CloseRegionsHandler)), + Instruction::OpenRegions(_) => Ok(Box::new(OpenRegionsHandler { + open_region_parallelism: self.open_region_parallelism, })), + Instruction::FlushRegions(_) => Ok(Box::new(FlushRegionsHandler)), + Instruction::DowngradeRegions(_) => Ok(Box::new(DowngradeRegionsHandler)), + Instruction::UpgradeRegion(_) => Ok(Box::new(UpgradeRegionsHandler)), Instruction::InvalidateCaches(_) => InvalidHeartbeatResponseSnafu.fail(), - Instruction::FlushRegions(flush_regions) => Ok(Box::new(move |handler_context| { - handler_context.handle_flush_regions_instruction(flush_regions) - })), } } } @@ -124,7 +112,7 @@ impl HeartbeatResponseHandler for RegionHeartbeatResponseHandler { fn is_acceptable(&self, ctx: &HeartbeatResponseHandlerContext) -> bool { matches!(ctx.incoming_message.as_ref(), |Some(( _, - Instruction::DowngradeRegion { .. }, + Instruction::DowngradeRegions { .. }, ))| Some(( _, Instruction::UpgradeRegion { .. } @@ -151,15 +139,19 @@ impl HeartbeatResponseHandler for RegionHeartbeatResponseHandler { let catchup_tasks = self.catchup_tasks.clone(); let downgrade_tasks = self.downgrade_tasks.clone(); let flush_tasks = self.flush_tasks.clone(); - let handler = self.build_handler(instruction)?; + let handler = self.build_handler(&instruction)?; let _handle = common_runtime::spawn_global(async move { - let reply = handler(HandlerContext { - region_server, - catchup_tasks, - downgrade_tasks, - flush_tasks, - }) - .await; + let reply = handler + .handle( + &HandlerContext { + region_server, + catchup_tasks, + downgrade_tasks, + flush_tasks, + }, + instruction, + ) + .await; if let Some(reply) = reply && let Err(e) = mailbox.send((meta, reply)).await @@ -179,6 +171,7 @@ mod tests { use std::sync::Arc; use std::time::Duration; + use common_meta::RegionIdent; use common_meta::heartbeat::mailbox::{ HeartbeatMailbox, IncomingMessage, MailboxRef, MessageMeta, }; @@ -249,10 +242,10 @@ mod tests { ); // Downgrade region - let instruction = Instruction::DowngradeRegion(DowngradeRegion { + let instruction = Instruction::DowngradeRegions(vec![DowngradeRegion { region_id: RegionId::new(2048, 1), flush_timeout: Some(Duration::from_secs(1)), - }); + }]); assert!( heartbeat_handler .is_acceptable(&heartbeat_env.create_handler_ctx((meta.clone(), instruction))) @@ -447,10 +440,10 @@ mod tests { // Should be ok, if we try to downgrade it twice. 
for _ in 0..2 { let meta = MessageMeta::new_test(1, "test", "dn-1", "me-0"); - let instruction = Instruction::DowngradeRegion(DowngradeRegion { + let instruction = Instruction::DowngradeRegions(vec![DowngradeRegion { region_id, flush_timeout: Some(Duration::from_secs(1)), - }); + }]); let mut ctx = heartbeat_env.create_handler_ctx((meta, instruction)); let control = heartbeat_handler.handle(&mut ctx).await.unwrap(); @@ -458,33 +451,27 @@ mod tests { let (_, reply) = heartbeat_env.receiver.recv().await.unwrap(); - if let InstructionReply::DowngradeRegion(reply) = reply { - assert!(reply.exists); - assert!(reply.error.is_none()); - assert_eq!(reply.last_entry_id.unwrap(), 0); - } else { - unreachable!() - } + let reply = &reply.expect_downgrade_regions_reply()[0]; + assert!(reply.exists); + assert!(reply.error.is_none()); + assert_eq!(reply.last_entry_id.unwrap(), 0); } // Downgrades a not exists region. let meta = MessageMeta::new_test(1, "test", "dn-1", "me-0"); - let instruction = Instruction::DowngradeRegion(DowngradeRegion { + let instruction = Instruction::DowngradeRegions(vec![DowngradeRegion { region_id: RegionId::new(2048, 1), flush_timeout: Some(Duration::from_secs(1)), - }); + }]); let mut ctx = heartbeat_env.create_handler_ctx((meta, instruction)); let control = heartbeat_handler.handle(&mut ctx).await.unwrap(); assert_matches!(control, HandleControl::Continue); let (_, reply) = heartbeat_env.receiver.recv().await.unwrap(); - if let InstructionReply::DowngradeRegion(reply) = reply { - assert!(!reply.exists); - assert!(reply.error.is_none()); - assert!(reply.last_entry_id.is_none()); - } else { - unreachable!() - } + let reply = reply.expect_downgrade_regions_reply(); + assert!(!reply[0].exists); + assert!(reply[0].error.is_none()); + assert!(reply[0].last_entry_id.is_none()); } } diff --git a/src/datanode/src/heartbeat/handler/close_region.rs b/src/datanode/src/heartbeat/handler/close_region.rs index c942642731..88ed043fab 100644 --- a/src/datanode/src/heartbeat/handler/close_region.rs +++ b/src/datanode/src/heartbeat/handler/close_region.rs @@ -12,60 +12,64 @@ // See the License for the specific language governing permissions and // limitations under the License. -use common_meta::RegionIdent; -use common_meta::instruction::{InstructionReply, SimpleReply}; +use common_meta::instruction::{Instruction, InstructionReply, SimpleReply}; use common_telemetry::warn; use futures::future::join_all; -use futures_util::future::BoxFuture; use store_api::region_request::{RegionCloseRequest, RegionRequest}; +use store_api::storage::RegionId; use crate::error; -use crate::heartbeat::handler::HandlerContext; +use crate::heartbeat::handler::{HandlerContext, InstructionHandler}; -impl HandlerContext { - pub(crate) fn handle_close_regions_instruction( - self, - region_idents: Vec, - ) -> BoxFuture<'static, Option> { - Box::pin(async move { - let region_ids = region_idents - .into_iter() - .map(|region_ident| Self::region_ident_to_region_id(®ion_ident)) - .collect::>(); +#[derive(Debug, Clone, Copy, Default)] +pub struct CloseRegionsHandler; - let futs = region_ids.iter().map(|region_id| { - self.region_server - .handle_request(*region_id, RegionRequest::Close(RegionCloseRequest {})) - }); +#[async_trait::async_trait] +impl InstructionHandler for CloseRegionsHandler { + async fn handle( + &self, + ctx: &HandlerContext, + instruction: Instruction, + ) -> Option { + // Safety: must be `Instruction::CloseRegions` instruction. 
+ let region_idents = instruction.into_close_regions().unwrap(); + let region_ids = region_idents + .into_iter() + .map(|region_ident| RegionId::new(region_ident.table_id, region_ident.region_number)) + .collect::>(); - let results = join_all(futs).await; + let futs = region_ids.iter().map(|region_id| { + ctx.region_server + .handle_request(*region_id, RegionRequest::Close(RegionCloseRequest {})) + }); - let mut errors = vec![]; - for (region_id, result) in region_ids.into_iter().zip(results.into_iter()) { - match result { - Ok(_) => (), - Err(error::Error::RegionNotFound { .. }) => { - warn!( - "Received a close regions instruction from meta, but target region:{} is not found.", - region_id - ); - } - Err(err) => errors.push(format!("region:{region_id}: {err:?}")), + let results = join_all(futs).await; + + let mut errors = vec![]; + for (region_id, result) in region_ids.into_iter().zip(results.into_iter()) { + match result { + Ok(_) => (), + Err(error::Error::RegionNotFound { .. }) => { + warn!( + "Received a close regions instruction from meta, but target region:{} is not found.", + region_id + ); } + Err(err) => errors.push(format!("region:{region_id}: {err:?}")), } + } - if errors.is_empty() { - return Some(InstructionReply::CloseRegions(SimpleReply { - result: true, - error: None, - })); - } + if errors.is_empty() { + return Some(InstructionReply::CloseRegions(SimpleReply { + result: true, + error: None, + })); + } - Some(InstructionReply::CloseRegions(SimpleReply { - result: false, - error: Some(errors.join("; ")), - })) - }) + Some(InstructionReply::CloseRegions(SimpleReply { + result: false, + error: Some(errors.join("; ")), + })) } } diff --git a/src/datanode/src/heartbeat/handler/downgrade_region.rs b/src/datanode/src/heartbeat/handler/downgrade_region.rs index 06d3ab046e..91ceddb91a 100644 --- a/src/datanode/src/heartbeat/handler/downgrade_region.rs +++ b/src/datanode/src/heartbeat/handler/downgrade_region.rs @@ -12,209 +12,242 @@ // See the License for the specific language governing permissions and // limitations under the License. 
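
The rewrite above replaces the closure-based instruction handler with a trait object that is picked per `Instruction` variant before the task is spawned (see `build_handler` in handler.rs and `CloseRegionsHandler` just above). Below is a minimal, self-contained sketch of that dispatch shape; it uses toy types and a synchronous `handle`, not the real crate API, which is async via `#[async_trait]` and returns `Option<InstructionReply>`.

    // Toy types only; the real trait is async and works on the crate's Instruction enum.
    enum Instruction {
        CloseRegions(Vec<u64>),
        FlushRegions(Vec<u64>),
    }

    trait InstructionHandler {
        fn handle(&self, instruction: Instruction) -> Option<String>;
    }

    struct CloseRegionsHandler;

    impl InstructionHandler for CloseRegionsHandler {
        fn handle(&self, instruction: Instruction) -> Option<String> {
            let ids = match instruction {
                Instruction::CloseRegions(ids) => ids,
                // The dispatcher guarantees the variant matches the handler.
                _ => return None,
            };
            Some(format!("closed {} regions", ids.len()))
        }
    }

    struct FlushRegionsHandler;

    impl InstructionHandler for FlushRegionsHandler {
        fn handle(&self, instruction: Instruction) -> Option<String> {
            let ids = match instruction {
                Instruction::FlushRegions(ids) => ids,
                _ => return None,
            };
            Some(format!("flushed {} regions", ids.len()))
        }
    }

    // One handler per instruction variant, selected before spawning the task.
    fn build_handler(instruction: &Instruction) -> Box<dyn InstructionHandler> {
        match instruction {
            Instruction::CloseRegions(_) => Box::new(CloseRegionsHandler),
            Instruction::FlushRegions(_) => Box::new(FlushRegionsHandler),
        }
    }

    fn main() {
        let instruction = Instruction::CloseRegions(vec![1, 2, 3]);
        let handler = build_handler(&instruction);
        println!("{:?}", handler.handle(instruction));
    }

The payload still travels inside the `Instruction` value, so each handler unwraps its own variant; that is why the real handlers can call helpers such as `into_close_regions().unwrap()` safely.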
-use common_meta::instruction::{DowngradeRegion, DowngradeRegionReply, InstructionReply}; +use common_meta::instruction::{ + DowngradeRegion, DowngradeRegionReply, DowngradeRegionsReply, Instruction, InstructionReply, +}; use common_telemetry::tracing::info; use common_telemetry::{error, warn}; -use futures_util::future::BoxFuture; +use futures::future::join_all; use store_api::region_engine::{SetRegionRoleStateResponse, SettableRegionRoleState}; use store_api::region_request::{RegionFlushRequest, RegionRequest}; use store_api::storage::RegionId; -use crate::heartbeat::handler::HandlerContext; +use crate::heartbeat::handler::{HandlerContext, InstructionHandler}; use crate::heartbeat::task_tracker::WaitResult; -impl HandlerContext { - async fn downgrade_to_follower_gracefully( +#[derive(Debug, Clone, Copy, Default)] +pub struct DowngradeRegionsHandler; + +impl DowngradeRegionsHandler { + async fn handle_downgrade_region( + ctx: &HandlerContext, + DowngradeRegion { + region_id, + flush_timeout, + }: DowngradeRegion, + ) -> DowngradeRegionReply { + let Some(writable) = ctx.region_server.is_region_leader(region_id) else { + warn!("Region: {region_id} is not found"); + return DowngradeRegionReply { + region_id, + last_entry_id: None, + metadata_last_entry_id: None, + exists: false, + error: None, + }; + }; + + let region_server_moved = ctx.region_server.clone(); + + // Ignores flush request + if !writable { + warn!( + "Region: {region_id} is not writable, flush_timeout: {:?}", + flush_timeout + ); + return ctx.downgrade_to_follower_gracefully(region_id).await; + } + + // If flush_timeout is not set, directly convert region to follower. + let Some(flush_timeout) = flush_timeout else { + return ctx.downgrade_to_follower_gracefully(region_id).await; + }; + + // Sets region to downgrading, + // the downgrading region will reject all write requests. + // However, the downgrading region will still accept read, flush requests. + match ctx + .region_server + .set_region_role_state_gracefully(region_id, SettableRegionRoleState::DowngradingLeader) + .await + { + Ok(SetRegionRoleStateResponse::Success { .. 
}) => {} + Ok(SetRegionRoleStateResponse::NotFound) => { + warn!("Region: {region_id} is not found"); + return DowngradeRegionReply { + region_id, + last_entry_id: None, + metadata_last_entry_id: None, + exists: false, + error: None, + }; + } + Ok(SetRegionRoleStateResponse::InvalidTransition(err)) => { + error!(err; "Failed to convert region to downgrading leader - invalid transition"); + return DowngradeRegionReply { + region_id, + last_entry_id: None, + metadata_last_entry_id: None, + exists: true, + error: Some(format!("{err:?}")), + }; + } + Err(err) => { + error!(err; "Failed to convert region to downgrading leader"); + return DowngradeRegionReply { + region_id, + last_entry_id: None, + metadata_last_entry_id: None, + exists: true, + error: Some(format!("{err:?}")), + }; + } + } + + let register_result = ctx + .downgrade_tasks + .try_register( + region_id, + Box::pin(async move { + info!("Flush region: {region_id} before converting region to follower"); + region_server_moved + .handle_request( + region_id, + RegionRequest::Flush(RegionFlushRequest { + row_group_size: None, + }), + ) + .await?; + + Ok(()) + }), + ) + .await; + + if register_result.is_busy() { + warn!("Another flush task is running for the region: {region_id}"); + } + + let mut watcher = register_result.into_watcher(); + let result = ctx.downgrade_tasks.wait(&mut watcher, flush_timeout).await; + + match result { + WaitResult::Timeout => DowngradeRegionReply { + region_id, + last_entry_id: None, + metadata_last_entry_id: None, + exists: true, + error: Some(format!( + "Flush region timeout, region: {region_id}, timeout: {:?}", + flush_timeout + )), + }, + WaitResult::Finish(Ok(_)) => ctx.downgrade_to_follower_gracefully(region_id).await, + WaitResult::Finish(Err(err)) => DowngradeRegionReply { + region_id, + last_entry_id: None, + metadata_last_entry_id: None, + exists: true, + error: Some(format!("{err:?}")), + }, + } + } +} + +#[async_trait::async_trait] +impl InstructionHandler for DowngradeRegionsHandler { + async fn handle( &self, - region_id: RegionId, + ctx: &HandlerContext, + instruction: Instruction, ) -> Option { + // Safety: must be `Instruction::DowngradeRegion` instruction. + let downgrade_regions = instruction.into_downgrade_regions().unwrap(); + let futures = downgrade_regions + .into_iter() + .map(|downgrade_region| Self::handle_downgrade_region(ctx, downgrade_region)); + // Join all futures; parallelism is governed by the underlying flush scheduler. 
+ let results = join_all(futures).await; + + Some(InstructionReply::DowngradeRegions( + DowngradeRegionsReply::new(results), + )) + } +} + +impl HandlerContext { + async fn downgrade_to_follower_gracefully(&self, region_id: RegionId) -> DowngradeRegionReply { match self .region_server .set_region_role_state_gracefully(region_id, SettableRegionRoleState::Follower) .await { - Ok(SetRegionRoleStateResponse::Success(success)) => { - Some(InstructionReply::DowngradeRegion(DowngradeRegionReply { - last_entry_id: success.last_entry_id(), - metadata_last_entry_id: success.metadata_last_entry_id(), - exists: true, - error: None, - })) - } + Ok(SetRegionRoleStateResponse::Success(success)) => DowngradeRegionReply { + region_id, + last_entry_id: success.last_entry_id(), + metadata_last_entry_id: success.metadata_last_entry_id(), + exists: true, + error: None, + }, Ok(SetRegionRoleStateResponse::NotFound) => { warn!("Region: {region_id} is not found"); - Some(InstructionReply::DowngradeRegion(DowngradeRegionReply { + DowngradeRegionReply { + region_id, last_entry_id: None, metadata_last_entry_id: None, exists: false, error: None, - })) + } } Ok(SetRegionRoleStateResponse::InvalidTransition(err)) => { error!(err; "Failed to convert region to follower - invalid transition"); - Some(InstructionReply::DowngradeRegion(DowngradeRegionReply { + DowngradeRegionReply { + region_id, last_entry_id: None, metadata_last_entry_id: None, exists: true, error: Some(format!("{err:?}")), - })) + } } Err(err) => { error!(err; "Failed to convert region to {}", SettableRegionRoleState::Follower); - Some(InstructionReply::DowngradeRegion(DowngradeRegionReply { + DowngradeRegionReply { + region_id, last_entry_id: None, metadata_last_entry_id: None, exists: true, error: Some(format!("{err:?}")), - })) + } } } } - - pub(crate) fn handle_downgrade_region_instruction( - self, - DowngradeRegion { - region_id, - flush_timeout, - }: DowngradeRegion, - ) -> BoxFuture<'static, Option> { - Box::pin(async move { - let Some(writable) = self.region_server.is_region_leader(region_id) else { - warn!("Region: {region_id} is not found"); - return Some(InstructionReply::DowngradeRegion(DowngradeRegionReply { - last_entry_id: None, - metadata_last_entry_id: None, - exists: false, - error: None, - })); - }; - - let region_server_moved = self.region_server.clone(); - - // Ignores flush request - if !writable { - warn!( - "Region: {region_id} is not writable, flush_timeout: {:?}", - flush_timeout - ); - return self.downgrade_to_follower_gracefully(region_id).await; - } - - // If flush_timeout is not set, directly convert region to follower. - let Some(flush_timeout) = flush_timeout else { - return self.downgrade_to_follower_gracefully(region_id).await; - }; - - // Sets region to downgrading, - // the downgrading region will reject all write requests. - // However, the downgrading region will still accept read, flush requests. - match self - .region_server - .set_region_role_state_gracefully( - region_id, - SettableRegionRoleState::DowngradingLeader, - ) - .await - { - Ok(SetRegionRoleStateResponse::Success { .. 
}) => {} - Ok(SetRegionRoleStateResponse::NotFound) => { - warn!("Region: {region_id} is not found"); - return Some(InstructionReply::DowngradeRegion(DowngradeRegionReply { - last_entry_id: None, - metadata_last_entry_id: None, - exists: false, - error: None, - })); - } - Ok(SetRegionRoleStateResponse::InvalidTransition(err)) => { - error!(err; "Failed to convert region to downgrading leader - invalid transition"); - return Some(InstructionReply::DowngradeRegion(DowngradeRegionReply { - last_entry_id: None, - metadata_last_entry_id: None, - exists: true, - error: Some(format!("{err:?}")), - })); - } - Err(err) => { - error!(err; "Failed to convert region to downgrading leader"); - return Some(InstructionReply::DowngradeRegion(DowngradeRegionReply { - last_entry_id: None, - metadata_last_entry_id: None, - exists: true, - error: Some(format!("{err:?}")), - })); - } - } - - let register_result = self - .downgrade_tasks - .try_register( - region_id, - Box::pin(async move { - info!("Flush region: {region_id} before converting region to follower"); - region_server_moved - .handle_request( - region_id, - RegionRequest::Flush(RegionFlushRequest { - row_group_size: None, - }), - ) - .await?; - - Ok(()) - }), - ) - .await; - - if register_result.is_busy() { - warn!("Another flush task is running for the region: {region_id}"); - } - - let mut watcher = register_result.into_watcher(); - let result = self.downgrade_tasks.wait(&mut watcher, flush_timeout).await; - - match result { - WaitResult::Timeout => { - Some(InstructionReply::DowngradeRegion(DowngradeRegionReply { - last_entry_id: None, - metadata_last_entry_id: None, - exists: true, - error: Some(format!( - "Flush region timeout, region: {region_id}, timeout: {:?}", - flush_timeout - )), - })) - } - WaitResult::Finish(Ok(_)) => self.downgrade_to_follower_gracefully(region_id).await, - WaitResult::Finish(Err(err)) => { - Some(InstructionReply::DowngradeRegion(DowngradeRegionReply { - last_entry_id: None, - metadata_last_entry_id: None, - exists: true, - error: Some(format!("{err:?}")), - })) - } - } - }) - } } #[cfg(test)] mod tests { use std::assert_matches::assert_matches; + use std::sync::Arc; use std::time::Duration; - use common_meta::instruction::{DowngradeRegion, InstructionReply}; + use common_meta::heartbeat::handler::{HandleControl, HeartbeatResponseHandler}; + use common_meta::heartbeat::mailbox::MessageMeta; + use common_meta::instruction::{DowngradeRegion, Instruction}; + use mito2::config::MitoConfig; use mito2::engine::MITO_ENGINE_NAME; + use mito2::test_util::{CreateRequestBuilder, TestEnv}; use store_api::region_engine::{ - RegionRole, SetRegionRoleStateResponse, SetRegionRoleStateSuccess, + RegionEngine, RegionRole, SetRegionRoleStateResponse, SetRegionRoleStateSuccess, }; use store_api::region_request::RegionRequest; use store_api::storage::RegionId; use tokio::time::Instant; use crate::error; - use crate::heartbeat::handler::HandlerContext; + use crate::heartbeat::handler::downgrade_region::DowngradeRegionsHandler; + use crate::heartbeat::handler::tests::HeartbeatResponseTestEnv; + use crate::heartbeat::handler::{ + HandlerContext, InstructionHandler, RegionHeartbeatResponseHandler, + }; use crate::tests::{MockRegionEngine, mock_region_server}; #[tokio::test] @@ -227,20 +260,20 @@ mod tests { let waits = vec![None, Some(Duration::from_millis(100u64))]; for flush_timeout in waits { - let reply = handler_context - .clone() - .handle_downgrade_region_instruction(DowngradeRegion { - region_id, - flush_timeout, - }) + let reply = 
DowngradeRegionsHandler + .handle( + &handler_context, + Instruction::DowngradeRegions(vec![DowngradeRegion { + region_id, + flush_timeout, + }]), + ) .await; - assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_))); - if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() { - assert!(!reply.exists); - assert!(reply.error.is_none()); - assert!(reply.last_entry_id.is_none()); - } + let reply = &reply.unwrap().expect_downgrade_regions_reply()[0]; + assert!(!reply.exists); + assert!(reply.error.is_none()); + assert!(reply.last_entry_id.is_none()); } } @@ -270,20 +303,20 @@ mod tests { let waits = vec![None, Some(Duration::from_millis(100u64))]; for flush_timeout in waits { - let reply = handler_context - .clone() - .handle_downgrade_region_instruction(DowngradeRegion { - region_id, - flush_timeout, - }) + let reply = DowngradeRegionsHandler + .handle( + &handler_context, + Instruction::DowngradeRegions(vec![DowngradeRegion { + region_id, + flush_timeout, + }]), + ) .await; - assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_))); - if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() { - assert!(reply.exists); - assert!(reply.error.is_none()); - assert_eq!(reply.last_entry_id.unwrap(), 1024); - } + let reply = &reply.unwrap().expect_downgrade_regions_reply()[0]; + assert!(reply.exists); + assert!(reply.error.is_none()); + assert_eq!(reply.last_entry_id.unwrap(), 1024); } } @@ -305,20 +338,20 @@ mod tests { let handler_context = HandlerContext::new_for_test(mock_region_server); let flush_timeout = Duration::from_millis(100); - let reply = handler_context - .clone() - .handle_downgrade_region_instruction(DowngradeRegion { - region_id, - flush_timeout: Some(flush_timeout), - }) + let reply = DowngradeRegionsHandler + .handle( + &handler_context, + Instruction::DowngradeRegions(vec![DowngradeRegion { + region_id, + flush_timeout: Some(flush_timeout), + }]), + ) .await; - assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_))); - if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() { - assert!(reply.exists); - assert!(reply.error.unwrap().contains("timeout")); - assert!(reply.last_entry_id.is_none()); - } + let reply = &reply.unwrap().expect_downgrade_regions_reply()[0]; + assert!(reply.exists); + assert!(reply.error.as_ref().unwrap().contains("timeout")); + assert!(reply.last_entry_id.is_none()); } #[tokio::test] @@ -344,36 +377,38 @@ mod tests { ]; for flush_timeout in waits { - let reply = handler_context - .clone() - .handle_downgrade_region_instruction(DowngradeRegion { - region_id, - flush_timeout, - }) + let reply = DowngradeRegionsHandler + .handle( + &handler_context, + Instruction::DowngradeRegions(vec![DowngradeRegion { + region_id, + flush_timeout, + }]), + ) .await; - assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_))); - if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() { - assert!(reply.exists); - assert!(reply.error.unwrap().contains("timeout")); - assert!(reply.last_entry_id.is_none()); - } + + let reply = &reply.unwrap().expect_downgrade_regions_reply()[0]; + assert!(reply.exists); + assert!(reply.error.as_ref().unwrap().contains("timeout")); + assert!(reply.last_entry_id.is_none()); } let timer = Instant::now(); - let reply = handler_context - .handle_downgrade_region_instruction(DowngradeRegion { - region_id, - flush_timeout: Some(Duration::from_millis(500)), - }) + let reply = DowngradeRegionsHandler + .handle( + &handler_context, + 
Instruction::DowngradeRegions(vec![DowngradeRegion { + region_id, + flush_timeout: Some(Duration::from_millis(500)), + }]), + ) .await; - assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_))); // Must less than 300 ms. assert!(timer.elapsed().as_millis() < 300); - if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() { - assert!(reply.exists); - assert!(reply.error.is_none()); - assert_eq!(reply.last_entry_id.unwrap(), 1024); - } + let reply = &reply.unwrap().expect_downgrade_regions_reply()[0]; + assert!(reply.exists); + assert!(reply.error.is_none()); + assert_eq!(reply.last_entry_id.unwrap(), 1024); } #[tokio::test] @@ -405,36 +440,36 @@ mod tests { ]; for flush_timeout in waits { - let reply = handler_context - .clone() - .handle_downgrade_region_instruction(DowngradeRegion { - region_id, - flush_timeout, - }) + let reply = DowngradeRegionsHandler + .handle( + &handler_context, + Instruction::DowngradeRegions(vec![DowngradeRegion { + region_id, + flush_timeout, + }]), + ) .await; - assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_))); - if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() { - assert!(reply.exists); - assert!(reply.error.unwrap().contains("timeout")); - assert!(reply.last_entry_id.is_none()); - } - } - let timer = Instant::now(); - let reply = handler_context - .handle_downgrade_region_instruction(DowngradeRegion { - region_id, - flush_timeout: Some(Duration::from_millis(500)), - }) - .await; - assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_))); - // Must less than 300 ms. - assert!(timer.elapsed().as_millis() < 300); - - if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() { + let reply = &reply.unwrap().expect_downgrade_regions_reply()[0]; assert!(reply.exists); - assert!(reply.error.unwrap().contains("flush failed")); + assert!(reply.error.as_ref().unwrap().contains("timeout")); assert!(reply.last_entry_id.is_none()); } + let timer = Instant::now(); + let reply = DowngradeRegionsHandler + .handle( + &handler_context, + Instruction::DowngradeRegions(vec![DowngradeRegion { + region_id, + flush_timeout: Some(Duration::from_millis(500)), + }]), + ) + .await; + // Must less than 300 ms. 
+ assert!(timer.elapsed().as_millis() < 300); + let reply = &reply.unwrap().expect_downgrade_regions_reply()[0]; + assert!(reply.exists); + assert!(reply.error.as_ref().unwrap().contains("flush failed")); + assert!(reply.last_entry_id.is_none()); } #[tokio::test] @@ -449,19 +484,19 @@ mod tests { }); mock_region_server.register_test_region(region_id, mock_engine); let handler_context = HandlerContext::new_for_test(mock_region_server); - let reply = handler_context - .clone() - .handle_downgrade_region_instruction(DowngradeRegion { - region_id, - flush_timeout: None, - }) + let reply = DowngradeRegionsHandler + .handle( + &handler_context, + Instruction::DowngradeRegions(vec![DowngradeRegion { + region_id, + flush_timeout: None, + }]), + ) .await; - assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_))); - if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() { - assert!(!reply.exists); - assert!(reply.error.is_none()); - assert!(reply.last_entry_id.is_none()); - } + let reply = &reply.unwrap().expect_downgrade_regions_reply()[0]; + assert!(!reply.exists); + assert!(reply.error.is_none()); + assert!(reply.last_entry_id.is_none()); } #[tokio::test] @@ -480,23 +515,77 @@ mod tests { }); mock_region_server.register_test_region(region_id, mock_engine); let handler_context = HandlerContext::new_for_test(mock_region_server); - let reply = handler_context - .clone() - .handle_downgrade_region_instruction(DowngradeRegion { - region_id, - flush_timeout: None, - }) + let reply = DowngradeRegionsHandler + .handle( + &handler_context, + Instruction::DowngradeRegions(vec![DowngradeRegion { + region_id, + flush_timeout: None, + }]), + ) .await; - assert_matches!(reply, Some(InstructionReply::DowngradeRegion(_))); - if let InstructionReply::DowngradeRegion(reply) = reply.unwrap() { - assert!(reply.exists); - assert!( - reply - .error - .unwrap() - .contains("Failed to set region to readonly") - ); - assert!(reply.last_entry_id.is_none()); - } + let reply = &reply.unwrap().expect_downgrade_regions_reply()[0]; + assert!(reply.exists); + assert!( + reply + .error + .as_ref() + .unwrap() + .contains("Failed to set region to readonly") + ); + assert!(reply.last_entry_id.is_none()); + } + + #[tokio::test] + async fn test_downgrade_regions() { + common_telemetry::init_default_ut_logging(); + + let mut region_server = mock_region_server(); + let heartbeat_handler = RegionHeartbeatResponseHandler::new(region_server.clone()); + let mut engine_env = TestEnv::with_prefix("downgrade-regions").await; + let engine = engine_env.create_engine(MitoConfig::default()).await; + region_server.register_engine(Arc::new(engine.clone())); + let region_id = RegionId::new(1024, 1); + let region_id1 = RegionId::new(1024, 2); + let builder = CreateRequestBuilder::new(); + let create_req = builder.build(); + region_server + .handle_request(region_id, RegionRequest::Create(create_req)) + .await + .unwrap(); + let create_req1 = builder.build(); + region_server + .handle_request(region_id1, RegionRequest::Create(create_req1)) + .await + .unwrap(); + let meta = MessageMeta::new_test(1, "test", "dn-1", "meta-0"); + let instruction = Instruction::DowngradeRegions(vec![ + DowngradeRegion { + region_id, + flush_timeout: Some(Duration::from_secs(1)), + }, + DowngradeRegion { + region_id: region_id1, + flush_timeout: Some(Duration::from_secs(1)), + }, + ]); + let mut heartbeat_env = HeartbeatResponseTestEnv::new(); + let mut ctx = heartbeat_env.create_handler_ctx((meta, instruction)); + let control = 
heartbeat_handler.handle(&mut ctx).await.unwrap(); + assert_matches!(control, HandleControl::Continue); + + let (_, reply) = heartbeat_env.receiver.recv().await.unwrap(); + let reply = reply.expect_downgrade_regions_reply(); + assert_eq!(reply[0].region_id, region_id); + assert!(reply[0].exists); + assert!(reply[0].error.is_none()); + assert_eq!(reply[0].last_entry_id, Some(0)); + assert_eq!(reply[1].region_id, region_id1); + assert!(reply[1].exists); + assert!(reply[1].error.is_none()); + assert_eq!(reply[1].last_entry_id, Some(0)); + + assert_eq!(engine.role(region_id).unwrap(), RegionRole::Follower); + assert_eq!(engine.role(region_id1).unwrap(), RegionRole::Follower); } } diff --git a/src/datanode/src/heartbeat/handler/flush_region.rs b/src/datanode/src/heartbeat/handler/flush_region.rs index 963d3bf488..56b841bf00 100644 --- a/src/datanode/src/heartbeat/handler/flush_region.rs +++ b/src/datanode/src/heartbeat/handler/flush_region.rs @@ -15,19 +15,53 @@ use std::time::Instant; use common_meta::instruction::{ - FlushErrorStrategy, FlushRegionReply, FlushRegions, FlushStrategy, InstructionReply, + FlushErrorStrategy, FlushRegionReply, FlushStrategy, Instruction, InstructionReply, }; use common_telemetry::{debug, warn}; -use futures_util::future::BoxFuture; use store_api::region_request::{RegionFlushRequest, RegionRequest}; use store_api::storage::RegionId; -use crate::error::{self, RegionNotFoundSnafu, RegionNotReadySnafu, UnexpectedSnafu}; -use crate::heartbeat::handler::HandlerContext; +use crate::error::{self, RegionNotFoundSnafu, RegionNotReadySnafu, Result, UnexpectedSnafu}; +use crate::heartbeat::handler::{HandlerContext, InstructionHandler}; + +pub struct FlushRegionsHandler; + +#[async_trait::async_trait] +impl InstructionHandler for FlushRegionsHandler { + async fn handle( + &self, + ctx: &HandlerContext, + instruction: Instruction, + ) -> Option { + let start_time = Instant::now(); + let flush_regions = instruction.into_flush_regions().unwrap(); + let strategy = flush_regions.strategy; + let region_ids = flush_regions.region_ids; + let error_strategy = flush_regions.error_strategy; + + let reply = if matches!(strategy, FlushStrategy::Async) { + // Asynchronous hint mode: fire-and-forget, no reply expected + ctx.handle_flush_hint(region_ids).await; + None + } else { + // Synchronous mode: return reply with results + let reply = ctx.handle_flush_sync(region_ids, error_strategy).await; + Some(InstructionReply::FlushRegions(reply)) + }; + + let elapsed = start_time.elapsed(); + debug!( + "FlushRegions strategy: {:?}, elapsed: {:?}, reply: {:?}", + strategy, elapsed, reply + ); + + reply + } +} impl HandlerContext { /// Performs the actual region flush operation. - async fn perform_region_flush(&self, region_id: RegionId) -> Result<(), error::Error> { + async fn perform_region_flush(&self, region_id: RegionId) -> Result<()> { let request = RegionRequest::Flush(RegionFlushRequest { row_group_size: None, }); @@ -92,7 +126,7 @@ impl HandlerContext { } /// Flushes a single region synchronously with proper error handling. - async fn flush_single_region_sync(&self, region_id: RegionId) -> Result<(), error::Error> { + async fn flush_single_region_sync(&self, region_id: RegionId) -> Result<()> { // Check if region is leader and writable let Some(writable) = self.region_server.is_region_leader(region_id) else { return Err(RegionNotFoundSnafu { region_id }.build()); @@ -135,37 +169,6 @@ impl HandlerContext { .build()), } } - - /// Unified handler for FlushRegions with all flush semantics. 
- pub(crate) fn handle_flush_regions_instruction( - self, - flush_regions: FlushRegions, - ) -> BoxFuture<'static, Option> { - Box::pin(async move { - let start_time = Instant::now(); - let strategy = flush_regions.strategy; - let region_ids = flush_regions.region_ids; - let error_strategy = flush_regions.error_strategy; - - let reply = if matches!(strategy, FlushStrategy::Async) { - // Asynchronous hint mode: fire-and-forget, no reply expected - self.handle_flush_hint(region_ids).await; - None - } else { - // Synchronous mode: return reply with results - let reply = self.handle_flush_sync(region_ids, error_strategy).await; - Some(InstructionReply::FlushRegions(reply)) - }; - - let elapsed = start_time.elapsed(); - debug!( - "FlushRegions strategy: {:?}, elapsed: {:?}, reply: {:?}", - strategy, elapsed, reply - ); - - reply - }) - } } #[cfg(test)] @@ -201,9 +204,11 @@ mod tests { // Async hint mode let flush_instruction = FlushRegions::async_batch(region_ids.clone()); - let reply = handler_context - .clone() - .handle_flush_regions_instruction(flush_instruction) + let reply = FlushRegionsHandler + .handle( + &handler_context, + Instruction::FlushRegions(flush_instruction), + ) .await; assert!(reply.is_none()); // Hint mode returns no reply assert_eq!(*flushed_region_ids.read().unwrap(), region_ids); @@ -212,8 +217,11 @@ mod tests { flushed_region_ids.write().unwrap().clear(); let not_found_region_ids = (0..2).map(|i| RegionId::new(2048, i)).collect::>(); let flush_instruction = FlushRegions::async_batch(not_found_region_ids); - let reply = handler_context - .handle_flush_regions_instruction(flush_instruction) + let reply = FlushRegionsHandler + .handle( + &handler_context, + Instruction::FlushRegions(flush_instruction), + ) .await; assert!(reply.is_none()); assert!(flushed_region_ids.read().unwrap().is_empty()); @@ -238,20 +246,17 @@ mod tests { let handler_context = HandlerContext::new_for_test(mock_region_server); let flush_instruction = FlushRegions::sync_single(region_id); - let reply = handler_context - .handle_flush_regions_instruction(flush_instruction) + let reply = FlushRegionsHandler + .handle( + &handler_context, + Instruction::FlushRegions(flush_instruction), + ) .await; - - assert!(reply.is_some()); - if let Some(InstructionReply::FlushRegions(flush_reply)) = reply { - assert!(flush_reply.overall_success); - assert_eq!(flush_reply.results.len(), 1); - assert_eq!(flush_reply.results[0].0, region_id); - assert!(flush_reply.results[0].1.is_ok()); - } else { - panic!("Expected FlushRegions reply"); - } - + let flush_reply = reply.unwrap().expect_flush_regions_reply(); + assert!(flush_reply.overall_success); + assert_eq!(flush_reply.results.len(), 1); + assert_eq!(flush_reply.results[0].0, region_id); + assert!(flush_reply.results[0].1.is_ok()); assert_eq!(*flushed_region_ids.read().unwrap(), vec![region_id]); } @@ -281,18 +286,16 @@ mod tests { // Sync batch with fail-fast strategy let flush_instruction = FlushRegions::sync_batch(region_ids.clone(), FlushErrorStrategy::FailFast); - let reply = handler_context - .handle_flush_regions_instruction(flush_instruction) + let reply = FlushRegionsHandler + .handle( + &handler_context, + Instruction::FlushRegions(flush_instruction), + ) .await; - - assert!(reply.is_some()); - if let Some(InstructionReply::FlushRegions(flush_reply)) = reply { - assert!(!flush_reply.overall_success); // Should fail due to non-existent regions - // With fail-fast, only process regions until first failure - assert!(flush_reply.results.len() <= 
region_ids.len()); - } else { - panic!("Expected FlushRegions reply"); - } + let flush_reply = reply.unwrap().expect_flush_regions_reply(); + assert!(!flush_reply.overall_success); // Should fail due to non-existent regions + // With fail-fast, only process regions until first failure + assert!(flush_reply.results.len() <= region_ids.len()); } #[tokio::test] @@ -317,20 +320,18 @@ mod tests { // Sync batch with try-all strategy let flush_instruction = FlushRegions::sync_batch(region_ids.clone(), FlushErrorStrategy::TryAll); - let reply = handler_context - .handle_flush_regions_instruction(flush_instruction) + let reply = FlushRegionsHandler + .handle( + &handler_context, + Instruction::FlushRegions(flush_instruction), + ) .await; - - assert!(reply.is_some()); - if let Some(InstructionReply::FlushRegions(flush_reply)) = reply { - assert!(!flush_reply.overall_success); // Should fail due to one non-existent region - // With try-all, should process all regions - assert_eq!(flush_reply.results.len(), region_ids.len()); - // First should succeed, second should fail - assert!(flush_reply.results[0].1.is_ok()); - assert!(flush_reply.results[1].1.is_err()); - } else { - panic!("Expected FlushRegions reply"); - } + let flush_reply = reply.unwrap().expect_flush_regions_reply(); + assert!(!flush_reply.overall_success); // Should fail due to one non-existent region + // With try-all, should process all regions + assert_eq!(flush_reply.results.len(), region_ids.len()); + // First should succeed, second should fail + assert!(flush_reply.results[0].1.is_ok()); + assert!(flush_reply.results[1].1.is_err()); } } diff --git a/src/datanode/src/heartbeat/handler/open_region.rs b/src/datanode/src/heartbeat/handler/open_region.rs index e6ea973eec..77cd4fe6a0 100644 --- a/src/datanode/src/heartbeat/handler/open_region.rs +++ b/src/datanode/src/heartbeat/handler/open_region.rs @@ -12,56 +12,62 @@ // See the License for the specific language governing permissions and // limitations under the License. 
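
The flush handler above keeps the two synchronous error strategies and only changes how the instruction reaches it. The following toy sketch (plain functions, no region server) shows how `FailFast` and `TryAll` differ when aggregating per-region results; the real handler performs the same aggregation over `RegionRequest::Flush` calls.

    // Sketch of the two error strategies the flush handler distinguishes:
    // FailFast stops at the first error, TryAll records every result.
    #[derive(Clone, Copy)]
    enum FlushErrorStrategy {
        FailFast,
        TryAll,
    }

    fn flush_region(region_id: u64) -> Result<(), String> {
        // Pretend even-numbered regions flush successfully.
        if region_id % 2 == 0 {
            Ok(())
        } else {
            Err(format!("region {region_id} not found"))
        }
    }

    fn flush_batch(
        region_ids: &[u64],
        strategy: FlushErrorStrategy,
    ) -> (bool, Vec<(u64, Result<(), String>)>) {
        let mut results = Vec::new();
        let mut overall_success = true;
        for &region_id in region_ids {
            let result = flush_region(region_id);
            let failed = result.is_err();
            results.push((region_id, result));
            if failed {
                overall_success = false;
                if matches!(strategy, FlushErrorStrategy::FailFast) {
                    break; // stop at the first failure
                }
            }
        }
        (overall_success, results)
    }

    fn main() {
        let regions = [2, 3, 4];
        println!("{:?}", flush_batch(&regions, FlushErrorStrategy::FailFast));
        println!("{:?}", flush_batch(&regions, FlushErrorStrategy::TryAll));
    }

This mirrors the tests above: with fail-fast the result list can be shorter than the input, while try-all always returns one entry per requested region.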
-use common_meta::instruction::{InstructionReply, OpenRegion, SimpleReply}; +use common_meta::instruction::{Instruction, InstructionReply, OpenRegion, SimpleReply}; use common_meta::wal_options_allocator::prepare_wal_options; -use futures_util::future::BoxFuture; use store_api::path_utils::table_dir; use store_api::region_request::{PathType, RegionOpenRequest}; +use store_api::storage::RegionId; -use crate::heartbeat::handler::HandlerContext; +use crate::heartbeat::handler::{HandlerContext, InstructionHandler}; -impl HandlerContext { - pub(crate) fn handle_open_regions_instruction( - self, - open_regions: Vec, - open_region_parallelism: usize, - ) -> BoxFuture<'static, Option> { - Box::pin(async move { - let requests = open_regions - .into_iter() - .map(|open_region| { - let OpenRegion { - region_ident, - region_storage_path, - mut region_options, - region_wal_options, - skip_wal_replay, - } = open_region; - let region_id = Self::region_ident_to_region_id(®ion_ident); - prepare_wal_options(&mut region_options, region_id, ®ion_wal_options); - let request = RegionOpenRequest { - engine: region_ident.engine, - table_dir: table_dir(®ion_storage_path, region_id.table_id()), - path_type: PathType::Bare, - options: region_options, - skip_wal_replay, - checkpoint: None, - }; - (region_id, request) - }) - .collect::>(); +pub struct OpenRegionsHandler { + pub open_region_parallelism: usize, +} - let result = self - .region_server - .handle_batch_open_requests(open_region_parallelism, requests, false) - .await; - let success = result.is_ok(); - let error = result.as_ref().map_err(|e| format!("{e:?}")).err(); - Some(InstructionReply::OpenRegions(SimpleReply { - result: success, - error, - })) - }) +#[async_trait::async_trait] +impl InstructionHandler for OpenRegionsHandler { + async fn handle( + &self, + ctx: &HandlerContext, + instruction: Instruction, + ) -> Option { + let open_regions = instruction.into_open_regions().unwrap(); + + let requests = open_regions + .into_iter() + .map(|open_region| { + let OpenRegion { + region_ident, + region_storage_path, + mut region_options, + region_wal_options, + skip_wal_replay, + } = open_region; + let region_id = RegionId::new(region_ident.table_id, region_ident.region_number); + prepare_wal_options(&mut region_options, region_id, ®ion_wal_options); + let request = RegionOpenRequest { + engine: region_ident.engine, + table_dir: table_dir(®ion_storage_path, region_id.table_id()), + path_type: PathType::Bare, + options: region_options, + skip_wal_replay, + checkpoint: None, + }; + (region_id, request) + }) + .collect::>(); + + let result = ctx + .region_server + .handle_batch_open_requests(self.open_region_parallelism, requests, false) + .await; + let success = result.is_ok(); + let error = result.as_ref().map_err(|e| format!("{e:?}")).err(); + + Some(InstructionReply::OpenRegions(SimpleReply { + result: success, + error, + })) } } diff --git a/src/datanode/src/heartbeat/handler/upgrade_region.rs b/src/datanode/src/heartbeat/handler/upgrade_region.rs index c1f238e059..239eaf1e4c 100644 --- a/src/datanode/src/heartbeat/handler/upgrade_region.rs +++ b/src/datanode/src/heartbeat/handler/upgrade_region.rs @@ -12,18 +12,24 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use common_meta::instruction::{InstructionReply, UpgradeRegion, UpgradeRegionReply}; +use common_meta::instruction::{Instruction, InstructionReply, UpgradeRegion, UpgradeRegionReply}; use common_telemetry::{info, warn}; -use futures_util::future::BoxFuture; use store_api::region_request::{RegionCatchupRequest, RegionRequest, ReplayCheckpoint}; -use crate::heartbeat::handler::HandlerContext; +use crate::heartbeat::handler::{HandlerContext, InstructionHandler}; use crate::heartbeat::task_tracker::WaitResult; -impl HandlerContext { - pub(crate) fn handle_upgrade_region_instruction( - self, - UpgradeRegion { +#[derive(Debug, Clone, Copy, Default)] +pub struct UpgradeRegionsHandler; + +#[async_trait::async_trait] +impl InstructionHandler for UpgradeRegionsHandler { + async fn handle( + &self, + ctx: &HandlerContext, + instruction: Instruction, + ) -> Option { + let UpgradeRegion { region_id, last_entry_id, metadata_last_entry_id, @@ -31,116 +37,116 @@ impl HandlerContext { location_id, replay_entry_id, metadata_replay_entry_id, - }: UpgradeRegion, - ) -> BoxFuture<'static, Option> { - Box::pin(async move { - let Some(writable) = self.region_server.is_region_leader(region_id) else { - return Some(InstructionReply::UpgradeRegion(UpgradeRegionReply { - ready: false, - exists: false, - error: None, - })); - }; + } = instruction.into_upgrade_regions().unwrap(); - if writable { - return Some(InstructionReply::UpgradeRegion(UpgradeRegionReply { + let Some(writable) = ctx.region_server.is_region_leader(region_id) else { + return Some(InstructionReply::UpgradeRegion(UpgradeRegionReply { + ready: false, + exists: false, + error: None, + })); + }; + + if writable { + return Some(InstructionReply::UpgradeRegion(UpgradeRegionReply { + ready: true, + exists: true, + error: None, + })); + } + + let region_server_moved = ctx.region_server.clone(); + + let checkpoint = match (replay_entry_id, metadata_replay_entry_id) { + (Some(entry_id), metadata_entry_id) => Some(ReplayCheckpoint { + entry_id, + metadata_entry_id, + }), + _ => None, + }; + + // The catchup task is almost zero cost if the inside region is writable. + // Therefore, it always registers a new catchup task. + let register_result = ctx + .catchup_tasks + .try_register( + region_id, + Box::pin(async move { + info!( + "Executing region: {region_id} catchup to: last entry id {last_entry_id:?}" + ); + region_server_moved + .handle_request( + region_id, + RegionRequest::Catchup(RegionCatchupRequest { + set_writable: true, + entry_id: last_entry_id, + metadata_entry_id: metadata_last_entry_id, + location_id, + checkpoint, + }), + ) + .await?; + + Ok(()) + }), + ) + .await; + + if register_result.is_busy() { + warn!("Another catchup task is running for the region: {region_id}"); + } + + // Returns immediately + let Some(replay_timeout) = replay_timeout else { + return Some(InstructionReply::UpgradeRegion(UpgradeRegionReply { + ready: false, + exists: true, + error: None, + })); + }; + + // We don't care that it returns a newly registered or running task. 
+ let mut watcher = register_result.into_watcher(); + let result = ctx.catchup_tasks.wait(&mut watcher, replay_timeout).await; + + match result { + WaitResult::Timeout => Some(InstructionReply::UpgradeRegion(UpgradeRegionReply { + ready: false, + exists: true, + error: None, + })), + WaitResult::Finish(Ok(_)) => { + Some(InstructionReply::UpgradeRegion(UpgradeRegionReply { ready: true, exists: true, error: None, - })); + })) } - - let region_server_moved = self.region_server.clone(); - - let checkpoint = match (replay_entry_id, metadata_replay_entry_id) { - (Some(entry_id), metadata_entry_id) => Some(ReplayCheckpoint { - entry_id, - metadata_entry_id, - }), - _ => None, - }; - - // The catchup task is almost zero cost if the inside region is writable. - // Therefore, it always registers a new catchup task. - let register_result = self - .catchup_tasks - .try_register( - region_id, - Box::pin(async move { - info!("Executing region: {region_id} catchup to: last entry id {last_entry_id:?}"); - region_server_moved - .handle_request( - region_id, - RegionRequest::Catchup(RegionCatchupRequest { - set_writable: true, - entry_id: last_entry_id, - metadata_entry_id: metadata_last_entry_id, - location_id, - checkpoint, - }), - ) - .await?; - - Ok(()) - }), - ) - .await; - - if register_result.is_busy() { - warn!("Another catchup task is running for the region: {region_id}"); - } - - // Returns immediately - let Some(replay_timeout) = replay_timeout else { - return Some(InstructionReply::UpgradeRegion(UpgradeRegionReply { + WaitResult::Finish(Err(err)) => { + Some(InstructionReply::UpgradeRegion(UpgradeRegionReply { ready: false, exists: true, - error: None, - })); - }; - - // We don't care that it returns a newly registered or running task. - let mut watcher = register_result.into_watcher(); - let result = self.catchup_tasks.wait(&mut watcher, replay_timeout).await; - - match result { - WaitResult::Timeout => Some(InstructionReply::UpgradeRegion(UpgradeRegionReply { - ready: false, - exists: true, - error: None, - })), - WaitResult::Finish(Ok(_)) => { - Some(InstructionReply::UpgradeRegion(UpgradeRegionReply { - ready: true, - exists: true, - error: None, - })) - } - WaitResult::Finish(Err(err)) => { - Some(InstructionReply::UpgradeRegion(UpgradeRegionReply { - ready: false, - exists: true, - error: Some(format!("{err:?}")), - })) - } + error: Some(format!("{err:?}")), + })) } - }) + } } } #[cfg(test)] mod tests { - use std::assert_matches::assert_matches; use std::time::Duration; - use common_meta::instruction::{InstructionReply, UpgradeRegion}; + use common_meta::instruction::{Instruction, UpgradeRegion}; use mito2::engine::MITO_ENGINE_NAME; use store_api::region_engine::RegionRole; use store_api::storage::RegionId; use tokio::time::Instant; use crate::error; - use crate::heartbeat::handler::HandlerContext; + use crate::heartbeat::handler::upgrade_region::UpgradeRegionsHandler; + use crate::heartbeat::handler::{HandlerContext, InstructionHandler}; use crate::tests::{MockRegionEngine, mock_region_server}; #[tokio::test] @@ -155,20 +161,20 @@ mod tests { let waits = vec![None, Some(Duration::from_millis(100u64))]; for replay_timeout in waits { - let reply = handler_context - .clone() - .handle_upgrade_region_instruction(UpgradeRegion { - region_id, - replay_timeout, - ..Default::default() - }) + let reply = UpgradeRegionsHandler + .handle( + &handler_context, + Instruction::UpgradeRegion(UpgradeRegion { + region_id, + replay_timeout, + ..Default::default() + }), + ) .await; - assert_matches!(reply, 
Some(InstructionReply::UpgradeRegion(_))); - if let InstructionReply::UpgradeRegion(reply) = reply.unwrap() { - assert!(!reply.exists); - assert!(reply.error.is_none()); - } + let reply = reply.unwrap().expect_upgrade_region_reply(); + assert!(!reply.exists); + assert!(reply.error.is_none()); } } @@ -192,21 +198,21 @@ mod tests { let waits = vec![None, Some(Duration::from_millis(100u64))]; for replay_timeout in waits { - let reply = handler_context - .clone() - .handle_upgrade_region_instruction(UpgradeRegion { - region_id, - replay_timeout, - ..Default::default() - }) + let reply = UpgradeRegionsHandler + .handle( + &handler_context, + Instruction::UpgradeRegion(UpgradeRegion { + region_id, + replay_timeout, + ..Default::default() + }), + ) .await; - assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_))); - if let InstructionReply::UpgradeRegion(reply) = reply.unwrap() { - assert!(reply.ready); - assert!(reply.exists); - assert!(reply.error.is_none()); - } + let reply = reply.unwrap().expect_upgrade_region_reply(); + assert!(reply.ready); + assert!(reply.exists); + assert!(reply.error.is_none()); } } @@ -230,21 +236,21 @@ mod tests { let waits = vec![None, Some(Duration::from_millis(100u64))]; for replay_timeout in waits { - let reply = handler_context - .clone() - .handle_upgrade_region_instruction(UpgradeRegion { - region_id, - replay_timeout, - ..Default::default() - }) + let reply = UpgradeRegionsHandler + .handle( + &handler_context, + Instruction::UpgradeRegion(UpgradeRegion { + region_id, + replay_timeout, + ..Default::default() + }), + ) .await; - assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_))); - if let InstructionReply::UpgradeRegion(reply) = reply.unwrap() { - assert!(!reply.ready); - assert!(reply.exists); - assert!(reply.error.is_none()); - } + let reply = reply.unwrap().expect_upgrade_region_reply(); + assert!(!reply.ready); + assert!(reply.exists); + assert!(reply.error.is_none()); } } @@ -271,40 +277,41 @@ mod tests { let handler_context = HandlerContext::new_for_test(mock_region_server); for replay_timeout in waits { - let reply = handler_context - .clone() - .handle_upgrade_region_instruction(UpgradeRegion { - region_id, - replay_timeout, - ..Default::default() - }) + let reply = UpgradeRegionsHandler + .handle( + &handler_context, + Instruction::UpgradeRegion(UpgradeRegion { + region_id, + replay_timeout, + ..Default::default() + }), + ) .await; - assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_))); - if let InstructionReply::UpgradeRegion(reply) = reply.unwrap() { - assert!(!reply.ready); - assert!(reply.exists); - assert!(reply.error.is_none()); - } - } - - let timer = Instant::now(); - let reply = handler_context - .handle_upgrade_region_instruction(UpgradeRegion { - region_id, - replay_timeout: Some(Duration::from_millis(500)), - ..Default::default() - }) - .await; - assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_))); - // Must less than 300 ms. - assert!(timer.elapsed().as_millis() < 300); - - if let InstructionReply::UpgradeRegion(reply) = reply.unwrap() { - assert!(reply.ready); + let reply = reply.unwrap().expect_upgrade_region_reply(); + assert!(!reply.ready); assert!(reply.exists); assert!(reply.error.is_none()); } + + let timer = Instant::now(); + let reply = UpgradeRegionsHandler + .handle( + &handler_context, + Instruction::UpgradeRegion(UpgradeRegion { + region_id, + replay_timeout: Some(Duration::from_millis(500)), + ..Default::default() + }), + ) + .await; + // Must less than 300 ms. 
+ assert!(timer.elapsed().as_millis() < 300); + + let reply = reply.unwrap().expect_upgrade_region_reply(); + assert!(reply.ready); + assert!(reply.exists); + assert!(reply.error.is_none()); } #[tokio::test] @@ -329,37 +336,37 @@ mod tests { let handler_context = HandlerContext::new_for_test(mock_region_server); - let reply = handler_context - .clone() - .handle_upgrade_region_instruction(UpgradeRegion { - region_id, - ..Default::default() - }) + let reply = UpgradeRegionsHandler + .handle( + &handler_context, + Instruction::UpgradeRegion(UpgradeRegion { + region_id, + ..Default::default() + }), + ) .await; - assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_))); // It didn't wait for handle returns; it had no idea about the error. - if let InstructionReply::UpgradeRegion(reply) = reply.unwrap() { - assert!(!reply.ready); - assert!(reply.exists); - assert!(reply.error.is_none()); - } + let reply = reply.unwrap().expect_upgrade_region_reply(); + assert!(!reply.ready); + assert!(reply.exists); + assert!(reply.error.is_none()); - let reply = handler_context - .clone() - .handle_upgrade_region_instruction(UpgradeRegion { - region_id, - replay_timeout: Some(Duration::from_millis(200)), - ..Default::default() - }) + let reply = UpgradeRegionsHandler + .handle( + &handler_context, + Instruction::UpgradeRegion(UpgradeRegion { + region_id, + replay_timeout: Some(Duration::from_millis(200)), + ..Default::default() + }), + ) .await; - assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_))); - if let InstructionReply::UpgradeRegion(reply) = reply.unwrap() { - assert!(!reply.ready); - assert!(reply.exists); - assert!(reply.error.is_some()); - assert!(reply.error.unwrap().contains("mock_error")); - } + let reply = reply.unwrap().expect_upgrade_region_reply(); + assert!(!reply.ready); + assert!(reply.exists); + assert!(reply.error.is_some()); + assert!(reply.error.unwrap().contains("mock_error")); } } diff --git a/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs b/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs index ad805ae680..fb4065748c 100644 --- a/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs +++ b/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs @@ -19,7 +19,7 @@ use api::v1::meta::MailboxMessage; use common_error::ext::BoxedError; use common_meta::distributed_time_constants::REGION_LEASE_SECS; use common_meta::instruction::{ - DowngradeRegion, DowngradeRegionReply, Instruction, InstructionReply, + DowngradeRegion, DowngradeRegionReply, DowngradeRegionsReply, Instruction, InstructionReply, }; use common_procedure::{Context as ProcedureContext, Status}; use common_telemetry::{error, info, warn}; @@ -120,10 +120,10 @@ impl DowngradeLeaderRegion { ) -> Instruction { let pc = &ctx.persistent_ctx; let region_id = pc.region_id; - Instruction::DowngradeRegion(DowngradeRegion { + Instruction::DowngradeRegions(vec![DowngradeRegion { region_id, flush_timeout: Some(flush_timeout), - }) + }]) } /// Tries to downgrade a leader region. 
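Editorial note: the hunk above switches the downgrade protocol from a single `DowngradeRegion` instruction to the batched `DowngradeRegions` variant, even though the migration procedure still sends exactly one region per message. The sketch below only illustrates the new request/reply shapes using types visible in this patch; the helper names `build_downgrade_instruction` and `first_downgrade_reply` are hypothetical, and `flush_timeout` is assumed to be a `std::time::Duration`.

// Request side: one region wrapped in a batch, as the procedure does above.
fn build_downgrade_instruction(region_id: RegionId, flush_timeout: Duration) -> Instruction {
    Instruction::DowngradeRegions(vec![DowngradeRegion {
        region_id,
        flush_timeout: Some(flush_timeout),
    }])
}

// Reply side: the datanode now answers with one entry per requested region.
// The migration procedure currently inspects only the first entry
// (see the TODO in the next hunk).
fn first_downgrade_reply(reply: InstructionReply) -> Option<DowngradeRegionReply> {
    match reply {
        InstructionReply::DowngradeRegions(DowngradeRegionsReply { replies }) => {
            replies.into_iter().next()
        }
        _ => None,
    }
}
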
@@ -173,12 +173,7 @@ impl DowngradeLeaderRegion { region_id, now.elapsed() ); - let InstructionReply::DowngradeRegion(DowngradeRegionReply { - last_entry_id, - metadata_last_entry_id, - exists, - error, - }) = reply + let InstructionReply::DowngradeRegions(DowngradeRegionsReply { replies }) = reply else { return error::UnexpectedInstructionReplySnafu { mailbox_message: msg.to_string(), @@ -187,6 +182,15 @@ impl DowngradeLeaderRegion { .fail(); }; + // TODO(weny): handle multiple replies. + let DowngradeRegionReply { + region_id, + last_entry_id, + metadata_last_entry_id, + exists, + error, + } = &replies[0]; + if error.is_some() { return error::RetryLaterSnafu { reason: format!( @@ -216,12 +220,12 @@ impl DowngradeLeaderRegion { } if let Some(last_entry_id) = last_entry_id { - ctx.volatile_ctx.set_last_entry_id(last_entry_id); + ctx.volatile_ctx.set_last_entry_id(*last_entry_id); } if let Some(metadata_last_entry_id) = metadata_last_entry_id { ctx.volatile_ctx - .set_metadata_last_entry_id(metadata_last_entry_id); + .set_metadata_last_entry_id(*metadata_last_entry_id); } Ok(()) diff --git a/src/meta-srv/src/procedure/test_util.rs b/src/meta-srv/src/procedure/test_util.rs index 8197087351..247f112514 100644 --- a/src/meta-srv/src/procedure/test_util.rs +++ b/src/meta-srv/src/procedure/test_util.rs @@ -17,7 +17,8 @@ use std::collections::HashMap; use api::v1::meta::mailbox_message::Payload; use api::v1::meta::{HeartbeatResponse, MailboxMessage}; use common_meta::instruction::{ - DowngradeRegionReply, FlushRegionReply, InstructionReply, SimpleReply, UpgradeRegionReply, + DowngradeRegionReply, DowngradeRegionsReply, FlushRegionReply, InstructionReply, SimpleReply, + UpgradeRegionReply, }; use common_meta::key::TableMetadataManagerRef; use common_meta::key::table_route::TableRouteValue; @@ -183,12 +184,15 @@ pub fn new_downgrade_region_reply( to: "meta".to_string(), timestamp_millis: current_time_millis(), payload: Some(Payload::Json( - serde_json::to_string(&InstructionReply::DowngradeRegion(DowngradeRegionReply { - last_entry_id, - metadata_last_entry_id: None, - exists: exist, - error, - })) + serde_json::to_string(&InstructionReply::DowngradeRegions( + DowngradeRegionsReply::new(vec![DowngradeRegionReply { + region_id: RegionId::new(0, 0), + last_entry_id, + metadata_last_entry_id: None, + exists: exist, + error, + }]), + )) .unwrap(), )), } From d8563ba56d91dde0375457f32b50203357887876 Mon Sep 17 00:00:00 2001 From: dennis zhuang Date: Sat, 25 Oct 2025 16:41:49 +0800 Subject: [PATCH 14/14] feat: adds regex_extract function and more type tests (#7107) * feat: adds format, regex_extract function and more type tests Signed-off-by: Dennis Zhuang * fix: forgot functions Signed-off-by: Dennis Zhuang * chore: forgot null type Signed-off-by: Dennis Zhuang * test: forgot date type Signed-off-by: Dennis Zhuang * feat: remove format function Signed-off-by: Dennis Zhuang * test: update results after upgrading datafusion Signed-off-by: Dennis Zhuang --------- Signed-off-by: Dennis Zhuang --- Cargo.lock | 19 +- Cargo.toml | 2 +- src/common/datasource/Cargo.toml | 2 +- src/common/function/Cargo.toml | 1 + src/common/function/src/function_registry.rs | 4 + src/common/function/src/scalars.rs | 1 + .../function/src/scalars/date/date_format.rs | 78 +++- src/common/function/src/scalars/string.rs | 26 ++ .../src/scalars/string/regexp_extract.rs | 339 ++++++++++++++++++ src/mito2/Cargo.toml | 2 +- .../common/function/string/concat.result | 211 +++++++++++ .../common/function/string/concat.sql | 63 ++++ 
.../common/function/string/length.result | 183 ++++++++++ .../common/function/string/length.sql | 58 +++ .../function/string/like_pattern.result | 280 +++++++++++++++ .../common/function/string/like_pattern.sql | 97 +++++ .../common/function/string/position.result | 278 ++++++++++++++ .../common/function/string/position.sql | 84 +++++ .../common/function/string/regex.result | 143 ++++++++ .../common/function/string/regex.sql | 44 +++ .../common/function/string/repeat.result | 217 +++++++++++ .../common/function/string/repeat.sql | 68 ++++ .../common/function/string/replace.result | 180 ++++++++++ .../common/function/string/replace.sql | 57 +++ .../common/function/string/reverse.result | 200 +++++++++++ .../common/function/string/reverse.sql | 63 ++++ .../function/string/string_split.result | 213 +++++++++++ .../common/function/string/string_split.sql | 75 ++++ .../common/function/string/substring.result | 173 +++++++++ .../common/function/string/substring.sql | 53 +++ .../common/function/string/trim_pad.result | 274 ++++++++++++++ .../common/function/string/trim_pad.sql | 88 +++++ .../common/function/string/upper_lower.result | 291 +++++++++++++++ .../common/function/string/upper_lower.sql | 93 +++++ .../common/order/nulls_first_last.result | 141 ++++++++ .../common/order/nulls_first_last.sql | 46 +++ .../common/order/order_by_basic.result | 134 +++++++ .../common/order/order_by_basic.sql | 39 ++ .../common/order/order_by_expressions.result | 137 +++++++ .../common/order/order_by_expressions.sql | 54 +++ .../common/sample/basic_sample.result | 93 +++++ .../standalone/common/sample/basic_sample.sql | 35 ++ .../common/types/date/test_date.result | 135 +++++++ .../common/types/date/test_date.sql | 50 +++ .../types/float/ieee_floating_points.result | 144 ++++++++ .../types/float/ieee_floating_points.sql | 51 +++ .../common/types/float/infinity_nan.result | 184 ++++++++++ .../common/types/float/infinity_nan.sql | 61 ++++ .../float/nan_arithmetic_extended.result | 317 ++++++++++++++++ .../types/float/nan_arithmetic_extended.sql | 91 +++++ .../types/float/nan_cast_extended.result | 252 +++++++++++++ .../common/types/float/nan_cast_extended.sql | 76 ++++ .../common/types/null/null_handling.result | 171 +++++++++ .../common/types/null/null_handling.sql | 49 +++ .../common/types/string/big_strings.result | 116 ++++++ .../common/types/string/big_strings.sql | 43 +++ .../types/string/unicode_extended.result | 103 ++++++ .../common/types/string/unicode_extended.sql | 35 ++ 58 files changed, 6502 insertions(+), 15 deletions(-) create mode 100644 src/common/function/src/scalars/string.rs create mode 100644 src/common/function/src/scalars/string/regexp_extract.rs create mode 100644 tests/cases/standalone/common/function/string/concat.result create mode 100644 tests/cases/standalone/common/function/string/concat.sql create mode 100644 tests/cases/standalone/common/function/string/length.result create mode 100644 tests/cases/standalone/common/function/string/length.sql create mode 100644 tests/cases/standalone/common/function/string/like_pattern.result create mode 100644 tests/cases/standalone/common/function/string/like_pattern.sql create mode 100644 tests/cases/standalone/common/function/string/position.result create mode 100644 tests/cases/standalone/common/function/string/position.sql create mode 100644 tests/cases/standalone/common/function/string/regex.result create mode 100644 tests/cases/standalone/common/function/string/regex.sql create mode 100644 
tests/cases/standalone/common/function/string/repeat.result create mode 100644 tests/cases/standalone/common/function/string/repeat.sql create mode 100644 tests/cases/standalone/common/function/string/replace.result create mode 100644 tests/cases/standalone/common/function/string/replace.sql create mode 100644 tests/cases/standalone/common/function/string/reverse.result create mode 100644 tests/cases/standalone/common/function/string/reverse.sql create mode 100644 tests/cases/standalone/common/function/string/string_split.result create mode 100644 tests/cases/standalone/common/function/string/string_split.sql create mode 100644 tests/cases/standalone/common/function/string/substring.result create mode 100644 tests/cases/standalone/common/function/string/substring.sql create mode 100644 tests/cases/standalone/common/function/string/trim_pad.result create mode 100644 tests/cases/standalone/common/function/string/trim_pad.sql create mode 100644 tests/cases/standalone/common/function/string/upper_lower.result create mode 100644 tests/cases/standalone/common/function/string/upper_lower.sql create mode 100644 tests/cases/standalone/common/order/nulls_first_last.result create mode 100644 tests/cases/standalone/common/order/nulls_first_last.sql create mode 100644 tests/cases/standalone/common/order/order_by_basic.result create mode 100644 tests/cases/standalone/common/order/order_by_basic.sql create mode 100644 tests/cases/standalone/common/order/order_by_expressions.result create mode 100644 tests/cases/standalone/common/order/order_by_expressions.sql create mode 100644 tests/cases/standalone/common/sample/basic_sample.result create mode 100644 tests/cases/standalone/common/sample/basic_sample.sql create mode 100644 tests/cases/standalone/common/types/date/test_date.result create mode 100644 tests/cases/standalone/common/types/date/test_date.sql create mode 100644 tests/cases/standalone/common/types/float/ieee_floating_points.result create mode 100644 tests/cases/standalone/common/types/float/ieee_floating_points.sql create mode 100644 tests/cases/standalone/common/types/float/infinity_nan.result create mode 100644 tests/cases/standalone/common/types/float/infinity_nan.sql create mode 100644 tests/cases/standalone/common/types/float/nan_arithmetic_extended.result create mode 100644 tests/cases/standalone/common/types/float/nan_arithmetic_extended.sql create mode 100644 tests/cases/standalone/common/types/float/nan_cast_extended.result create mode 100644 tests/cases/standalone/common/types/float/nan_cast_extended.sql create mode 100644 tests/cases/standalone/common/types/null/null_handling.result create mode 100644 tests/cases/standalone/common/types/null/null_handling.sql create mode 100644 tests/cases/standalone/common/types/string/big_strings.result create mode 100644 tests/cases/standalone/common/types/string/big_strings.sql create mode 100644 tests/cases/standalone/common/types/string/unicode_extended.result create mode 100644 tests/cases/standalone/common/types/string/unicode_extended.sql diff --git a/Cargo.lock b/Cargo.lock index 07b1695817..c2bad3d971 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1264,7 +1264,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" dependencies = [ "memchr", - "regex-automata 0.4.9", + "regex-automata 0.4.13", "serde", ] @@ -2190,6 +2190,7 @@ dependencies = [ "num-traits", "paste", "pretty_assertions", + "regex", "s2", "serde", "serde_json", @@ -4588,7 +4589,7 @@ 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" dependencies = [ "bit-set", - "regex-automata 0.4.9", + "regex-automata 0.4.13", "regex-syntax 0.8.7", ] @@ -6118,7 +6119,7 @@ dependencies = [ "rand 0.9.1", "rand_chacha 0.9.0", "regex", - "regex-automata 0.4.9", + "regex-automata 0.4.13", "roaring", "serde", "serde_json", @@ -6735,7 +6736,7 @@ version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5baa5e9ff84f1aefd264e6869907646538a52147a755d494517a8007fb48733" dependencies = [ - "regex-automata 0.4.9", + "regex-automata 0.4.13", "rustversion", ] @@ -10469,13 +10470,13 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.1" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.9", + "regex-automata 0.4.13", "regex-syntax 0.8.7", ] @@ -10490,9 +10491,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.9" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", diff --git a/Cargo.toml b/Cargo.toml index a4ce20bfd1..ebafce51ba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -191,7 +191,7 @@ prost-types = "0.13" raft-engine = { version = "0.4.1", default-features = false } rand = "0.9" ratelimit = "0.10" -regex = "1.8" +regex = "1.12" regex-automata = "0.4" reqwest = { version = "0.12", default-features = false, features = [ "json", diff --git a/src/common/datasource/Cargo.toml b/src/common/datasource/Cargo.toml index 303d05ceb1..964f41736c 100644 --- a/src/common/datasource/Cargo.toml +++ b/src/common/datasource/Cargo.toml @@ -36,7 +36,7 @@ object_store_opendal.workspace = true orc-rust = { version = "0.6.3", default-features = false, features = ["async"] } parquet.workspace = true paste.workspace = true -regex = "1.7" +regex.workspace = true serde.workspace = true snafu.workspace = true strum.workspace = true diff --git a/src/common/function/Cargo.toml b/src/common/function/Cargo.toml index d5b928e2a1..1d272f5d04 100644 --- a/src/common/function/Cargo.toml +++ b/src/common/function/Cargo.toml @@ -51,6 +51,7 @@ nalgebra.workspace = true num = "0.4" num-traits = "0.2" paste.workspace = true +regex.workspace = true s2 = { version = "0.0.12", optional = true } serde.workspace = true serde_json.workspace = true diff --git a/src/common/function/src/function_registry.rs b/src/common/function/src/function_registry.rs index 75bb71c63a..e51dcf4cb8 100644 --- a/src/common/function/src/function_registry.rs +++ b/src/common/function/src/function_registry.rs @@ -34,6 +34,7 @@ use crate::scalars::json::JsonFunction; use crate::scalars::matches::MatchesFunction; use crate::scalars::matches_term::MatchesTermFunction; use crate::scalars::math::MathFunction; +use crate::scalars::string::register_string_functions; use crate::scalars::timestamp::TimestampFunction; use crate::scalars::uddsketch_calc::UddSketchCalcFunction; use crate::scalars::vector::VectorFunction as VectorScalarFunction; @@ -154,6 +155,9 @@ pub static FUNCTION_REGISTRY: LazyLock> = LazyLock::new(|| // Json 
related functions JsonFunction::register(&function_registry); + // String related functions + register_string_functions(&function_registry); + // Vector related functions VectorScalarFunction::register(&function_registry); VectorAggrFunction::register(&function_registry); diff --git a/src/common/function/src/scalars.rs b/src/common/function/src/scalars.rs index 6f93f2741d..9a8c9cc3a0 100644 --- a/src/common/function/src/scalars.rs +++ b/src/common/function/src/scalars.rs @@ -20,6 +20,7 @@ pub mod json; pub mod matches; pub mod matches_term; pub mod math; +pub(crate) mod string; pub mod vector; pub(crate) mod hll_count; diff --git a/src/common/function/src/scalars/date/date_format.rs b/src/common/function/src/scalars/date/date_format.rs index 0e321c957e..dfa5a444ca 100644 --- a/src/common/function/src/scalars/date/date_format.rs +++ b/src/common/function/src/scalars/date/date_format.rs @@ -20,7 +20,9 @@ use common_query::error; use common_time::{Date, Timestamp}; use datafusion_common::DataFusionError; use datafusion_common::arrow::array::{Array, AsArray, StringViewBuilder}; -use datafusion_common::arrow::datatypes::{ArrowTimestampType, DataType, Date32Type, TimeUnit}; +use datafusion_common::arrow::datatypes::{ + ArrowTimestampType, DataType, Date32Type, Date64Type, TimeUnit, +}; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature}; use snafu::ResultExt; @@ -40,6 +42,7 @@ impl Default for DateFormatFunction { signature: helper::one_of_sigs2( vec![ DataType::Date32, + DataType::Date64, DataType::Timestamp(TimeUnit::Second, None), DataType::Timestamp(TimeUnit::Millisecond, None), DataType::Timestamp(TimeUnit::Microsecond, None), @@ -115,6 +118,29 @@ impl Function for DateFormatFunction { builder.append_option(result.as_deref()); } } + DataType::Date64 => { + let left = left.as_primitive::(); + for i in 0..size { + let date = left.is_valid(i).then(|| { + let ms = left.value(i); + Timestamp::new_millisecond(ms) + }); + let format = formats.is_valid(i).then(|| formats.value(i)); + + let result = match (date, format) { + (Some(ts), Some(fmt)) => { + Some(ts.as_formatted_string(fmt, Some(timezone)).map_err(|e| { + DataFusionError::Execution(format!( + "cannot format {ts:?} as '{fmt}': {e}" + )) + })?) 
+ } + _ => None, + }; + + builder.append_option(result.as_deref()); + } + } x => { return Err(DataFusionError::Execution(format!( "unsupported input data type {x}" @@ -137,7 +163,9 @@ mod tests { use std::sync::Arc; use arrow_schema::Field; - use datafusion_common::arrow::array::{Date32Array, StringArray, TimestampSecondArray}; + use datafusion_common::arrow::array::{ + Date32Array, Date64Array, StringArray, TimestampSecondArray, + }; use datafusion_common::config::ConfigOptions; use datafusion_expr::{TypeSignature, Volatility}; @@ -166,7 +194,7 @@ mod tests { Signature { type_signature: TypeSignature::OneOf(sigs), volatility: Volatility::Immutable - } if sigs.len() == 5)); + } if sigs.len() == 6)); } #[test] @@ -213,6 +241,50 @@ mod tests { } } + #[test] + fn test_date64_date_format() { + let f = DateFormatFunction::default(); + + let dates = vec![Some(123000), None, Some(42000), None]; + let formats = vec![ + "%Y-%m-%d %T.%3f", + "%Y-%m-%d %T.%3f", + "%Y-%m-%d %T.%3f", + "%Y-%m-%d %T.%3f", + ]; + let results = [ + Some("1970-01-01 00:02:03.000"), + None, + Some("1970-01-01 00:00:42.000"), + None, + ]; + + let mut config_options = ConfigOptions::default(); + config_options.extensions.insert(FunctionContext::default()); + let config_options = Arc::new(config_options); + + let args = ScalarFunctionArgs { + args: vec![ + ColumnarValue::Array(Arc::new(Date64Array::from(dates))), + ColumnarValue::Array(Arc::new(StringArray::from_iter_values(formats))), + ], + arg_fields: vec![], + number_rows: 4, + return_field: Arc::new(Field::new("x", DataType::Utf8View, false)), + config_options, + }; + let result = f + .invoke_with_args(args) + .and_then(|x| x.to_array(4)) + .unwrap(); + let vector = result.as_string_view(); + + assert_eq!(4, vector.len()); + for (actual, expect) in vector.iter().zip(results) { + assert_eq!(actual, expect); + } + } + #[test] fn test_date_date_format() { let f = DateFormatFunction::default(); diff --git a/src/common/function/src/scalars/string.rs b/src/common/function/src/scalars/string.rs new file mode 100644 index 0000000000..95c6201ee2 --- /dev/null +++ b/src/common/function/src/scalars/string.rs @@ -0,0 +1,26 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! String scalar functions + +mod regexp_extract; + +pub(crate) use regexp_extract::RegexpExtractFunction; + +use crate::function_registry::FunctionRegistry; + +/// Register all string functions +pub fn register_string_functions(registry: &FunctionRegistry) { + RegexpExtractFunction::register(registry); +} diff --git a/src/common/function/src/scalars/string/regexp_extract.rs b/src/common/function/src/scalars/string/regexp_extract.rs new file mode 100644 index 0000000000..bc78c4df74 --- /dev/null +++ b/src/common/function/src/scalars/string/regexp_extract.rs @@ -0,0 +1,339 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Implementation of REGEXP_EXTRACT function +use std::fmt; +use std::sync::Arc; + +use datafusion_common::DataFusionError; +use datafusion_common::arrow::array::{Array, AsArray, LargeStringBuilder}; +use datafusion_common::arrow::compute::cast; +use datafusion_common::arrow::datatypes::DataType; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, TypeSignature, Volatility}; +use regex::{Regex, RegexBuilder}; + +use crate::function::Function; +use crate::function_registry::FunctionRegistry; + +const NAME: &str = "regexp_extract"; + +// Safety limits +const MAX_REGEX_SIZE: usize = 1024 * 1024; // compiled regex heap cap +const MAX_DFA_SIZE: usize = 2 * 1024 * 1024; // lazy DFA cap +const MAX_TOTAL_RESULT_SIZE: usize = 64 * 1024 * 1024; // total batch cap +const MAX_SINGLE_MATCH: usize = 1024 * 1024; // per-row cap +const MAX_PATTERN_LEN: usize = 10_000; // pattern text length cap + +/// REGEXP_EXTRACT function implementation +/// Extracts the first substring matching the given regular expression pattern. +/// If no match is found, returns NULL. +/// +#[derive(Debug)] +pub struct RegexpExtractFunction { + signature: Signature, +} + +impl RegexpExtractFunction { + pub fn register(registry: &FunctionRegistry) { + registry.register_scalar(RegexpExtractFunction::default()); + } +} + +impl Default for RegexpExtractFunction { + fn default() -> Self { + Self { + signature: Signature::one_of( + vec![ + TypeSignature::Exact(vec![DataType::Utf8View, DataType::Utf8]), + TypeSignature::Exact(vec![DataType::Utf8View, DataType::Utf8View]), + TypeSignature::Exact(vec![DataType::Utf8, DataType::Utf8View]), + TypeSignature::Exact(vec![DataType::LargeUtf8, DataType::Utf8View]), + TypeSignature::Exact(vec![DataType::Utf8View, DataType::LargeUtf8]), + TypeSignature::Exact(vec![DataType::Utf8, DataType::Utf8]), + TypeSignature::Exact(vec![DataType::LargeUtf8, DataType::Utf8]), + TypeSignature::Exact(vec![DataType::Utf8, DataType::LargeUtf8]), + TypeSignature::Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]), + ], + Volatility::Immutable, + ), + } + } +} + +impl fmt::Display for RegexpExtractFunction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", NAME.to_ascii_uppercase()) + } +} + +impl Function for RegexpExtractFunction { + fn name(&self) -> &str { + NAME + } + + // Always return LargeUtf8 for simplicity and safety + fn return_type(&self, _: &[DataType]) -> datafusion_common::Result { + Ok(DataType::LargeUtf8) + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn invoke_with_args( + &self, + args: ScalarFunctionArgs, + ) -> datafusion_common::Result { + if args.args.len() != 2 { + return Err(DataFusionError::Execution( + "REGEXP_EXTRACT requires exactly two arguments (text, pattern)".to_string(), + )); + } + + // Keep original ColumnarValue variants for scalar-pattern fast path + let pattern_is_scalar = matches!(args.args[1], ColumnarValue::Scalar(_)); + + let arrays = ColumnarValue::values_to_arrays(&args.args)?; + let text_array = &arrays[0]; + let pattern_array = &arrays[1]; + + // Cast both to LargeUtf8 for uniform 
access (supports Utf8/Utf8View/Dictionary) + let text_large = cast(text_array.as_ref(), &DataType::LargeUtf8).map_err(|e| { + DataFusionError::Execution(format!("REGEXP_EXTRACT: text cast failed: {e}")) + })?; + let pattern_large = cast(pattern_array.as_ref(), &DataType::LargeUtf8).map_err(|e| { + DataFusionError::Execution(format!("REGEXP_EXTRACT: pattern cast failed: {e}")) + })?; + + let text = text_large.as_string::(); + let pattern = pattern_large.as_string::(); + let len = text.len(); + + // Pre-size result builder with conservative estimate + let mut estimated_total = 0usize; + for i in 0..len { + if !text.is_null(i) { + estimated_total = estimated_total.saturating_add(text.value_length(i) as usize); + if estimated_total > MAX_TOTAL_RESULT_SIZE { + return Err(DataFusionError::ResourcesExhausted(format!( + "REGEXP_EXTRACT total output exceeds {} bytes", + MAX_TOTAL_RESULT_SIZE + ))); + } + } + } + let mut builder = LargeStringBuilder::with_capacity(len, estimated_total); + + // Fast path: if pattern is scalar, compile once + let compiled_scalar: Option = if pattern_is_scalar && len > 0 && !pattern.is_null(0) + { + Some(compile_regex_checked(pattern.value(0))?) + } else { + None + }; + + for i in 0..len { + if text.is_null(i) || pattern.is_null(i) { + builder.append_null(); + continue; + } + + let s = text.value(i); + let pat = pattern.value(i); + + // Compile or reuse regex + let re = if let Some(ref compiled) = compiled_scalar { + compiled + } else { + // TODO: For performance-critical applications with repeating patterns, + // consider adding a small LRU cache here + &compile_regex_checked(pat)? + }; + + // First match only + if let Some(m) = re.find(s) { + let m_str = m.as_str(); + if m_str.len() > MAX_SINGLE_MATCH { + return Err(DataFusionError::Execution( + "REGEXP_EXTRACT match exceeds per-row limit (1MB)".to_string(), + )); + } + builder.append_value(m_str); + } else { + builder.append_null(); + } + } + + Ok(ColumnarValue::Array(Arc::new(builder.finish()))) + } +} + +// Compile a regex with safety checks +fn compile_regex_checked(pattern: &str) -> datafusion_common::Result { + if pattern.len() > MAX_PATTERN_LEN { + return Err(DataFusionError::Execution(format!( + "REGEXP_EXTRACT pattern too long (> {} chars)", + MAX_PATTERN_LEN + ))); + } + RegexBuilder::new(pattern) + .size_limit(MAX_REGEX_SIZE) + .dfa_size_limit(MAX_DFA_SIZE) + .build() + .map_err(|e| { + DataFusionError::Execution(format!("REGEXP_EXTRACT invalid pattern '{}': {e}", pattern)) + }) +} + +#[cfg(test)] +mod tests { + use datafusion_common::arrow::array::StringArray; + use datafusion_common::arrow::datatypes::Field; + use datafusion_expr::ScalarFunctionArgs; + + use super::*; + + #[test] + fn test_regexp_extract_function_basic() { + let text_array = Arc::new(StringArray::from(vec!["version 1.2.3", "no match here"])); + let pattern_array = Arc::new(StringArray::from(vec!["\\d+\\.\\d+\\.\\d+", "\\d+"])); + + let args = ScalarFunctionArgs { + args: vec![ + ColumnarValue::Array(text_array), + ColumnarValue::Array(pattern_array), + ], + arg_fields: vec![ + Arc::new(Field::new("arg_0", DataType::Utf8, false)), + Arc::new(Field::new("arg_1", DataType::Utf8, false)), + ], + return_field: Arc::new(Field::new("result", DataType::LargeUtf8, true)), + number_rows: 2, + config_options: Arc::new(datafusion_common::config::ConfigOptions::default()), + }; + + let function = RegexpExtractFunction::default(); + let result = function.invoke_with_args(args).unwrap(); + + if let ColumnarValue::Array(array) = result { + let 
string_array = array.as_string::(); + assert_eq!(string_array.value(0), "1.2.3"); + assert!(string_array.is_null(1)); // no match should return NULL + } else { + panic!("Expected array result"); + } + } + + #[test] + fn test_regexp_extract_phone_number() { + let text_array = Arc::new(StringArray::from(vec!["Phone: 123-456-7890", "No phone"])); + let pattern_array = Arc::new(StringArray::from(vec![ + "\\d{3}-\\d{3}-\\d{4}", + "\\d{3}-\\d{3}-\\d{4}", + ])); + + let args = ScalarFunctionArgs { + args: vec![ + ColumnarValue::Array(text_array), + ColumnarValue::Array(pattern_array), + ], + arg_fields: vec![ + Arc::new(Field::new("arg_0", DataType::Utf8, false)), + Arc::new(Field::new("arg_1", DataType::Utf8, false)), + ], + return_field: Arc::new(Field::new("result", DataType::LargeUtf8, true)), + number_rows: 2, + config_options: Arc::new(datafusion_common::config::ConfigOptions::default()), + }; + + let function = RegexpExtractFunction::default(); + let result = function.invoke_with_args(args).unwrap(); + + if let ColumnarValue::Array(array) = result { + let string_array = array.as_string::(); + assert_eq!(string_array.value(0), "123-456-7890"); + assert!(string_array.is_null(1)); // no match should return NULL + } else { + panic!("Expected array result"); + } + } + + #[test] + fn test_regexp_extract_email() { + let text_array = Arc::new(StringArray::from(vec![ + "Email: user@domain.com", + "Invalid email", + ])); + let pattern_array = Arc::new(StringArray::from(vec![ + "[a-zA-Z0-9]+@[a-zA-Z0-9]+\\.[a-zA-Z]+", + "[a-zA-Z0-9]+@[a-zA-Z0-9]+\\.[a-zA-Z]+", + ])); + + let args = ScalarFunctionArgs { + args: vec![ + ColumnarValue::Array(text_array), + ColumnarValue::Array(pattern_array), + ], + arg_fields: vec![ + Arc::new(Field::new("arg_0", DataType::Utf8, false)), + Arc::new(Field::new("arg_1", DataType::Utf8, false)), + ], + return_field: Arc::new(Field::new("result", DataType::LargeUtf8, true)), + number_rows: 2, + config_options: Arc::new(datafusion_common::config::ConfigOptions::default()), + }; + + let function = RegexpExtractFunction::default(); + let result = function.invoke_with_args(args).unwrap(); + + if let ColumnarValue::Array(array) = result { + let string_array = array.as_string::(); + assert_eq!(string_array.value(0), "user@domain.com"); + assert!(string_array.is_null(1)); // no match should return NULL + } else { + panic!("Expected array result"); + } + } + + #[test] + fn test_regexp_extract_with_nulls() { + let text_array = Arc::new(StringArray::from(vec![Some("test 123"), None])); + let pattern_array = Arc::new(StringArray::from(vec![Some("\\d+"), Some("\\d+")])); + + let args = ScalarFunctionArgs { + args: vec![ + ColumnarValue::Array(text_array), + ColumnarValue::Array(pattern_array), + ], + arg_fields: vec![ + Arc::new(Field::new("arg_0", DataType::Utf8, true)), + Arc::new(Field::new("arg_1", DataType::Utf8, false)), + ], + return_field: Arc::new(Field::new("result", DataType::LargeUtf8, true)), + number_rows: 2, + config_options: Arc::new(datafusion_common::config::ConfigOptions::default()), + }; + + let function = RegexpExtractFunction::default(); + let result = function.invoke_with_args(args).unwrap(); + + if let ColumnarValue::Array(array) = result { + let string_array = array.as_string::(); + assert_eq!(string_array.value(0), "123"); + assert!(string_array.is_null(1)); // NULL input should return NULL + } else { + panic!("Expected array result"); + } + } +} diff --git a/src/mito2/Cargo.toml b/src/mito2/Cargo.toml index 4cc1efb8bc..7926ae198a 100644 --- 
a/src/mito2/Cargo.toml +++ b/src/mito2/Cargo.toml @@ -65,7 +65,7 @@ partition.workspace = true puffin.workspace = true rand.workspace = true rayon = "1.10" -regex = "1.5" +regex.workspace = true rskafka = { workspace = true, optional = true } rstest = { workspace = true, optional = true } rstest_reuse = { workspace = true, optional = true } diff --git a/tests/cases/standalone/common/function/string/concat.result b/tests/cases/standalone/common/function/string/concat.result new file mode 100644 index 0000000000..5c0907d5cb --- /dev/null +++ b/tests/cases/standalone/common/function/string/concat.result @@ -0,0 +1,211 @@ +-- String concatenation function tests +-- Test CONCAT function +-- Basic concatenation +SELECT CONCAT('hello', 'world'); + ++-------------------------------------+ +| concat(Utf8("hello"),Utf8("world")) | ++-------------------------------------+ +| helloworld | ++-------------------------------------+ + +SELECT CONCAT('hello', ' ', 'world'); + ++-----------------------------------------------+ +| concat(Utf8("hello"),Utf8(" "),Utf8("world")) | ++-----------------------------------------------+ +| hello world | ++-----------------------------------------------+ + +SELECT CONCAT('a', 'b', 'c', 'd'); + ++-------------------------------------------------+ +| concat(Utf8("a"),Utf8("b"),Utf8("c"),Utf8("d")) | ++-------------------------------------------------+ +| abcd | ++-------------------------------------------------+ + +-- Concatenation with NULL values +SELECT CONCAT('hello', NULL); + ++----------------------------+ +| concat(Utf8("hello"),NULL) | ++----------------------------+ +| hello | ++----------------------------+ + +SELECT CONCAT(NULL, 'world'); + ++----------------------------+ +| concat(NULL,Utf8("world")) | ++----------------------------+ +| world | ++----------------------------+ + +SELECT CONCAT(NULL, NULL); + ++-------------------+ +| concat(NULL,NULL) | ++-------------------+ +| | ++-------------------+ + +-- Concatenation with numbers (should convert to string) +SELECT CONCAT('value: ', 42); + ++-----------------------------------+ +| concat(Utf8("value: "),Int64(42)) | ++-----------------------------------+ +| value: 42 | ++-----------------------------------+ + +SELECT CONCAT(1, 2, 3); + ++------------------------------------+ +| concat(Int64(1),Int64(2),Int64(3)) | ++------------------------------------+ +| 123 | ++------------------------------------+ + +-- Test with table data +CREATE TABLE concat_test(first_name VARCHAR, last_name VARCHAR, age INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO concat_test VALUES + ('John', 'Doe', 30, 1000), + ('Jane', 'Smith', 25, 2000), + ('Bob', NULL, 35, 3000), + (NULL, 'Wilson', 40, 4000); + +Affected Rows: 4 + +-- Concatenate table columns +SELECT CONCAT(first_name, ' ', last_name) as full_name FROM concat_test ORDER BY ts; + ++------------+ +| full_name | ++------------+ +| John Doe | +| Jane Smith | +| Bob | +| Wilson | ++------------+ + +SELECT CONCAT(first_name, ' is ', age, ' years old') FROM concat_test ORDER BY ts; + ++--------------------------------------------------------------------------------+ +| concat(concat_test.first_name,Utf8(" is "),concat_test.age,Utf8(" years old")) | ++--------------------------------------------------------------------------------+ +| John is 30 years old | +| Jane is 25 years old | +| Bob is 35 years old | +| is 40 years old | ++--------------------------------------------------------------------------------+ + +-- Test CONCAT_WS (concat with separator) 
+SELECT CONCAT_WS(' ', first_name, last_name) as full_name FROM concat_test ORDER BY ts; + ++------------+ +| full_name | ++------------+ +| John Doe | +| Jane Smith | +| Bob | +| Wilson | ++------------+ + +SELECT CONCAT_WS('-', first_name, last_name, age) FROM concat_test ORDER BY ts; + ++-----------------------------------------------------------------------------------+ +| concat_ws(Utf8("-"),concat_test.first_name,concat_test.last_name,concat_test.age) | ++-----------------------------------------------------------------------------------+ +| John-Doe-30 | +| Jane-Smith-25 | +| Bob-35 | +| Wilson-40 | ++-----------------------------------------------------------------------------------+ + +SELECT CONCAT_WS(',', 'a', 'b', 'c', 'd'); + ++--------------------------------------------------------------+ +| concat_ws(Utf8(","),Utf8("a"),Utf8("b"),Utf8("c"),Utf8("d")) | ++--------------------------------------------------------------+ +| a,b,c,d | ++--------------------------------------------------------------+ + +-- CONCAT_WS with NULL values (should skip NULLs) +SELECT CONCAT_WS(' ', 'hello', NULL, 'world'); + ++-------------------------------------------------------+ +| concat_ws(Utf8(" "),Utf8("hello"),NULL,Utf8("world")) | ++-------------------------------------------------------+ +| hello world | ++-------------------------------------------------------+ + +SELECT CONCAT_WS('|', first_name, last_name) FROM concat_test ORDER BY ts; + ++-------------------------------------------------------------------+ +| concat_ws(Utf8("|"),concat_test.first_name,concat_test.last_name) | ++-------------------------------------------------------------------+ +| John|Doe | +| Jane|Smith | +| Bob | +| Wilson | ++-------------------------------------------------------------------+ + +-- Test pipe operator || +SELECT 'hello' || 'world'; + ++--------------------------------+ +| Utf8("hello") || Utf8("world") | ++--------------------------------+ +| helloworld | ++--------------------------------+ + +SELECT 'hello' || ' ' || 'world'; + ++---------------------------------------------+ +| Utf8("hello") || Utf8(" ") || Utf8("world") | ++---------------------------------------------+ +| hello world | ++---------------------------------------------+ + +SELECT first_name || ' ' || last_name FROM concat_test WHERE first_name IS NOT NULL AND last_name IS NOT NULL ORDER BY ts; + ++--------------------------------------------------------------+ +| concat_test.first_name || Utf8(" ") || concat_test.last_name | ++--------------------------------------------------------------+ +| John Doe | +| Jane Smith | ++--------------------------------------------------------------+ + +-- Unicode concatenation +SELECT CONCAT('Hello ', '世界'); + ++-------------------------------------+ +| concat(Utf8("Hello "),Utf8("世界")) | ++-------------------------------------+ +| Hello 世界 | ++-------------------------------------+ + +SELECT CONCAT('🚀', ' ', '🌟'); + ++-----------------------------------------+ +| concat(Utf8("🚀"),Utf8(" "),Utf8("🌟")) | ++-----------------------------------------+ +| 🚀 🌟 | ++-----------------------------------------+ + +SELECT '中文' || '🐄'; + ++----------------------------+ +| Utf8("中文") || Utf8("🐄") | ++----------------------------+ +| 中文🐄 | ++----------------------------+ + +DROP TABLE concat_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/concat.sql b/tests/cases/standalone/common/function/string/concat.sql new file mode 100644 index 0000000000..4f73eed62e --- /dev/null +++ 
b/tests/cases/standalone/common/function/string/concat.sql @@ -0,0 +1,63 @@ +-- String concatenation function tests +-- Test CONCAT function + +-- Basic concatenation +SELECT CONCAT('hello', 'world'); + +SELECT CONCAT('hello', ' ', 'world'); + +SELECT CONCAT('a', 'b', 'c', 'd'); + +-- Concatenation with NULL values +SELECT CONCAT('hello', NULL); + +SELECT CONCAT(NULL, 'world'); + +SELECT CONCAT(NULL, NULL); + +-- Concatenation with numbers (should convert to string) +SELECT CONCAT('value: ', 42); + +SELECT CONCAT(1, 2, 3); + +-- Test with table data +CREATE TABLE concat_test(first_name VARCHAR, last_name VARCHAR, age INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO concat_test VALUES + ('John', 'Doe', 30, 1000), + ('Jane', 'Smith', 25, 2000), + ('Bob', NULL, 35, 3000), + (NULL, 'Wilson', 40, 4000); + +-- Concatenate table columns +SELECT CONCAT(first_name, ' ', last_name) as full_name FROM concat_test ORDER BY ts; + +SELECT CONCAT(first_name, ' is ', age, ' years old') FROM concat_test ORDER BY ts; + +-- Test CONCAT_WS (concat with separator) +SELECT CONCAT_WS(' ', first_name, last_name) as full_name FROM concat_test ORDER BY ts; + +SELECT CONCAT_WS('-', first_name, last_name, age) FROM concat_test ORDER BY ts; + +SELECT CONCAT_WS(',', 'a', 'b', 'c', 'd'); + +-- CONCAT_WS with NULL values (should skip NULLs) +SELECT CONCAT_WS(' ', 'hello', NULL, 'world'); + +SELECT CONCAT_WS('|', first_name, last_name) FROM concat_test ORDER BY ts; + +-- Test pipe operator || +SELECT 'hello' || 'world'; + +SELECT 'hello' || ' ' || 'world'; + +SELECT first_name || ' ' || last_name FROM concat_test WHERE first_name IS NOT NULL AND last_name IS NOT NULL ORDER BY ts; + +-- Unicode concatenation +SELECT CONCAT('Hello ', '世界'); + +SELECT CONCAT('🚀', ' ', '🌟'); + +SELECT '中文' || '🐄'; + +DROP TABLE concat_test; diff --git a/tests/cases/standalone/common/function/string/length.result b/tests/cases/standalone/common/function/string/length.result new file mode 100644 index 0000000000..e508750626 --- /dev/null +++ b/tests/cases/standalone/common/function/string/length.result @@ -0,0 +1,183 @@ +-- String length function tests +-- LENGTH function +SELECT LENGTH('hello'); + ++-----------------------+ +| length(Utf8("hello")) | ++-----------------------+ +| 5 | ++-----------------------+ + +SELECT LENGTH(''); + ++------------------+ +| length(Utf8("")) | ++------------------+ +| 0 | ++------------------+ + +SELECT LENGTH(NULL); + ++--------------+ +| length(NULL) | ++--------------+ +| | ++--------------+ + +SELECT LENGTH('hello world'); + ++-----------------------------+ +| length(Utf8("hello world")) | ++-----------------------------+ +| 11 | ++-----------------------------+ + +-- CHAR_LENGTH (character length) +SELECT CHAR_LENGTH('hello'); + ++----------------------------+ +| char_length(Utf8("hello")) | ++----------------------------+ +| 5 | ++----------------------------+ + +SELECT CHAR_LENGTH(''); + ++-----------------------+ +| char_length(Utf8("")) | ++-----------------------+ +| 0 | ++-----------------------+ + +SELECT CHAR_LENGTH(NULL); + ++-------------------+ +| char_length(NULL) | ++-------------------+ +| | ++-------------------+ + +-- CHARACTER_LENGTH (alias for CHAR_LENGTH) +SELECT CHARACTER_LENGTH('hello world'); + ++---------------------------------------+ +| character_length(Utf8("hello world")) | ++---------------------------------------+ +| 11 | ++---------------------------------------+ + +-- Unicode character length +SELECT LENGTH('世界') AS a, CHAR_LENGTH('世界') AS b; + ++---+---+ +| a | b | 
++---+---+ +| 2 | 2 | ++---+---+ + +SELECT LENGTH('🚀🌟') AS a, CHAR_LENGTH('🚀🌟') AS b; + ++---+---+ +| a | b | ++---+---+ +| 2 | 2 | ++---+---+ + +SELECT LENGTH('café') AS a, CHAR_LENGTH('café') AS b; + ++---+---+ +| a | b | ++---+---+ +| 4 | 4 | ++---+---+ + +-- Test with table data +CREATE TABLE length_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO length_test VALUES + ('hello', 1000), + ('world!', 2000), + ('', 3000), + ('中文测试', 4000), + ('🚀🎉🌟', 5000), + (NULL, 6000); + +Affected Rows: 6 + +SELECT s, LENGTH(s) AS a, CHAR_LENGTH(s) AS b FROM length_test ORDER BY ts; + ++----------+---+---+ +| s | a | b | ++----------+---+---+ +| hello | 5 | 5 | +| world! | 6 | 6 | +| | 0 | 0 | +| 中文测试 | 4 | 4 | +| 🚀🎉🌟 | 3 | 3 | +| | | | ++----------+---+---+ + +-- BIT_LENGTH (length in bits) +SELECT BIT_LENGTH('hello'); + ++---------------------------+ +| bit_length(Utf8("hello")) | ++---------------------------+ +| 40 | ++---------------------------+ + +SELECT BIT_LENGTH(''); + ++----------------------+ +| bit_length(Utf8("")) | ++----------------------+ +| 0 | ++----------------------+ + +SELECT BIT_LENGTH('世界'); + ++--------------------------+ +| bit_length(Utf8("世界")) | ++--------------------------+ +| 48 | ++--------------------------+ + +-- OCTET_LENGTH (length in bytes) +SELECT OCTET_LENGTH('hello'); + ++-----------------------------+ +| octet_length(Utf8("hello")) | ++-----------------------------+ +| 5 | ++-----------------------------+ + +SELECT OCTET_LENGTH(''); + ++------------------------+ +| octet_length(Utf8("")) | ++------------------------+ +| 0 | ++------------------------+ + +SELECT OCTET_LENGTH('世界'); + ++----------------------------+ +| octet_length(Utf8("世界")) | ++----------------------------+ +| 6 | ++----------------------------+ + +SELECT OCTET_LENGTH('🚀'); + ++--------------------------+ +| octet_length(Utf8("🚀")) | ++--------------------------+ +| 4 | ++--------------------------+ + +DROP TABLE length_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/length.sql b/tests/cases/standalone/common/function/string/length.sql new file mode 100644 index 0000000000..26f683858d --- /dev/null +++ b/tests/cases/standalone/common/function/string/length.sql @@ -0,0 +1,58 @@ +-- String length function tests + +-- LENGTH function +SELECT LENGTH('hello'); + +SELECT LENGTH(''); + +SELECT LENGTH(NULL); + +SELECT LENGTH('hello world'); + +-- CHAR_LENGTH (character length) +SELECT CHAR_LENGTH('hello'); + +SELECT CHAR_LENGTH(''); + +SELECT CHAR_LENGTH(NULL); + +-- CHARACTER_LENGTH (alias for CHAR_LENGTH) +SELECT CHARACTER_LENGTH('hello world'); + +-- Unicode character length +SELECT LENGTH('世界') AS a, CHAR_LENGTH('世界') AS b; + +SELECT LENGTH('🚀🌟') AS a, CHAR_LENGTH('🚀🌟') AS b; + +SELECT LENGTH('café') AS a, CHAR_LENGTH('café') AS b; + +-- Test with table data +CREATE TABLE length_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO length_test VALUES + ('hello', 1000), + ('world!', 2000), + ('', 3000), + ('中文测试', 4000), + ('🚀🎉🌟', 5000), + (NULL, 6000); + +SELECT s, LENGTH(s) AS a, CHAR_LENGTH(s) AS b FROM length_test ORDER BY ts; + +-- BIT_LENGTH (length in bits) +SELECT BIT_LENGTH('hello'); + +SELECT BIT_LENGTH(''); + +SELECT BIT_LENGTH('世界'); + +-- OCTET_LENGTH (length in bytes) +SELECT OCTET_LENGTH('hello'); + +SELECT OCTET_LENGTH(''); + +SELECT OCTET_LENGTH('世界'); + +SELECT OCTET_LENGTH('🚀'); + +DROP TABLE length_test; diff --git a/tests/cases/standalone/common/function/string/like_pattern.result 
b/tests/cases/standalone/common/function/string/like_pattern.result new file mode 100644 index 0000000000..515582a1fc --- /dev/null +++ b/tests/cases/standalone/common/function/string/like_pattern.result @@ -0,0 +1,280 @@ +-- String LIKE pattern matching tests +-- Basic LIKE patterns +SELECT 'hello world' LIKE 'hello%'; + ++-----------------------------------------+ +| Utf8("hello world") LIKE Utf8("hello%") | ++-----------------------------------------+ +| true | ++-----------------------------------------+ + +SELECT 'hello world' LIKE '%world'; + ++-----------------------------------------+ +| Utf8("hello world") LIKE Utf8("%world") | ++-----------------------------------------+ +| true | ++-----------------------------------------+ + +SELECT 'hello world' LIKE '%llo%'; + ++----------------------------------------+ +| Utf8("hello world") LIKE Utf8("%llo%") | ++----------------------------------------+ +| true | ++----------------------------------------+ + +SELECT 'hello world' LIKE 'hello_world'; + ++----------------------------------------------+ +| Utf8("hello world") LIKE Utf8("hello_world") | ++----------------------------------------------+ +| true | ++----------------------------------------------+ + +SELECT 'hello world' LIKE 'hello world'; + ++----------------------------------------------+ +| Utf8("hello world") LIKE Utf8("hello world") | ++----------------------------------------------+ +| true | ++----------------------------------------------+ + +-- LIKE with NOT +SELECT 'hello world' NOT LIKE 'goodbye%'; + ++-----------------------------------------------+ +| Utf8("hello world") NOT LIKE Utf8("goodbye%") | ++-----------------------------------------------+ +| true | ++-----------------------------------------------+ + +SELECT 'hello world' NOT LIKE 'hello%'; + ++---------------------------------------------+ +| Utf8("hello world") NOT LIKE Utf8("hello%") | ++---------------------------------------------+ +| false | ++---------------------------------------------+ + +-- Case sensitivity +SELECT 'Hello World' LIKE 'hello%'; + ++-----------------------------------------+ +| Utf8("Hello World") LIKE Utf8("hello%") | ++-----------------------------------------+ +| false | ++-----------------------------------------+ + +SELECT 'Hello World' ILIKE 'hello%'; + ++------------------------------------------+ +| Utf8("Hello World") ILIKE Utf8("hello%") | ++------------------------------------------+ +| true | ++------------------------------------------+ + +SELECT 'Hello World' ILIKE 'HELLO%'; + ++------------------------------------------+ +| Utf8("Hello World") ILIKE Utf8("HELLO%") | ++------------------------------------------+ +| true | ++------------------------------------------+ + +-- Test with table data +CREATE TABLE like_test("name" VARCHAR, email VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO like_test VALUES + ('John Doe', 'john@example.com', 1000), + ('Jane Smith', 'jane@gmail.com', 2000), + ('Bob Wilson', 'bob@yahoo.com', 3000), + ('Alice Johnson', 'alice@company.org', 4000), + ('Charlie Brown', 'charlie@test.net', 5000); + +Affected Rows: 5 + +-- Pattern matching on names +SELECT "name" FROM like_test WHERE "name" LIKE 'J%' ORDER BY ts; + ++------------+ +| name | ++------------+ +| John Doe | +| Jane Smith | ++------------+ + +SELECT "name" FROM like_test WHERE "name" LIKE '%son' ORDER BY ts; + ++---------------+ +| name | ++---------------+ +| Bob Wilson | +| Alice Johnson | ++---------------+ + +-- Contains space +SELECT "name" FROM like_test 
WHERE "name" LIKE '% %' ORDER BY ts; + ++---------------+ +| name | ++---------------+ +| John Doe | +| Jane Smith | +| Bob Wilson | +| Alice Johnson | +| Charlie Brown | ++---------------+ + +-- Pattern matching on emails +SELECT "name", email FROM like_test WHERE email LIKE '%@gmail.com' ORDER BY ts; + ++------------+----------------+ +| name | email | ++------------+----------------+ +| Jane Smith | jane@gmail.com | ++------------+----------------+ + +SELECT "name", email FROM like_test WHERE email LIKE '%.com' ORDER BY ts; + ++------------+------------------+ +| name | email | ++------------+------------------+ +| John Doe | john@example.com | +| Jane Smith | jane@gmail.com | +| Bob Wilson | bob@yahoo.com | ++------------+------------------+ + +SELECT "name", email FROM like_test WHERE email LIKE '%@%.org' ORDER BY ts; + ++---------------+-------------------+ +| name | email | ++---------------+-------------------+ +| Alice Johnson | alice@company.org | ++---------------+-------------------+ + +-- Underscore wildcard +SELECT "name" FROM like_test WHERE "name" LIKE 'Jo__ ___' ORDER BY ts; + ++----------+ +| name | ++----------+ +| John Doe | ++----------+ + +SELECT email FROM like_test WHERE email LIKE '____@%' ORDER BY ts; + ++------------------+ +| email | ++------------------+ +| john@example.com | +| jane@gmail.com | ++------------------+ + +-- Multiple wildcards +-- Contains 'o' +SELECT "name" FROM like_test WHERE "name" LIKE '%o%' ORDER BY ts; + ++---------------+ +| name | ++---------------+ +| John Doe | +| Bob Wilson | +| Alice Johnson | +| Charlie Brown | ++---------------+ + +-- 'a' before and after @ +SELECT email FROM like_test WHERE email LIKE '%a%@%a%' ORDER BY ts; + ++-------------------+ +| email | ++-------------------+ +| jane@gmail.com | +| alice@company.org | ++-------------------+ + +-- Escaping special characters +CREATE TABLE escape_test("text" VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO escape_test VALUES + ('100% complete', 1000), + ('test_file.txt', 2000), + ('50% done', 3000), + ('backup_2023.sql', 4000); + +Affected Rows: 4 + +-- Need to escape % and _ +-- Contains % +SELECT "text" FROM escape_test WHERE "text" LIKE '%\%%' ORDER BY ts; + ++---------------+ +| text | ++---------------+ +| 100% complete | +| 50% done | ++---------------+ + +-- Contains _ +SELECT "text" FROM escape_test WHERE "text" LIKE '%\_%' ORDER BY ts; + ++-----------------+ +| text | ++-----------------+ +| test_file.txt | +| backup_2023.sql | ++-----------------+ + +-- Unicode pattern matching +CREATE TABLE unicode_like(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO unicode_like VALUES + ('Hello 世界', 1000), + ('🚀 rocket', 2000), + ('café shop', 3000); + +Affected Rows: 3 + +SELECT s FROM unicode_like WHERE s LIKE '%世界' ORDER BY ts; + ++------------+ +| s | ++------------+ +| Hello 世界 | ++------------+ + +SELECT s FROM unicode_like WHERE s LIKE '🚀%' ORDER BY ts; + ++-----------+ +| s | ++-----------+ +| 🚀 rocket | ++-----------+ + +SELECT s FROM unicode_like WHERE s LIKE '%é%' ORDER BY ts; + ++-----------+ +| s | ++-----------+ +| café shop | ++-----------+ + +DROP TABLE like_test; + +Affected Rows: 0 + +DROP TABLE escape_test; + +Affected Rows: 0 + +DROP TABLE unicode_like; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/like_pattern.sql b/tests/cases/standalone/common/function/string/like_pattern.sql new file mode 100644 index 0000000000..460fc42e33 --- /dev/null +++ 
b/tests/cases/standalone/common/function/string/like_pattern.sql @@ -0,0 +1,97 @@ +-- String LIKE pattern matching tests + +-- Basic LIKE patterns +SELECT 'hello world' LIKE 'hello%'; + +SELECT 'hello world' LIKE '%world'; + +SELECT 'hello world' LIKE '%llo%'; + +SELECT 'hello world' LIKE 'hello_world'; + +SELECT 'hello world' LIKE 'hello world'; + +-- LIKE with NOT +SELECT 'hello world' NOT LIKE 'goodbye%'; + +SELECT 'hello world' NOT LIKE 'hello%'; + +-- Case sensitivity +SELECT 'Hello World' LIKE 'hello%'; + +SELECT 'Hello World' ILIKE 'hello%'; + +SELECT 'Hello World' ILIKE 'HELLO%'; + +-- Test with table data +CREATE TABLE like_test("name" VARCHAR, email VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO like_test VALUES + ('John Doe', 'john@example.com', 1000), + ('Jane Smith', 'jane@gmail.com', 2000), + ('Bob Wilson', 'bob@yahoo.com', 3000), + ('Alice Johnson', 'alice@company.org', 4000), + ('Charlie Brown', 'charlie@test.net', 5000); + +-- Pattern matching on names +SELECT "name" FROM like_test WHERE "name" LIKE 'J%' ORDER BY ts; + +SELECT "name" FROM like_test WHERE "name" LIKE '%son' ORDER BY ts; + +-- Contains space +SELECT "name" FROM like_test WHERE "name" LIKE '% %' ORDER BY ts; + +-- Pattern matching on emails +SELECT "name", email FROM like_test WHERE email LIKE '%@gmail.com' ORDER BY ts; + +SELECT "name", email FROM like_test WHERE email LIKE '%.com' ORDER BY ts; + +SELECT "name", email FROM like_test WHERE email LIKE '%@%.org' ORDER BY ts; + +-- Underscore wildcard +SELECT "name" FROM like_test WHERE "name" LIKE 'Jo__ ___' ORDER BY ts; + +SELECT email FROM like_test WHERE email LIKE '____@%' ORDER BY ts; + +-- Multiple wildcards +-- Contains 'o' +SELECT "name" FROM like_test WHERE "name" LIKE '%o%' ORDER BY ts; + +-- 'a' before and after @ +SELECT email FROM like_test WHERE email LIKE '%a%@%a%' ORDER BY ts; + +-- Escaping special characters +CREATE TABLE escape_test("text" VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO escape_test VALUES + ('100% complete', 1000), + ('test_file.txt', 2000), + ('50% done', 3000), + ('backup_2023.sql', 4000); + +-- Need to escape % and _ +-- Contains % +SELECT "text" FROM escape_test WHERE "text" LIKE '%\%%' ORDER BY ts; + +-- Contains _ +SELECT "text" FROM escape_test WHERE "text" LIKE '%\_%' ORDER BY ts; + +-- Unicode pattern matching +CREATE TABLE unicode_like(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO unicode_like VALUES + ('Hello 世界', 1000), + ('🚀 rocket', 2000), + ('café shop', 3000); + +SELECT s FROM unicode_like WHERE s LIKE '%世界' ORDER BY ts; + +SELECT s FROM unicode_like WHERE s LIKE '🚀%' ORDER BY ts; + +SELECT s FROM unicode_like WHERE s LIKE '%é%' ORDER BY ts; + +DROP TABLE like_test; + +DROP TABLE escape_test; + +DROP TABLE unicode_like; diff --git a/tests/cases/standalone/common/function/string/position.result b/tests/cases/standalone/common/function/string/position.result new file mode 100644 index 0000000000..1b65fb6fba --- /dev/null +++ b/tests/cases/standalone/common/function/string/position.result @@ -0,0 +1,278 @@ +-- String position/search function tests +-- POSITION function +SELECT POSITION('world' IN 'hello world'); + ++-------------------------------------------+ +| strpos(Utf8("hello world"),Utf8("world")) | ++-------------------------------------------+ +| 7 | ++-------------------------------------------+ + +SELECT POSITION('xyz' IN 'hello world'); + ++-----------------------------------------+ +| strpos(Utf8("hello world"),Utf8("xyz")) | ++-----------------------------------------+ +| 0 | 
++-----------------------------------------+ + +SELECT POSITION('' IN 'hello world'); + ++--------------------------------------+ +| strpos(Utf8("hello world"),Utf8("")) | ++--------------------------------------+ +| 1 | ++--------------------------------------+ + +SELECT POSITION('world' IN ''); + ++--------------------------------+ +| strpos(Utf8(""),Utf8("world")) | ++--------------------------------+ +| 0 | ++--------------------------------+ + +-- STRPOS function (same as POSITION) +SELECT STRPOS('hello world', 'world'); + ++-------------------------------------------+ +| strpos(Utf8("hello world"),Utf8("world")) | ++-------------------------------------------+ +| 7 | ++-------------------------------------------+ + +SELECT STRPOS('hello world', 'xyz'); + ++-----------------------------------------+ +| strpos(Utf8("hello world"),Utf8("xyz")) | ++-----------------------------------------+ +| 0 | ++-----------------------------------------+ + +SELECT STRPOS('hello world', 'hello'); + ++-------------------------------------------+ +| strpos(Utf8("hello world"),Utf8("hello")) | ++-------------------------------------------+ +| 1 | ++-------------------------------------------+ + +SELECT STRPOS('hello world', 'o'); + ++---------------------------------------+ +| strpos(Utf8("hello world"),Utf8("o")) | ++---------------------------------------+ +| 5 | ++---------------------------------------+ + +-- INSTR function +SELECT INSTR('hello world', 'world'); + ++------------------------------------------+ +| instr(Utf8("hello world"),Utf8("world")) | ++------------------------------------------+ +| 7 | ++------------------------------------------+ + +SELECT INSTR('hello world', 'o'); + ++--------------------------------------+ +| instr(Utf8("hello world"),Utf8("o")) | ++--------------------------------------+ +| 5 | ++--------------------------------------+ + +SELECT INSTR('hello world', 'xyz'); + ++----------------------------------------+ +| instr(Utf8("hello world"),Utf8("xyz")) | ++----------------------------------------+ +| 0 | ++----------------------------------------+ + +-- Case sensitive search +SELECT POSITION('WORLD' IN 'hello world'); + ++-------------------------------------------+ +| strpos(Utf8("hello world"),Utf8("WORLD")) | ++-------------------------------------------+ +| 0 | ++-------------------------------------------+ + +SELECT POSITION('World' IN 'hello world'); + ++-------------------------------------------+ +| strpos(Utf8("hello world"),Utf8("World")) | ++-------------------------------------------+ +| 0 | ++-------------------------------------------+ + +-- LEFT and RIGHT functions +SELECT LEFT('hello world', 5); + ++------------------------------------+ +| left(Utf8("hello world"),Int64(5)) | ++------------------------------------+ +| hello | ++------------------------------------+ + +SELECT RIGHT('hello world', 5); + ++-------------------------------------+ +| right(Utf8("hello world"),Int64(5)) | ++-------------------------------------+ +| world | ++-------------------------------------+ + +-- More than string length +SELECT LEFT('hello', 10); + ++-------------------------------+ +| left(Utf8("hello"),Int64(10)) | ++-------------------------------+ +| hello | ++-------------------------------+ + +-- More than string length +SELECT RIGHT('hello', 10); + ++--------------------------------+ +| right(Utf8("hello"),Int64(10)) | ++--------------------------------+ +| hello | ++--------------------------------+ + +-- Test with NULL values +SELECT POSITION('world' IN NULL); 
+ ++----------------------------+ +| strpos(NULL,Utf8("world")) | ++----------------------------+ +| | ++----------------------------+ + +SELECT POSITION(NULL IN 'hello world'); + ++----------------------------------+ +| strpos(Utf8("hello world"),NULL) | ++----------------------------------+ +| | ++----------------------------------+ + +SELECT LEFT(NULL, 5); + ++---------------------+ +| left(NULL,Int64(5)) | ++---------------------+ +| | ++---------------------+ + +SELECT RIGHT('hello', NULL); + ++---------------------------+ +| right(Utf8("hello"),NULL) | ++---------------------------+ +| | ++---------------------------+ + +-- Test with table data +CREATE TABLE position_test(s VARCHAR, "search" VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO position_test VALUES + ('hello world', 'world', 1000), + ('hello world', 'hello', 2000), + ('hello world', 'xyz', 3000), + ('programming', 'gram', 4000), + ('database', 'base', 5000); + +Affected Rows: 5 + +SELECT s, "search", POSITION("search" IN s) AS a, STRPOS(s, "search") AS b FROM position_test ORDER BY ts; + ++-------------+--------+---+---+ +| s | search | a | b | ++-------------+--------+---+---+ +| hello world | world | 7 | 7 | +| hello world | hello | 1 | 1 | +| hello world | xyz | 0 | 0 | +| programming | gram | 4 | 4 | +| database | base | 5 | 5 | ++-------------+--------+---+---+ + +-- Test LEFT and RIGHT with table data +SELECT s, LEFT(s, 5), RIGHT(s, 5) FROM position_test ORDER BY ts; + ++-------------+--------------------------------+---------------------------------+ +| s | left(position_test.s,Int64(5)) | right(position_test.s,Int64(5)) | ++-------------+--------------------------------+---------------------------------+ +| hello world | hello | world | +| hello world | hello | world | +| hello world | hello | world | +| programming | progr | mming | +| database | datab | abase | ++-------------+--------------------------------+---------------------------------+ + +-- Unicode position tests +SELECT POSITION('世' IN 'hello世界'); + ++--------------------------------------+ +| strpos(Utf8("hello世界"),Utf8("世")) | ++--------------------------------------+ +| 6 | ++--------------------------------------+ + +SELECT POSITION('界' IN 'hello世界'); + ++--------------------------------------+ +| strpos(Utf8("hello世界"),Utf8("界")) | ++--------------------------------------+ +| 7 | ++--------------------------------------+ + +SELECT STRPOS('café shop', 'é'); + ++-------------------------------------+ +| strpos(Utf8("café shop"),Utf8("é")) | ++-------------------------------------+ +| 4 | ++-------------------------------------+ + +SELECT LEFT('中文测试', 2); + ++---------------------------------+ +| left(Utf8("中文测试"),Int64(2)) | ++---------------------------------+ +| 中文 | ++---------------------------------+ + +SELECT RIGHT('中文测试', 2); + ++----------------------------------+ +| right(Utf8("中文测试"),Int64(2)) | ++----------------------------------+ +| 测试 | ++----------------------------------+ + +-- Multiple occurrences (finds first one) +SELECT POSITION('o' IN 'hello world'); + ++---------------------------------------+ +| strpos(Utf8("hello world"),Utf8("o")) | ++---------------------------------------+ +| 5 | ++---------------------------------------+ + +SELECT STRPOS('hello world', 'l'); + ++---------------------------------------+ +| strpos(Utf8("hello world"),Utf8("l")) | ++---------------------------------------+ +| 3 | ++---------------------------------------+ + +DROP TABLE position_test; + +Affected Rows: 0 + diff --git 
a/tests/cases/standalone/common/function/string/position.sql b/tests/cases/standalone/common/function/string/position.sql new file mode 100644 index 0000000000..519a9146d3 --- /dev/null +++ b/tests/cases/standalone/common/function/string/position.sql @@ -0,0 +1,84 @@ +-- String position/search function tests + +-- POSITION function +SELECT POSITION('world' IN 'hello world'); + +SELECT POSITION('xyz' IN 'hello world'); + +SELECT POSITION('' IN 'hello world'); + +SELECT POSITION('world' IN ''); + +-- STRPOS function (same as POSITION) +SELECT STRPOS('hello world', 'world'); + +SELECT STRPOS('hello world', 'xyz'); + +SELECT STRPOS('hello world', 'hello'); + +SELECT STRPOS('hello world', 'o'); + +-- INSTR function +SELECT INSTR('hello world', 'world'); + +SELECT INSTR('hello world', 'o'); + +SELECT INSTR('hello world', 'xyz'); + +-- Case sensitive search +SELECT POSITION('WORLD' IN 'hello world'); + +SELECT POSITION('World' IN 'hello world'); + +-- LEFT and RIGHT functions +SELECT LEFT('hello world', 5); + +SELECT RIGHT('hello world', 5); + +-- More than string length +SELECT LEFT('hello', 10); + +-- More than string length +SELECT RIGHT('hello', 10); + +-- Test with NULL values +SELECT POSITION('world' IN NULL); + +SELECT POSITION(NULL IN 'hello world'); + +SELECT LEFT(NULL, 5); + +SELECT RIGHT('hello', NULL); + +-- Test with table data +CREATE TABLE position_test(s VARCHAR, "search" VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO position_test VALUES + ('hello world', 'world', 1000), + ('hello world', 'hello', 2000), + ('hello world', 'xyz', 3000), + ('programming', 'gram', 4000), + ('database', 'base', 5000); + +SELECT s, "search", POSITION("search" IN s) AS a, STRPOS(s, "search") AS b FROM position_test ORDER BY ts; + +-- Test LEFT and RIGHT with table data +SELECT s, LEFT(s, 5), RIGHT(s, 5) FROM position_test ORDER BY ts; + +-- Unicode position tests +SELECT POSITION('世' IN 'hello世界'); + +SELECT POSITION('界' IN 'hello世界'); + +SELECT STRPOS('café shop', 'é'); + +SELECT LEFT('中文测试', 2); + +SELECT RIGHT('中文测试', 2); + +-- Multiple occurrences (finds first one) +SELECT POSITION('o' IN 'hello world'); + +SELECT STRPOS('hello world', 'l'); + +DROP TABLE position_test; diff --git a/tests/cases/standalone/common/function/string/regex.result b/tests/cases/standalone/common/function/string/regex.result new file mode 100644 index 0000000000..b7030f4346 --- /dev/null +++ b/tests/cases/standalone/common/function/string/regex.result @@ -0,0 +1,143 @@ +-- Regular expression function tests +-- REGEXP_MATCHES function +SELECT regexp_like('hello123world', '\d+'); + ++------------------------------------------------+ +| regexp_like(Utf8("hello123world"),Utf8("\d+")) | ++------------------------------------------------+ +| true | ++------------------------------------------------+ + +SELECT regexp_like('no numbers here', '\d+'); + ++--------------------------------------------------+ +| regexp_like(Utf8("no numbers here"),Utf8("\d+")) | ++--------------------------------------------------+ +| false | ++--------------------------------------------------+ + +SELECT regexp_like('email@example.com', '[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+'); + ++-------------------------------------------------------------------------------------+ +| regexp_like(Utf8("email@example.com"),Utf8("[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+")) | ++-------------------------------------------------------------------------------------+ +| true | ++-------------------------------------------------------------------------------------+ + +-- 
REGEXP_REPLACE function +SELECT REGEXP_REPLACE('hello123world', '\d+', 'XXX'); + ++---------------------------------------------------------------+ +| regexp_replace(Utf8("hello123world"),Utf8("\d+"),Utf8("XXX")) | ++---------------------------------------------------------------+ +| helloXXXworld | ++---------------------------------------------------------------+ + +SELECT REGEXP_REPLACE('phone: 123-456-7890', '\d{3}-\d{3}-\d{4}', 'XXX-XXX-XXXX'); + ++--------------------------------------------------------------------------------------------+ +| regexp_replace(Utf8("phone: 123-456-7890"),Utf8("\d{3}-\d{3}-\d{4}"),Utf8("XXX-XXX-XXXX")) | ++--------------------------------------------------------------------------------------------+ +| phone: XXX-XXX-XXXX | ++--------------------------------------------------------------------------------------------+ + +SELECT REGEXP_REPLACE(' extra spaces ', '\s+', ' '); + ++------------------------------------------------------------------+ +| regexp_replace(Utf8(" extra spaces "),Utf8("\s+"),Utf8(" ")) | ++------------------------------------------------------------------+ +| extra spaces | ++------------------------------------------------------------------+ + +-- REGEXP_EXTRACT function +SELECT REGEXP_EXTRACT('version 1.2.3', '\d+\.\d+\.\d+'); + ++-------------------------------------------------------------+ +| regexp_extract(Utf8("version 1.2.3"),Utf8("\d+\.\d+\.\d+")) | ++-------------------------------------------------------------+ +| 1.2.3 | ++-------------------------------------------------------------+ + +SELECT REGEXP_EXTRACT('no match here', '\d+\.\d+\.\d+'); + ++-------------------------------------------------------------+ +| regexp_extract(Utf8("no match here"),Utf8("\d+\.\d+\.\d+")) | ++-------------------------------------------------------------+ +| | ++-------------------------------------------------------------+ + +-- Test with ~ operator (regex match) +SELECT 'hello123' ~ '\d+'; + ++--------------------------------+ +| Utf8("hello123") ~ Utf8("\d+") | ++--------------------------------+ +| true | ++--------------------------------+ + +SELECT 'hello world' ~ '\d+'; + ++-----------------------------------+ +| Utf8("hello world") ~ Utf8("\d+") | ++-----------------------------------+ +| false | ++-----------------------------------+ + +SELECT 'email@example.com' ~ '[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+'; + ++--------------------------------------------------------------------------+ +| Utf8("email@example.com") ~ Utf8("[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+") | ++--------------------------------------------------------------------------+ +| true | ++--------------------------------------------------------------------------+ + +-- Test with table data +CREATE TABLE regex_test("text" VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO regex_test VALUES + ('Phone: 123-456-7890', 1000), + ('Email: user@domain.com', 2000), + ('Version 2.1.0', 3000), + ('No pattern here', 4000); + +Affected Rows: 4 + +SELECT "text", REGEXP_EXTRACT("text", '\d{3}-\d{3}-\d{4}') as phone FROM regex_test ORDER BY ts; + ++------------------------+--------------+ +| text | phone | ++------------------------+--------------+ +| Phone: 123-456-7890 | 123-456-7890 | +| Email: user@domain.com | | +| Version 2.1.0 | | +| No pattern here | | ++------------------------+--------------+ + +SELECT "text", REGEXP_EXTRACT("text", '[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+') as email FROM regex_test ORDER BY ts; + ++------------------------+-----------------+ +| 
text | email | ++------------------------+-----------------+ +| Phone: 123-456-7890 | | +| Email: user@domain.com | user@domain.com | +| Version 2.1.0 | | +| No pattern here | | ++------------------------+-----------------+ + +SELECT "text", REGEXP_EXTRACT("text", '\d+\.\d+\.\d+') as version FROM regex_test ORDER BY ts; + ++------------------------+---------+ +| text | version | ++------------------------+---------+ +| Phone: 123-456-7890 | | +| Email: user@domain.com | | +| Version 2.1.0 | 2.1.0 | +| No pattern here | | ++------------------------+---------+ + +DROP TABLE regex_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/regex.sql b/tests/cases/standalone/common/function/string/regex.sql new file mode 100644 index 0000000000..10e0cbcc4d --- /dev/null +++ b/tests/cases/standalone/common/function/string/regex.sql @@ -0,0 +1,44 @@ +-- Regular expression function tests + +-- REGEXP_MATCHES function +SELECT regexp_like('hello123world', '\d+'); + +SELECT regexp_like('no numbers here', '\d+'); + +SELECT regexp_like('email@example.com', '[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+'); + +-- REGEXP_REPLACE function +SELECT REGEXP_REPLACE('hello123world', '\d+', 'XXX'); + +SELECT REGEXP_REPLACE('phone: 123-456-7890', '\d{3}-\d{3}-\d{4}', 'XXX-XXX-XXXX'); + +SELECT REGEXP_REPLACE(' extra spaces ', '\s+', ' '); + +-- REGEXP_EXTRACT function +SELECT REGEXP_EXTRACT('version 1.2.3', '\d+\.\d+\.\d+'); + +SELECT REGEXP_EXTRACT('no match here', '\d+\.\d+\.\d+'); + +-- Test with ~ operator (regex match) +SELECT 'hello123' ~ '\d+'; + +SELECT 'hello world' ~ '\d+'; + +SELECT 'email@example.com' ~ '[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+'; + +-- Test with table data +CREATE TABLE regex_test("text" VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO regex_test VALUES + ('Phone: 123-456-7890', 1000), + ('Email: user@domain.com', 2000), + ('Version 2.1.0', 3000), + ('No pattern here', 4000); + +SELECT "text", REGEXP_EXTRACT("text", '\d{3}-\d{3}-\d{4}') as phone FROM regex_test ORDER BY ts; + +SELECT "text", REGEXP_EXTRACT("text", '[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+') as email FROM regex_test ORDER BY ts; + +SELECT "text", REGEXP_EXTRACT("text", '\d+\.\d+\.\d+') as version FROM regex_test ORDER BY ts; + +DROP TABLE regex_test; diff --git a/tests/cases/standalone/common/function/string/repeat.result b/tests/cases/standalone/common/function/string/repeat.result new file mode 100644 index 0000000000..32ecc614d1 --- /dev/null +++ b/tests/cases/standalone/common/function/string/repeat.result @@ -0,0 +1,217 @@ +-- String REPEAT function tests +-- Basic REPEAT function +SELECT REPEAT('hello', 3); + ++--------------------------------+ +| repeat(Utf8("hello"),Int64(3)) | ++--------------------------------+ +| hellohellohello | ++--------------------------------+ + +SELECT REPEAT('a', 5); + ++----------------------------+ +| repeat(Utf8("a"),Int64(5)) | ++----------------------------+ +| aaaaa | ++----------------------------+ + +SELECT REPEAT('', 3); + ++---------------------------+ +| repeat(Utf8(""),Int64(3)) | ++---------------------------+ +| | ++---------------------------+ + +SELECT REPEAT('test', 0); + ++-------------------------------+ +| repeat(Utf8("test"),Int64(0)) | ++-------------------------------+ +| | ++-------------------------------+ + +SELECT REPEAT('test', 1); + ++-------------------------------+ +| repeat(Utf8("test"),Int64(1)) | ++-------------------------------+ +| test | ++-------------------------------+ + +-- REPEAT with NULL values +SELECT REPEAT(NULL, 3); + 
++-----------------------+ +| repeat(NULL,Int64(3)) | ++-----------------------+ +| | ++-----------------------+ + +SELECT REPEAT('hello', NULL); + ++----------------------------+ +| repeat(Utf8("hello"),NULL) | ++----------------------------+ +| | ++----------------------------+ + +-- REPEAT with negative numbers +SELECT REPEAT('hello', -1); + ++---------------------------------+ +| repeat(Utf8("hello"),Int64(-1)) | ++---------------------------------+ +| | ++---------------------------------+ + +-- REPEAT with special characters +SELECT REPEAT('*', 10); + ++-----------------------------+ +| repeat(Utf8("*"),Int64(10)) | ++-----------------------------+ +| ********** | ++-----------------------------+ + +SELECT REPEAT('-=', 5); + ++-----------------------------+ +| repeat(Utf8("-="),Int64(5)) | ++-----------------------------+ +| -=-=-=-=-= | ++-----------------------------+ + +SELECT REPEAT('!@#', 3); + ++------------------------------+ +| repeat(Utf8("!@#"),Int64(3)) | ++------------------------------+ +| !@#!@#!@# | ++------------------------------+ + +-- Test with table data +CREATE TABLE repeat_test(s VARCHAR, n INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO repeat_test VALUES + ('hello', 2, 1000), + ('*', 5, 2000), + ('test', 0, 3000), + ('a', 10, 4000), + (NULL, 3, 5000), + ('hi', NULL, 6000); + +Affected Rows: 6 + +SELECT s, n, REPEAT(s, n) FROM repeat_test ORDER BY ts; + ++-------+----+-------------------------------------+ +| s | n | repeat(repeat_test.s,repeat_test.n) | ++-------+----+-------------------------------------+ +| hello | 2 | hellohello | +| * | 5 | ***** | +| test | 0 | | +| a | 10 | aaaaaaaaaa | +| | 3 | | +| hi | | | ++-------+----+-------------------------------------+ + +-- Unicode REPEAT +SELECT REPEAT('世', 3); + ++-----------------------------+ +| repeat(Utf8("世"),Int64(3)) | ++-----------------------------+ +| 世世世 | ++-----------------------------+ + +SELECT REPEAT('🚀', 5); + ++-----------------------------+ +| repeat(Utf8("🚀"),Int64(5)) | ++-----------------------------+ +| 🚀🚀🚀🚀🚀 | ++-----------------------------+ + +SELECT REPEAT('café', 2); + ++-------------------------------+ +| repeat(Utf8("café"),Int64(2)) | ++-------------------------------+ +| cafécafé | ++-------------------------------+ + +-- REPEAT with spaces and formatting +SELECT REPEAT(' ', 10); + ++-----------------------------+ +| repeat(Utf8(" "),Int64(10)) | ++-----------------------------+ +| | ++-----------------------------+ + +SELECT REPEAT('\t', 3); + ++-----------------------------+ +| repeat(Utf8("\t"),Int64(3)) | ++-----------------------------+ +| \t\t\t | ++-----------------------------+ + +SELECT CONCAT('Start', REPEAT('-', 10), 'End'); + ++---------------------------------------------------------------+ +| concat(Utf8("Start"),repeat(Utf8("-"),Int64(10)),Utf8("End")) | ++---------------------------------------------------------------+ +| Start----------End | ++---------------------------------------------------------------+ + +-- Large REPEAT operations +SELECT LENGTH(REPEAT('a', 100)); + ++---------------------------------------+ +| length(repeat(Utf8("a"), Int64(100))) | ++---------------------------------------+ +| 100 | ++---------------------------------------+ + +SELECT LENGTH(REPEAT('ab', 50)); + ++---------------------------------------+ +| length(repeat(Utf8("ab"), Int64(50))) | ++---------------------------------------+ +| 100 | ++---------------------------------------+ + +-- Combining REPEAT with other functions +SELECT UPPER(REPEAT('hello', 
3)); + ++---------------------------------------+ +| upper(repeat(Utf8("hello"),Int64(3))) | ++---------------------------------------+ +| HELLOHELLOHELLO | ++---------------------------------------+ + +SELECT REPEAT(UPPER('hello'), 2); + ++---------------------------------------+ +| repeat(upper(Utf8("hello")),Int64(2)) | ++---------------------------------------+ +| HELLOHELLO | ++---------------------------------------+ + +SELECT REVERSE(REPEAT('abc', 3)); + ++---------------------------------------+ +| reverse(repeat(Utf8("abc"),Int64(3))) | ++---------------------------------------+ +| cbacbacba | ++---------------------------------------+ + +DROP TABLE repeat_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/repeat.sql b/tests/cases/standalone/common/function/string/repeat.sql new file mode 100644 index 0000000000..6e75de8113 --- /dev/null +++ b/tests/cases/standalone/common/function/string/repeat.sql @@ -0,0 +1,68 @@ +-- String REPEAT function tests + +-- Basic REPEAT function +SELECT REPEAT('hello', 3); + +SELECT REPEAT('a', 5); + +SELECT REPEAT('', 3); + +SELECT REPEAT('test', 0); + +SELECT REPEAT('test', 1); + +-- REPEAT with NULL values +SELECT REPEAT(NULL, 3); + +SELECT REPEAT('hello', NULL); + +-- REPEAT with negative numbers +SELECT REPEAT('hello', -1); + +-- REPEAT with special characters +SELECT REPEAT('*', 10); + +SELECT REPEAT('-=', 5); + +SELECT REPEAT('!@#', 3); + +-- Test with table data +CREATE TABLE repeat_test(s VARCHAR, n INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO repeat_test VALUES + ('hello', 2, 1000), + ('*', 5, 2000), + ('test', 0, 3000), + ('a', 10, 4000), + (NULL, 3, 5000), + ('hi', NULL, 6000); + +SELECT s, n, REPEAT(s, n) FROM repeat_test ORDER BY ts; + +-- Unicode REPEAT +SELECT REPEAT('世', 3); + +SELECT REPEAT('🚀', 5); + +SELECT REPEAT('café', 2); + +-- REPEAT with spaces and formatting +SELECT REPEAT(' ', 10); + +SELECT REPEAT('\t', 3); + +SELECT CONCAT('Start', REPEAT('-', 10), 'End'); + +-- Large REPEAT operations +SELECT LENGTH(REPEAT('a', 100)); + +SELECT LENGTH(REPEAT('ab', 50)); + +-- Combining REPEAT with other functions +SELECT UPPER(REPEAT('hello', 3)); + +SELECT REPEAT(UPPER('hello'), 2); + +SELECT REVERSE(REPEAT('abc', 3)); + +DROP TABLE repeat_test; diff --git a/tests/cases/standalone/common/function/string/replace.result b/tests/cases/standalone/common/function/string/replace.result new file mode 100644 index 0000000000..a4e1790d34 --- /dev/null +++ b/tests/cases/standalone/common/function/string/replace.result @@ -0,0 +1,180 @@ +-- String REPLACE function tests +-- Basic REPLACE function +SELECT REPLACE('hello world', 'world', 'universe'); + ++-------------------------------------------------------------+ +| replace(Utf8("hello world"),Utf8("world"),Utf8("universe")) | ++-------------------------------------------------------------+ +| hello universe | ++-------------------------------------------------------------+ + +SELECT REPLACE('hello world', 'xyz', 'abc'); + ++------------------------------------------------------+ +| replace(Utf8("hello world"),Utf8("xyz"),Utf8("abc")) | ++------------------------------------------------------+ +| hello world | ++------------------------------------------------------+ + +SELECT REPLACE('hello hello hello', 'hello', 'hi'); + ++-------------------------------------------------------------+ +| replace(Utf8("hello hello hello"),Utf8("hello"),Utf8("hi")) | ++-------------------------------------------------------------+ +| hi hi hi | 
++-------------------------------------------------------------+ + +-- REPLACE with empty strings +SELECT REPLACE('hello world', 'world', ''); + ++-----------------------------------------------------+ +| replace(Utf8("hello world"),Utf8("world"),Utf8("")) | ++-----------------------------------------------------+ +| hello | ++-----------------------------------------------------+ + +SELECT REPLACE('hello world', '', 'xyz'); + ++---------------------------------------------------+ +| replace(Utf8("hello world"),Utf8(""),Utf8("xyz")) | ++---------------------------------------------------+ +| xyzhxyzexyzlxyzlxyzoxyz xyzwxyzoxyzrxyzlxyzdxyz | ++---------------------------------------------------+ + +SELECT REPLACE('', 'xyz', 'abc'); + ++-------------------------------------------+ +| replace(Utf8(""),Utf8("xyz"),Utf8("abc")) | ++-------------------------------------------+ +| | ++-------------------------------------------+ + +-- Case sensitive replacement +SELECT REPLACE('Hello World', 'hello', 'hi'); + ++-------------------------------------------------------+ +| replace(Utf8("Hello World"),Utf8("hello"),Utf8("hi")) | ++-------------------------------------------------------+ +| Hello World | ++-------------------------------------------------------+ + +SELECT REPLACE('Hello World', 'Hello', 'Hi'); + ++-------------------------------------------------------+ +| replace(Utf8("Hello World"),Utf8("Hello"),Utf8("Hi")) | ++-------------------------------------------------------+ +| Hi World | ++-------------------------------------------------------+ + +-- NULL handling +SELECT REPLACE(NULL, 'world', 'universe'); + ++----------------------------------------------+ +| replace(NULL,Utf8("world"),Utf8("universe")) | ++----------------------------------------------+ +| | ++----------------------------------------------+ + +SELECT REPLACE('hello world', NULL, 'universe'); + ++----------------------------------------------------+ +| replace(Utf8("hello world"),NULL,Utf8("universe")) | ++----------------------------------------------------+ +| | ++----------------------------------------------------+ + +SELECT REPLACE('hello world', 'world', NULL); + ++-------------------------------------------------+ +| replace(Utf8("hello world"),Utf8("world"),NULL) | ++-------------------------------------------------+ +| | ++-------------------------------------------------+ + +-- Test with table data +CREATE TABLE replace_test(s VARCHAR, old_str VARCHAR, new_str VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO replace_test VALUES + ('hello world', 'world', 'universe', 1000), + ('programming language', 'language', 'paradigm', 2000), + ('test test test', 'test', 'exam', 3000), + ('no match here', 'xyz', 'abc', 4000); + +Affected Rows: 4 + +SELECT s, old_str, new_str, REPLACE(s, old_str, new_str) FROM replace_test ORDER BY ts; + ++----------------------+----------+----------+-------------------------------------------------------------------+ +| s | old_str | new_str | replace(replace_test.s,replace_test.old_str,replace_test.new_str) | ++----------------------+----------+----------+-------------------------------------------------------------------+ +| hello world | world | universe | hello universe | +| programming language | language | paradigm | programming paradigm | +| test test test | test | exam | exam exam exam | +| no match here | xyz | abc | no match here | ++----------------------+----------+----------+-------------------------------------------------------------------+ + +-- Unicode 
replacement +SELECT REPLACE('hello 世界', '世界', 'world'); + ++--------------------------------------------------------+ +| replace(Utf8("hello 世界"),Utf8("世界"),Utf8("world")) | ++--------------------------------------------------------+ +| hello world | ++--------------------------------------------------------+ + +SELECT REPLACE('café shop', 'é', 'e'); + ++------------------------------------------------+ +| replace(Utf8("café shop"),Utf8("é"),Utf8("e")) | ++------------------------------------------------+ +| cafe shop | ++------------------------------------------------+ + +SELECT REPLACE('🚀 rocket 🚀', '🚀', '✈️'); + ++-----------------------------------------------------+ +| replace(Utf8("🚀 rocket 🚀"),Utf8("🚀"),Utf8("✈️")) | ++-----------------------------------------------------+ +| ✈️ rocket ✈️ | ++-----------------------------------------------------+ + +-- Multiple character replacement +SELECT REPLACE('hello-world-test', '-', '_'); + ++-------------------------------------------------------+ +| replace(Utf8("hello-world-test"),Utf8("-"),Utf8("_")) | ++-------------------------------------------------------+ +| hello_world_test | ++-------------------------------------------------------+ + +SELECT REPLACE('abc::def::ghi', '::', '-->'); + ++-------------------------------------------------------+ +| replace(Utf8("abc::def::ghi"),Utf8("::"),Utf8("-->")) | ++-------------------------------------------------------+ +| abc-->def-->ghi | ++-------------------------------------------------------+ + +-- Overlapping patterns +SELECT REPLACE('ababab', 'ab', 'xy'); + ++-----------------------------------------------+ +| replace(Utf8("ababab"),Utf8("ab"),Utf8("xy")) | ++-----------------------------------------------+ +| xyxyxy | ++-----------------------------------------------+ + +SELECT REPLACE('aaa', 'aa', 'b'); + ++-------------------------------------------+ +| replace(Utf8("aaa"),Utf8("aa"),Utf8("b")) | ++-------------------------------------------+ +| ba | ++-------------------------------------------+ + +DROP TABLE replace_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/replace.sql b/tests/cases/standalone/common/function/string/replace.sql new file mode 100644 index 0000000000..20006ae7c8 --- /dev/null +++ b/tests/cases/standalone/common/function/string/replace.sql @@ -0,0 +1,57 @@ +-- String REPLACE function tests + +-- Basic REPLACE function +SELECT REPLACE('hello world', 'world', 'universe'); + +SELECT REPLACE('hello world', 'xyz', 'abc'); + +SELECT REPLACE('hello hello hello', 'hello', 'hi'); + +-- REPLACE with empty strings +SELECT REPLACE('hello world', 'world', ''); + +SELECT REPLACE('hello world', '', 'xyz'); + +SELECT REPLACE('', 'xyz', 'abc'); + +-- Case sensitive replacement +SELECT REPLACE('Hello World', 'hello', 'hi'); + +SELECT REPLACE('Hello World', 'Hello', 'Hi'); + +-- NULL handling +SELECT REPLACE(NULL, 'world', 'universe'); + +SELECT REPLACE('hello world', NULL, 'universe'); + +SELECT REPLACE('hello world', 'world', NULL); + +-- Test with table data +CREATE TABLE replace_test(s VARCHAR, old_str VARCHAR, new_str VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO replace_test VALUES + ('hello world', 'world', 'universe', 1000), + ('programming language', 'language', 'paradigm', 2000), + ('test test test', 'test', 'exam', 3000), + ('no match here', 'xyz', 'abc', 4000); + +SELECT s, old_str, new_str, REPLACE(s, old_str, new_str) FROM replace_test ORDER BY ts; + +-- Unicode replacement +SELECT REPLACE('hello 世界', '世界', 'world'); + 
+SELECT REPLACE('café shop', 'é', 'e'); + +SELECT REPLACE('🚀 rocket 🚀', '🚀', '✈️'); + +-- Multiple character replacement +SELECT REPLACE('hello-world-test', '-', '_'); + +SELECT REPLACE('abc::def::ghi', '::', '-->'); + +-- Overlapping patterns +SELECT REPLACE('ababab', 'ab', 'xy'); + +SELECT REPLACE('aaa', 'aa', 'b'); + +DROP TABLE replace_test; diff --git a/tests/cases/standalone/common/function/string/reverse.result b/tests/cases/standalone/common/function/string/reverse.result new file mode 100644 index 0000000000..00bd73f49b --- /dev/null +++ b/tests/cases/standalone/common/function/string/reverse.result @@ -0,0 +1,200 @@ +-- String REVERSE function tests +-- Basic REVERSE function +SELECT REVERSE('hello'); + ++------------------------+ +| reverse(Utf8("hello")) | ++------------------------+ +| olleh | ++------------------------+ + +SELECT REVERSE('world'); + ++------------------------+ +| reverse(Utf8("world")) | ++------------------------+ +| dlrow | ++------------------------+ + +SELECT REVERSE(''); + ++-------------------+ +| reverse(Utf8("")) | ++-------------------+ +| | ++-------------------+ + +SELECT REVERSE(NULL); + ++---------------+ +| reverse(NULL) | ++---------------+ +| | ++---------------+ + +-- REVERSE with numbers and special characters +SELECT REVERSE('12345'); + ++------------------------+ +| reverse(Utf8("12345")) | ++------------------------+ +| 54321 | ++------------------------+ + +SELECT REVERSE('hello!'); + ++-------------------------+ +| reverse(Utf8("hello!")) | ++-------------------------+ +| !olleh | ++-------------------------+ + +SELECT REVERSE('a!@#$%b'); + ++--------------------------+ +| reverse(Utf8("a!@#$%b")) | ++--------------------------+ +| b%$#@!a | ++--------------------------+ + +-- REVERSE with palindromes +SELECT REVERSE('radar'); + ++------------------------+ +| reverse(Utf8("radar")) | ++------------------------+ +| radar | ++------------------------+ + +SELECT REVERSE('madam'); + ++------------------------+ +| reverse(Utf8("madam")) | ++------------------------+ +| madam | ++------------------------+ + +SELECT REVERSE('racecar'); + ++--------------------------+ +| reverse(Utf8("racecar")) | ++--------------------------+ +| racecar | ++--------------------------+ + +-- Test with table data +CREATE TABLE reverse_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO reverse_test VALUES + ('hello', 1000), + ('world', 2000), + ('12345', 3000), + ('radar', 4000), + ('', 5000), + (NULL, 6000); + +Affected Rows: 6 + +SELECT s, REVERSE(s) FROM reverse_test ORDER BY ts; + ++-------+-------------------------+ +| s | reverse(reverse_test.s) | ++-------+-------------------------+ +| hello | olleh | +| world | dlrow | +| 12345 | 54321 | +| radar | radar | +| | | +| | | ++-------+-------------------------+ + +-- Unicode REVERSE +SELECT REVERSE('世界'); + ++-----------------------+ +| reverse(Utf8("世界")) | ++-----------------------+ +| 界世 | ++-----------------------+ + +SELECT REVERSE('café'); + ++-----------------------+ +| reverse(Utf8("café")) | ++-----------------------+ +| éfac | ++-----------------------+ + +SELECT REVERSE('🚀🌟'); + ++-----------------------+ +| reverse(Utf8("🚀🌟")) | ++-----------------------+ +| 🌟🚀 | ++-----------------------+ + +-- REVERSE with spaces +SELECT REVERSE('hello world'); + ++------------------------------+ +| reverse(Utf8("hello world")) | ++------------------------------+ +| dlrow olleh | ++------------------------------+ + +SELECT REVERSE(' spaces '); + ++-----------------------------+ +| 
reverse(Utf8(" spaces ")) | ++-----------------------------+ +| secaps | ++-----------------------------+ + +-- Combining REVERSE with other functions +SELECT UPPER(REVERSE('hello')); + ++-------------------------------+ +| upper(reverse(Utf8("hello"))) | ++-------------------------------+ +| OLLEH | ++-------------------------------+ + +SELECT REVERSE(UPPER('hello')); + ++-------------------------------+ +| reverse(upper(Utf8("hello"))) | ++-------------------------------+ +| OLLEH | ++-------------------------------+ + +SELECT LENGTH(REVERSE('hello world')); + ++--------------------------------------+ +| length(reverse(Utf8("hello world"))) | ++--------------------------------------+ +| 11 | ++--------------------------------------+ + +-- Double REVERSE (should return original) +SELECT REVERSE(REVERSE('hello world')); + ++---------------------------------------+ +| reverse(reverse(Utf8("hello world"))) | ++---------------------------------------+ +| hello world | ++---------------------------------------+ + +SELECT REVERSE(REVERSE('中文测试')); + ++------------------------------------+ +| reverse(reverse(Utf8("中文测试"))) | ++------------------------------------+ +| 中文测试 | ++------------------------------------+ + +DROP TABLE reverse_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/reverse.sql b/tests/cases/standalone/common/function/string/reverse.sql new file mode 100644 index 0000000000..f33f359f73 --- /dev/null +++ b/tests/cases/standalone/common/function/string/reverse.sql @@ -0,0 +1,63 @@ +-- String REVERSE function tests + +-- Basic REVERSE function +SELECT REVERSE('hello'); + +SELECT REVERSE('world'); + +SELECT REVERSE(''); + +SELECT REVERSE(NULL); + +-- REVERSE with numbers and special characters +SELECT REVERSE('12345'); + +SELECT REVERSE('hello!'); + +SELECT REVERSE('a!@#$%b'); + +-- REVERSE with palindromes +SELECT REVERSE('radar'); + +SELECT REVERSE('madam'); + +SELECT REVERSE('racecar'); + +-- Test with table data +CREATE TABLE reverse_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO reverse_test VALUES + ('hello', 1000), + ('world', 2000), + ('12345', 3000), + ('radar', 4000), + ('', 5000), + (NULL, 6000); + +SELECT s, REVERSE(s) FROM reverse_test ORDER BY ts; + +-- Unicode REVERSE +SELECT REVERSE('世界'); + +SELECT REVERSE('café'); + +SELECT REVERSE('🚀🌟'); + +-- REVERSE with spaces +SELECT REVERSE('hello world'); + +SELECT REVERSE(' spaces '); + +-- Combining REVERSE with other functions +SELECT UPPER(REVERSE('hello')); + +SELECT REVERSE(UPPER('hello')); + +SELECT LENGTH(REVERSE('hello world')); + +-- Double REVERSE (should return original) +SELECT REVERSE(REVERSE('hello world')); + +SELECT REVERSE(REVERSE('中文测试')); + +DROP TABLE reverse_test; diff --git a/tests/cases/standalone/common/function/string/string_split.result b/tests/cases/standalone/common/function/string/string_split.result new file mode 100644 index 0000000000..d67adc0a0a --- /dev/null +++ b/tests/cases/standalone/common/function/string/string_split.result @@ -0,0 +1,213 @@ +-- Migrated from DuckDB test: test/sql/function/string/test_string_split.test +-- String split function tests +-- Test basic string_split functionality +SELECT string_to_array(NULL, NULL); + ++----------------------------+ +| string_to_array(NULL,NULL) | ++----------------------------+ +| | ++----------------------------+ + +SELECT string_to_array('hello world', ' '); + ++------------------------------------------------+ +| string_to_array(Utf8("hello world"),Utf8(" ")) | 
++------------------------------------------------+ +| [hello, world] | ++------------------------------------------------+ + +SELECT string_to_array(NULL, ' '); + ++---------------------------------+ +| string_to_array(NULL,Utf8(" ")) | ++---------------------------------+ +| | ++---------------------------------+ + +SELECT string_to_array('a b c', NULL); + ++-------------------------------------+ +| string_to_array(Utf8("a b c"),NULL) | ++-------------------------------------+ +| [a, , b, , c] | ++-------------------------------------+ + +SELECT string_to_array('a b c', ' '); + ++------------------------------------------+ +| string_to_array(Utf8("a b c"),Utf8(" ")) | ++------------------------------------------+ +| [a, b, c] | ++------------------------------------------+ + +-- Test with table data +CREATE TABLE split_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO split_test VALUES + ('hello,world,test', 1000), + ('a|b|c|d', 2000), + ('no-separator', 3000), + ('', 4000), + (NULL, 5000); + +Affected Rows: 5 + +-- Test splitting with different separators +SELECT s, string_to_array(s, ',') FROM split_test ORDER BY ts; + ++------------------+-----------------------------------------+ +| s | string_to_array(split_test.s,Utf8(",")) | ++------------------+-----------------------------------------+ +| hello,world,test | [hello, world, test] | +| a|b|c|d | [a|b|c|d] | +| no-separator | [no-separator] | +| | [] | +| | | ++------------------+-----------------------------------------+ + +SELECT s, string_to_array(s, '|') FROM split_test ORDER BY ts; + ++------------------+-----------------------------------------+ +| s | string_to_array(split_test.s,Utf8("|")) | ++------------------+-----------------------------------------+ +| hello,world,test | [hello,world,test] | +| a|b|c|d | [a, b, c, d] | +| no-separator | [no-separator] | +| | [] | +| | | ++------------------+-----------------------------------------+ + +SELECT s, string_to_array(s, '-') FROM split_test ORDER BY ts; + ++------------------+-----------------------------------------+ +| s | string_to_array(split_test.s,Utf8("-")) | ++------------------+-----------------------------------------+ +| hello,world,test | [hello,world,test] | +| a|b|c|d | [a|b|c|d] | +| no-separator | [no, separator] | +| | [] | +| | | ++------------------+-----------------------------------------+ + +-- Test splitting with multi-character separator +CREATE TABLE multi_sep_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO multi_sep_test VALUES + ('hello::world::test', 1000), + ('a---b---c', 2000), + ('single', 3000); + +Affected Rows: 3 + +SELECT s, string_to_array(s, '::') FROM multi_sep_test ORDER BY ts; + ++--------------------+----------------------------------------------+ +| s | string_to_array(multi_sep_test.s,Utf8("::")) | ++--------------------+----------------------------------------------+ +| hello::world::test | [hello, world, test] | +| a---b---c | [a---b---c] | +| single | [single] | ++--------------------+----------------------------------------------+ + +SELECT s, string_to_array(s, '---') FROM multi_sep_test ORDER BY ts; + ++--------------------+-----------------------------------------------+ +| s | string_to_array(multi_sep_test.s,Utf8("---")) | ++--------------------+-----------------------------------------------+ +| hello::world::test | [hello::world::test] | +| a---b---c | [a, b, c] | +| single | [single] | ++--------------------+-----------------------------------------------+ + +-- Test 
with Unicode separators +CREATE TABLE unicode_split_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO unicode_split_test VALUES + ('hello世world世test', 1000), + ('a🦆b🦆c', 2000); + +Affected Rows: 2 + +SELECT s, string_to_array(s, '世') FROM unicode_split_test ORDER BY ts; + ++--------------------+--------------------------------------------------+ +| s | string_to_array(unicode_split_test.s,Utf8("世")) | ++--------------------+--------------------------------------------------+ +| hello世world世test | [hello, world, test] | +| a🦆b🦆c | [a🦆b🦆c] | ++--------------------+--------------------------------------------------+ + +SELECT s, string_to_array(s, '🦆') FROM unicode_split_test ORDER BY ts; + ++--------------------+--------------------------------------------------+ +| s | string_to_array(unicode_split_test.s,Utf8("🦆")) | ++--------------------+--------------------------------------------------+ +| hello世world世test | [hello世world世test] | +| a🦆b🦆c | [a, b, c] | ++--------------------+--------------------------------------------------+ + +-- Test edge cases +-- Empty string +SELECT string_to_array('', ','); + ++-------------------------------------+ +| string_to_array(Utf8(""),Utf8(",")) | ++-------------------------------------+ +| [] | ++-------------------------------------+ + +-- Empty separator +SELECT string_to_array('hello', ''); + ++-----------------------------------------+ +| string_to_array(Utf8("hello"),Utf8("")) | ++-----------------------------------------+ +| [hello] | ++-----------------------------------------+ + +-- Multiple consecutive separators +SELECT string_to_array(',,hello,,world,,', ','); + ++-----------------------------------------------------+ +| string_to_array(Utf8(",,hello,,world,,"),Utf8(",")) | ++-----------------------------------------------------+ +| [, , hello, , world, , ] | ++-----------------------------------------------------+ + +-- Trailing separator +SELECT string_to_array('hello,', ','); + ++-------------------------------------------+ +| string_to_array(Utf8("hello,"),Utf8(",")) | ++-------------------------------------------+ +| [hello, ] | ++-------------------------------------------+ + +-- Leading separator +SELECT string_to_array(',hello', ','); + ++-------------------------------------------+ +| string_to_array(Utf8(",hello"),Utf8(",")) | ++-------------------------------------------+ +| [, hello] | ++-------------------------------------------+ + +DROP TABLE split_test; + +Affected Rows: 0 + +DROP TABLE multi_sep_test; + +Affected Rows: 0 + +DROP TABLE unicode_split_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/string_split.sql b/tests/cases/standalone/common/function/string/string_split.sql new file mode 100644 index 0000000000..ef0be5fff5 --- /dev/null +++ b/tests/cases/standalone/common/function/string/string_split.sql @@ -0,0 +1,75 @@ +-- Migrated from DuckDB test: test/sql/function/string/test_string_split.test +-- String split function tests + +-- Test basic string_split functionality +SELECT string_to_array(NULL, NULL); + +SELECT string_to_array('hello world', ' '); + +SELECT string_to_array(NULL, ' '); + +SELECT string_to_array('a b c', NULL); + +SELECT string_to_array('a b c', ' '); + +-- Test with table data +CREATE TABLE split_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO split_test VALUES + ('hello,world,test', 1000), + ('a|b|c|d', 2000), + ('no-separator', 3000), + ('', 4000), + (NULL, 5000); + +-- Test splitting with different separators +SELECT s, 
string_to_array(s, ',') FROM split_test ORDER BY ts; + +SELECT s, string_to_array(s, '|') FROM split_test ORDER BY ts; + +SELECT s, string_to_array(s, '-') FROM split_test ORDER BY ts; + +-- Test splitting with multi-character separator +CREATE TABLE multi_sep_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO multi_sep_test VALUES + ('hello::world::test', 1000), + ('a---b---c', 2000), + ('single', 3000); + +SELECT s, string_to_array(s, '::') FROM multi_sep_test ORDER BY ts; + +SELECT s, string_to_array(s, '---') FROM multi_sep_test ORDER BY ts; + +-- Test with Unicode separators +CREATE TABLE unicode_split_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO unicode_split_test VALUES + ('hello世world世test', 1000), + ('a🦆b🦆c', 2000); + +SELECT s, string_to_array(s, '世') FROM unicode_split_test ORDER BY ts; + +SELECT s, string_to_array(s, '🦆') FROM unicode_split_test ORDER BY ts; + +-- Test edge cases +-- Empty string +SELECT string_to_array('', ','); + +-- Empty separator +SELECT string_to_array('hello', ''); + +-- Multiple consecutive separators +SELECT string_to_array(',,hello,,world,,', ','); + +-- Trailing separator +SELECT string_to_array('hello,', ','); + +-- Leading separator +SELECT string_to_array(',hello', ','); + +DROP TABLE split_test; + +DROP TABLE multi_sep_test; + +DROP TABLE unicode_split_test; diff --git a/tests/cases/standalone/common/function/string/substring.result b/tests/cases/standalone/common/function/string/substring.result new file mode 100644 index 0000000000..642571084a --- /dev/null +++ b/tests/cases/standalone/common/function/string/substring.result @@ -0,0 +1,173 @@ +-- Migrated from DuckDB test: test/sql/function/string/test_substring.test +-- Substring function tests +CREATE TABLE strings(s VARCHAR, "off" INTEGER, length INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO strings VALUES + ('hello', 1, 2, 1000), + ('world', 2, 3, 2000), + ('b', 1, 1, 3000), + (NULL, 2, 2, 4000); + +Affected Rows: 4 + +-- Test zero length +SELECT SUBSTRING('🦆ab', 1, 0), SUBSTRING('abc', 1, 0); + ++----------------------------------------+---------------------------------------+ +| substr(Utf8("🦆ab"),Int64(1),Int64(0)) | substr(Utf8("abc"),Int64(1),Int64(0)) | ++----------------------------------------+---------------------------------------+ +| | | ++----------------------------------------+---------------------------------------+ + +-- Normal substring with constant offset/length +SELECT SUBSTRING(s, 1, 2) FROM strings ORDER BY ts; + ++-------------------------------------+ +| substr(strings.s,Int64(1),Int64(2)) | ++-------------------------------------+ +| he | +| wo | +| b | +| | ++-------------------------------------+ + +-- Substring out of range +SELECT SUBSTRING(s, 2, 2) FROM strings ORDER BY ts; + ++-------------------------------------+ +| substr(strings.s,Int64(2),Int64(2)) | ++-------------------------------------+ +| el | +| or | +| | +| | ++-------------------------------------+ + +-- Variable length offset/length +SELECT SUBSTRING(s, "off", "length") FROM strings ORDER BY ts; + ++----------------------------------------------+ +| substr(strings.s,strings.off,strings.length) | ++----------------------------------------------+ +| he | +| orl | +| b | +| | ++----------------------------------------------+ + +SELECT SUBSTRING(s, "off", 2) FROM strings ORDER BY ts; + ++----------------------------------------+ +| substr(strings.s,strings.off,Int64(2)) | ++----------------------------------------+ +| he | +| or | +| b | +| | 
++----------------------------------------+ + +SELECT SUBSTRING(s, 1, length) FROM strings ORDER BY ts; + ++-------------------------------------------+ +| substr(strings.s,Int64(1),strings.length) | ++-------------------------------------------+ +| he | +| wor | +| b | +| | ++-------------------------------------------+ + +SELECT SUBSTRING('hello', "off", length) FROM strings ORDER BY ts; + ++--------------------------------------------------+ +| substr(Utf8("hello"),strings.off,strings.length) | ++--------------------------------------------------+ +| he | +| ell | +| h | +| el | ++--------------------------------------------------+ + +-- Test with NULL values +SELECT SUBSTRING(NULL, "off", length) FROM strings ORDER BY ts; + ++-----------------------------------------+ +| substr(NULL,strings.off,strings.length) | ++-----------------------------------------+ +| | +| | +| | +| | ++-----------------------------------------+ + +SELECT SUBSTRING(s, NULL, length) FROM strings ORDER BY ts; + ++---------------------------------------+ +| substr(strings.s,NULL,strings.length) | ++---------------------------------------+ +| | +| | +| | +| | ++---------------------------------------+ + +SELECT SUBSTRING(s, "off", NULL) FROM strings ORDER BY ts; + ++------------------------------------+ +| substr(strings.s,strings.off,NULL) | ++------------------------------------+ +| | +| | +| | +| | ++------------------------------------+ + +-- Test negative offsets +SELECT SUBSTRING('hello', -1, 3); + ++------------------------------------------+ +| substr(Utf8("hello"),Int64(-1),Int64(3)) | ++------------------------------------------+ +| h | ++------------------------------------------+ + +SELECT SUBSTRING('hello', 0, 3); + ++-----------------------------------------+ +| substr(Utf8("hello"),Int64(0),Int64(3)) | ++-----------------------------------------+ +| he | ++-----------------------------------------+ + +-- Test with Unicode characters +CREATE TABLE unicode_strings(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO unicode_strings VALUES + ('Hello 世界', 1000), + ('🦆🦀🐧', 2000), + ('café', 3000); + +Affected Rows: 3 + +SELECT s, SUBSTRING(s, 1, 5), SUBSTRING(s, 7, 2) FROM unicode_strings ORDER BY ts; + ++------------+---------------------------------------------+---------------------------------------------+ +| s | substr(unicode_strings.s,Int64(1),Int64(5)) | substr(unicode_strings.s,Int64(7),Int64(2)) | ++------------+---------------------------------------------+---------------------------------------------+ +| Hello 世界 | Hello | 世界 | +| 🦆🦀🐧 | 🦆🦀🐧 | | +| café | café | | ++------------+---------------------------------------------+---------------------------------------------+ + +DROP TABLE strings; + +Affected Rows: 0 + +DROP TABLE unicode_strings; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/substring.sql b/tests/cases/standalone/common/function/string/substring.sql new file mode 100644 index 0000000000..6c00bbe5f6 --- /dev/null +++ b/tests/cases/standalone/common/function/string/substring.sql @@ -0,0 +1,53 @@ +-- Migrated from DuckDB test: test/sql/function/string/test_substring.test +-- Substring function tests + +CREATE TABLE strings(s VARCHAR, "off" INTEGER, length INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO strings VALUES + ('hello', 1, 2, 1000), + ('world', 2, 3, 2000), + ('b', 1, 1, 3000), + (NULL, 2, 2, 4000); + +-- Test zero length +SELECT SUBSTRING('🦆ab', 1, 0), SUBSTRING('abc', 1, 0); + +-- Normal substring with constant 
offset/length +SELECT SUBSTRING(s, 1, 2) FROM strings ORDER BY ts; + +-- Substring out of range +SELECT SUBSTRING(s, 2, 2) FROM strings ORDER BY ts; + +-- Variable length offset/length +SELECT SUBSTRING(s, "off", "length") FROM strings ORDER BY ts; + +SELECT SUBSTRING(s, "off", 2) FROM strings ORDER BY ts; + +SELECT SUBSTRING(s, 1, length) FROM strings ORDER BY ts; + +SELECT SUBSTRING('hello', "off", length) FROM strings ORDER BY ts; + +-- Test with NULL values +SELECT SUBSTRING(NULL, "off", length) FROM strings ORDER BY ts; + +SELECT SUBSTRING(s, NULL, length) FROM strings ORDER BY ts; + +SELECT SUBSTRING(s, "off", NULL) FROM strings ORDER BY ts; + +-- Test negative offsets +SELECT SUBSTRING('hello', -1, 3); +SELECT SUBSTRING('hello', 0, 3); + +-- Test with Unicode characters +CREATE TABLE unicode_strings(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO unicode_strings VALUES + ('Hello 世界', 1000), + ('🦆🦀🐧', 2000), + ('café', 3000); + +SELECT s, SUBSTRING(s, 1, 5), SUBSTRING(s, 7, 2) FROM unicode_strings ORDER BY ts; + +DROP TABLE strings; + +DROP TABLE unicode_strings; diff --git a/tests/cases/standalone/common/function/string/trim_pad.result b/tests/cases/standalone/common/function/string/trim_pad.result new file mode 100644 index 0000000000..c29b430180 --- /dev/null +++ b/tests/cases/standalone/common/function/string/trim_pad.result @@ -0,0 +1,274 @@ +-- String TRIM and PAD function tests +-- TRIM functions +SELECT TRIM(' hello world '); + ++--------------------------------+ +| btrim(Utf8(" hello world ")) | ++--------------------------------+ +| hello world | ++--------------------------------+ + +SELECT LTRIM(' hello world '); + ++--------------------------------+ +| ltrim(Utf8(" hello world ")) | ++--------------------------------+ +| hello world | ++--------------------------------+ + +SELECT RTRIM(' hello world '); + ++--------------------------------+ +| rtrim(Utf8(" hello world ")) | ++--------------------------------+ +| hello world | ++--------------------------------+ + +-- TRIM with specific characters +SELECT TRIM('x' FROM 'xxxhello worldxxx'); + ++--------------------------------------------+ +| btrim(Utf8("xxxhello worldxxx"),Utf8("x")) | ++--------------------------------------------+ +| hello world | ++--------------------------------------------+ + +SELECT LTRIM('hello world', 'hel'); + ++----------------------------------------+ +| ltrim(Utf8("hello world"),Utf8("hel")) | ++----------------------------------------+ +| o world | ++----------------------------------------+ + +SELECT RTRIM('hello world', 'dlr'); + ++----------------------------------------+ +| rtrim(Utf8("hello world"),Utf8("dlr")) | ++----------------------------------------+ +| hello wo | ++----------------------------------------+ + +-- PAD functions +SELECT LPAD('hello', 10, '*'); + ++-----------------------------------------+ +| lpad(Utf8("hello"),Int64(10),Utf8("*")) | ++-----------------------------------------+ +| *****hello | ++-----------------------------------------+ + +SELECT RPAD('hello', 10, '*'); + ++-----------------------------------------+ +| rpad(Utf8("hello"),Int64(10),Utf8("*")) | ++-----------------------------------------+ +| hello***** | ++-----------------------------------------+ + +-- Truncate +SELECT LPAD('hello', 3, '*'); + ++----------------------------------------+ +| lpad(Utf8("hello"),Int64(3),Utf8("*")) | ++----------------------------------------+ +| hel | ++----------------------------------------+ + +-- Truncate +SELECT RPAD('hello', 3, '*'); + 
++----------------------------------------+ +| rpad(Utf8("hello"),Int64(3),Utf8("*")) | ++----------------------------------------+ +| hel | ++----------------------------------------+ + +-- PAD with multi-character padding +SELECT LPAD('test', 10, 'ab'); + ++-----------------------------------------+ +| lpad(Utf8("test"),Int64(10),Utf8("ab")) | ++-----------------------------------------+ +| abababtest | ++-----------------------------------------+ + +SELECT RPAD('test', 10, 'xy'); + ++-----------------------------------------+ +| rpad(Utf8("test"),Int64(10),Utf8("xy")) | ++-----------------------------------------+ +| testxyxyxy | ++-----------------------------------------+ + +-- Test with table data +CREATE TABLE trim_pad_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO trim_pad_test VALUES + (' hello ', 1000), + ('world ', 2000), + (' test', 3000), + ('no-spaces', 4000), + ('', 5000), + (NULL, 6000); + +Affected Rows: 6 + +-- Apply TRIM functions to table data +SELECT s, TRIM(s), LTRIM(s), RTRIM(s) FROM trim_pad_test ORDER BY ts; + ++-----------+------------------------+------------------------+------------------------+ +| s | btrim(trim_pad_test.s) | ltrim(trim_pad_test.s) | rtrim(trim_pad_test.s) | ++-----------+------------------------+------------------------+------------------------+ +| hello | hello | hello | hello | +| world | world | world | world | +| test | test | test | test | +| no-spaces | no-spaces | no-spaces | no-spaces | +| | | | | +| | | | | ++-----------+------------------------+------------------------+------------------------+ + +-- Apply PAD functions +SELECT s, LPAD(TRIM(s), 15, '-'), RPAD(TRIM(s), 15, '+') FROM trim_pad_test WHERE s IS NOT NULL ORDER BY ts; + ++-----------+--------------------------------------------------+--------------------------------------------------+ +| s | lpad(btrim(trim_pad_test.s),Int64(15),Utf8("-")) | rpad(btrim(trim_pad_test.s),Int64(15),Utf8("+")) | ++-----------+--------------------------------------------------+--------------------------------------------------+ +| hello | ----------hello | hello++++++++++ | +| world | ----------world | world++++++++++ | +| test | -----------test | test+++++++++++ | +| no-spaces | ------no-spaces | no-spaces++++++ | +| | --------------- | +++++++++++++++ | ++-----------+--------------------------------------------------+--------------------------------------------------+ + +-- Test with Unicode characters +SELECT TRIM(' 中文测试 '); + ++-----------------------------+ +| btrim(Utf8(" 中文测试 ")) | ++-----------------------------+ +| 中文测试 | ++-----------------------------+ + +SELECT LPAD('🚀', 10, '★'); + ++--------------------------------------+ +| lpad(Utf8("🚀"),Int64(10),Utf8("★")) | ++--------------------------------------+ +| ★★★★★★★★★🚀 | ++--------------------------------------+ + +SELECT RPAD('café', 8, '•'); + ++---------------------------------------+ +| rpad(Utf8("café"),Int64(8),Utf8("•")) | ++---------------------------------------+ +| café•••• | ++---------------------------------------+ + +-- Edge cases +SELECT TRIM(''); + ++-----------------+ +| btrim(Utf8("")) | ++-----------------+ +| | ++-----------------+ + +SELECT TRIM(NULL); + ++-------------+ +| btrim(NULL) | ++-------------+ +| | ++-------------+ + +SELECT LPAD('', 5, '*'); + ++-----------------------------------+ +| lpad(Utf8(""),Int64(5),Utf8("*")) | ++-----------------------------------+ +| ***** | ++-----------------------------------+ + +SELECT RPAD('', 5, '*'); + 
++-----------------------------------+ +| rpad(Utf8(""),Int64(5),Utf8("*")) | ++-----------------------------------+ +| ***** | ++-----------------------------------+ + +SELECT LPAD('test', 0, '*'); + ++---------------------------------------+ +| lpad(Utf8("test"),Int64(0),Utf8("*")) | ++---------------------------------------+ +| | ++---------------------------------------+ + +SELECT RPAD('test', 0, '*'); + ++---------------------------------------+ +| rpad(Utf8("test"),Int64(0),Utf8("*")) | ++---------------------------------------+ +| | ++---------------------------------------+ + +-- TRIM with various whitespace characters +SELECT TRIM('\t\nhello\r\n\t'); + ++--------------------------------+ +| btrim(Utf8("\t\nhello\r\n\t")) | ++--------------------------------+ +| \t\nhello\r\n\t | ++--------------------------------+ + +SELECT LTRIM('\t\nhello world'); + ++--------------------------------+ +| ltrim(Utf8("\t\nhello world")) | ++--------------------------------+ +| \t\nhello world | ++--------------------------------+ + +SELECT RTRIM('hello world\r\n'); + ++--------------------------------+ +| rtrim(Utf8("hello world\r\n")) | ++--------------------------------+ +| hello world\r\n | ++--------------------------------+ + +-- Custom TRIM characters +CREATE TABLE custom_trim(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO custom_trim VALUES + ('***hello***', 1000), + ('---world---', 2000), + ('abcTESTabc', 3000); + +Affected Rows: 3 + +SELECT s, TRIM('*' FROM s), TRIM('-' FROM s), TRIM('abc' FROM s) FROM custom_trim ORDER BY ts; + ++-------------+--------------------------------+--------------------------------+----------------------------------+ +| s | btrim(custom_trim.s,Utf8("*")) | btrim(custom_trim.s,Utf8("-")) | btrim(custom_trim.s,Utf8("abc")) | ++-------------+--------------------------------+--------------------------------+----------------------------------+ +| ***hello*** | hello | ***hello*** | ***hello*** | +| ---world--- | ---world--- | world | ---world--- | +| abcTESTabc | abcTESTabc | abcTESTabc | TEST | ++-------------+--------------------------------+--------------------------------+----------------------------------+ + +DROP TABLE trim_pad_test; + +Affected Rows: 0 + +DROP TABLE custom_trim; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/trim_pad.sql b/tests/cases/standalone/common/function/string/trim_pad.sql new file mode 100644 index 0000000000..6c6ba92c65 --- /dev/null +++ b/tests/cases/standalone/common/function/string/trim_pad.sql @@ -0,0 +1,88 @@ +-- String TRIM and PAD function tests + +-- TRIM functions +SELECT TRIM(' hello world '); + +SELECT LTRIM(' hello world '); + +SELECT RTRIM(' hello world '); + +-- TRIM with specific characters +SELECT TRIM('x' FROM 'xxxhello worldxxx'); + +SELECT LTRIM('hello world', 'hel'); + +SELECT RTRIM('hello world', 'dlr'); + +-- PAD functions +SELECT LPAD('hello', 10, '*'); + +SELECT RPAD('hello', 10, '*'); + +-- Truncate +SELECT LPAD('hello', 3, '*'); + +-- Truncate +SELECT RPAD('hello', 3, '*'); + +-- PAD with multi-character padding +SELECT LPAD('test', 10, 'ab'); + +SELECT RPAD('test', 10, 'xy'); + +-- Test with table data +CREATE TABLE trim_pad_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO trim_pad_test VALUES + (' hello ', 1000), + ('world ', 2000), + (' test', 3000), + ('no-spaces', 4000), + ('', 5000), + (NULL, 6000); + +-- Apply TRIM functions to table data +SELECT s, TRIM(s), LTRIM(s), RTRIM(s) FROM trim_pad_test ORDER BY ts; + +-- Apply PAD functions 
+SELECT s, LPAD(TRIM(s), 15, '-'), RPAD(TRIM(s), 15, '+') FROM trim_pad_test WHERE s IS NOT NULL ORDER BY ts; + +-- Test with Unicode characters +SELECT TRIM(' 中文测试 '); + +SELECT LPAD('🚀', 10, '★'); + +SELECT RPAD('café', 8, '•'); + +-- Edge cases +SELECT TRIM(''); + +SELECT TRIM(NULL); +SELECT LPAD('', 5, '*'); + +SELECT RPAD('', 5, '*'); + +SELECT LPAD('test', 0, '*'); + +SELECT RPAD('test', 0, '*'); + +-- TRIM with various whitespace characters +SELECT TRIM('\t\nhello\r\n\t'); + +SELECT LTRIM('\t\nhello world'); + +SELECT RTRIM('hello world\r\n'); + +-- Custom TRIM characters +CREATE TABLE custom_trim(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO custom_trim VALUES + ('***hello***', 1000), + ('---world---', 2000), + ('abcTESTabc', 3000); + +SELECT s, TRIM('*' FROM s), TRIM('-' FROM s), TRIM('abc' FROM s) FROM custom_trim ORDER BY ts; + +DROP TABLE trim_pad_test; + +DROP TABLE custom_trim; diff --git a/tests/cases/standalone/common/function/string/upper_lower.result b/tests/cases/standalone/common/function/string/upper_lower.result new file mode 100644 index 0000000000..4f283530ef --- /dev/null +++ b/tests/cases/standalone/common/function/string/upper_lower.result @@ -0,0 +1,291 @@ +-- String case conversion function tests +-- Basic UPPER and LOWER functions +SELECT UPPER('hello world'); + ++----------------------------+ +| upper(Utf8("hello world")) | ++----------------------------+ +| HELLO WORLD | ++----------------------------+ + +SELECT LOWER('HELLO WORLD'); + ++----------------------------+ +| lower(Utf8("HELLO WORLD")) | ++----------------------------+ +| hello world | ++----------------------------+ + +SELECT UPPER('MiXeD cAsE'); + ++---------------------------+ +| upper(Utf8("MiXeD cAsE")) | ++---------------------------+ +| MIXED CASE | ++---------------------------+ + +SELECT LOWER('MiXeD cAsE'); + ++---------------------------+ +| lower(Utf8("MiXeD cAsE")) | ++---------------------------+ +| mixed case | ++---------------------------+ + +-- INITCAP (capitalize first letter of each word) +SELECT INITCAP('hello world'); + ++------------------------------+ +| initcap(Utf8("hello world")) | ++------------------------------+ +| Hello World | ++------------------------------+ + +SELECT INITCAP('HELLO WORLD'); + ++------------------------------+ +| initcap(Utf8("HELLO WORLD")) | ++------------------------------+ +| Hello World | ++------------------------------+ + +SELECT INITCAP('mIxEd CaSe TeSt'); + ++----------------------------------+ +| initcap(Utf8("mIxEd CaSe TeSt")) | ++----------------------------------+ +| Mixed Case Test | ++----------------------------------+ + +-- Test with NULL +SELECT UPPER(NULL); + ++-------------+ +| upper(NULL) | ++-------------+ +| | ++-------------+ + +SELECT LOWER(NULL); + ++-------------+ +| lower(NULL) | ++-------------+ +| | ++-------------+ + +SELECT INITCAP(NULL); + ++---------------+ +| initcap(NULL) | ++---------------+ +| | ++---------------+ + +-- Test with numbers and special characters +SELECT UPPER('hello123!@#'); + ++----------------------------+ +| upper(Utf8("hello123!@#")) | ++----------------------------+ +| HELLO123!@# | ++----------------------------+ + +SELECT LOWER('HELLO123!@#'); + ++----------------------------+ +| lower(Utf8("HELLO123!@#")) | ++----------------------------+ +| hello123!@# | ++----------------------------+ + +SELECT INITCAP('hello-world_test'); + ++-----------------------------------+ +| initcap(Utf8("hello-world_test")) | ++-----------------------------------+ +| Hello-World_Test | 
++-----------------------------------+ + +-- Test with table data +CREATE TABLE case_test("name" VARCHAR, city VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO case_test VALUES + ('john doe', 'new york', 1000), + ('JANE SMITH', 'LOS ANGELES', 2000), + ('Bob Wilson', 'Chicago', 3000), + ('alice johnson', 'BOSTON', 4000); + +Affected Rows: 4 + +-- Apply case functions to table data +SELECT "name", UPPER("name"), LOWER("name"), INITCAP("name") FROM case_test ORDER BY ts; + ++---------------+-----------------------+-----------------------+-------------------------+ +| name | upper(case_test.name) | lower(case_test.name) | initcap(case_test.name) | ++---------------+-----------------------+-----------------------+-------------------------+ +| john doe | JOHN DOE | john doe | John Doe | +| JANE SMITH | JANE SMITH | jane smith | Jane Smith | +| Bob Wilson | BOB WILSON | bob wilson | Bob Wilson | +| alice johnson | ALICE JOHNSON | alice johnson | Alice Johnson | ++---------------+-----------------------+-----------------------+-------------------------+ + +SELECT city, UPPER(city), LOWER(city), INITCAP(city) FROM case_test ORDER BY ts; + ++-------------+-----------------------+-----------------------+-------------------------+ +| city | upper(case_test.city) | lower(case_test.city) | initcap(case_test.city) | ++-------------+-----------------------+-----------------------+-------------------------+ +| new york | NEW YORK | new york | New York | +| LOS ANGELES | LOS ANGELES | los angeles | Los Angeles | +| Chicago | CHICAGO | chicago | Chicago | +| BOSTON | BOSTON | boston | Boston | ++-------------+-----------------------+-----------------------+-------------------------+ + +-- Combined case operations +SELECT INITCAP(LOWER("name")) as formatted_name FROM case_test ORDER BY ts; + ++----------------+ +| formatted_name | ++----------------+ +| John Doe | +| Jane Smith | +| Bob Wilson | +| Alice Johnson | ++----------------+ + +-- Unicode case conversion +SELECT UPPER('café'); + ++---------------------+ +| upper(Utf8("café")) | ++---------------------+ +| CAFÉ | ++---------------------+ + +SELECT LOWER('CAFÉ'); + ++---------------------+ +| lower(Utf8("CAFÉ")) | ++---------------------+ +| café | ++---------------------+ + +-- German characters +SELECT UPPER('äöüß'); + ++---------------------+ +| upper(Utf8("äöüß")) | ++---------------------+ +| ÄÖÜSS | ++---------------------+ + +-- German uppercase +SELECT LOWER('ÄÖÜ'); + ++--------------------+ +| lower(Utf8("ÄÖÜ")) | ++--------------------+ +| äöü | ++--------------------+ + +-- Greek letters +SELECT UPPER('αβγ'); + ++--------------------+ +| upper(Utf8("αβγ")) | ++--------------------+ +| ΑΒΓ | ++--------------------+ + +SELECT LOWER('ΑΒΓ'); + ++--------------------+ +| lower(Utf8("ΑΒΓ")) | ++--------------------+ +| αβγ | ++--------------------+ + +-- Test with empty string +SELECT UPPER(''); + ++-----------------+ +| upper(Utf8("")) | ++-----------------+ +| | ++-----------------+ + +SELECT LOWER(''); + ++-----------------+ +| lower(Utf8("")) | ++-----------------+ +| | ++-----------------+ + +SELECT INITCAP(''); + ++-------------------+ +| initcap(Utf8("")) | ++-------------------+ +| | ++-------------------+ + +-- Test with single characters +SELECT UPPER('a'), UPPER('A'), UPPER('1'), UPPER(' '); + ++------------------+------------------+------------------+------------------+ +| upper(Utf8("a")) | upper(Utf8("A")) | upper(Utf8("1")) | upper(Utf8(" ")) | 
++------------------+------------------+------------------+------------------+ +| A | A | 1 | | ++------------------+------------------+------------------+------------------+ + +SELECT LOWER('a'), LOWER('A'), LOWER('1'), LOWER(' '); + ++------------------+------------------+------------------+------------------+ +| lower(Utf8("a")) | lower(Utf8("A")) | lower(Utf8("1")) | lower(Utf8(" ")) | ++------------------+------------------+------------------+------------------+ +| a | a | 1 | | ++------------------+------------------+------------------+------------------+ + +SELECT INITCAP('a'), INITCAP('A'), INITCAP('1'); + ++--------------------+--------------------+--------------------+ +| initcap(Utf8("a")) | initcap(Utf8("A")) | initcap(Utf8("1")) | ++--------------------+--------------------+--------------------+ +| A | A | 1 | ++--------------------+--------------------+--------------------+ + +-- Complex Unicode examples +CREATE TABLE unicode_case(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO unicode_case VALUES + ('hello 世界', 1000), + ('HELLO 世界', 2000), + ('café à paris', 3000), + ('CAFÉ À PARIS', 4000); + +Affected Rows: 4 + +SELECT s, UPPER(s), LOWER(s), INITCAP(s) FROM unicode_case ORDER BY ts; + ++--------------+-----------------------+-----------------------+-------------------------+ +| s | upper(unicode_case.s) | lower(unicode_case.s) | initcap(unicode_case.s) | ++--------------+-----------------------+-----------------------+-------------------------+ +| hello 世界 | HELLO 世界 | hello 世界 | Hello 世界 | +| HELLO 世界 | HELLO 世界 | hello 世界 | Hello 世界 | +| café à paris | CAFÉ À PARIS | café à paris | Café À Paris | +| CAFÉ À PARIS | CAFÉ À PARIS | café à paris | Café À Paris | ++--------------+-----------------------+-----------------------+-------------------------+ + +DROP TABLE case_test; + +Affected Rows: 0 + +DROP TABLE unicode_case; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/upper_lower.sql b/tests/cases/standalone/common/function/string/upper_lower.sql new file mode 100644 index 0000000000..d89f78cfe8 --- /dev/null +++ b/tests/cases/standalone/common/function/string/upper_lower.sql @@ -0,0 +1,93 @@ +-- String case conversion function tests + +-- Basic UPPER and LOWER functions +SELECT UPPER('hello world'); + +SELECT LOWER('HELLO WORLD'); + +SELECT UPPER('MiXeD cAsE'); + +SELECT LOWER('MiXeD cAsE'); + +-- INITCAP (capitalize first letter of each word) +SELECT INITCAP('hello world'); + +SELECT INITCAP('HELLO WORLD'); + +SELECT INITCAP('mIxEd CaSe TeSt'); + +-- Test with NULL +SELECT UPPER(NULL); + +SELECT LOWER(NULL); + +SELECT INITCAP(NULL); + +-- Test with numbers and special characters +SELECT UPPER('hello123!@#'); + +SELECT LOWER('HELLO123!@#'); + +SELECT INITCAP('hello-world_test'); + +-- Test with table data +CREATE TABLE case_test("name" VARCHAR, city VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO case_test VALUES + ('john doe', 'new york', 1000), + ('JANE SMITH', 'LOS ANGELES', 2000), + ('Bob Wilson', 'Chicago', 3000), + ('alice johnson', 'BOSTON', 4000); + +-- Apply case functions to table data +SELECT "name", UPPER("name"), LOWER("name"), INITCAP("name") FROM case_test ORDER BY ts; + +SELECT city, UPPER(city), LOWER(city), INITCAP(city) FROM case_test ORDER BY ts; + +-- Combined case operations +SELECT INITCAP(LOWER("name")) as formatted_name FROM case_test ORDER BY ts; + +-- Unicode case conversion +SELECT UPPER('café'); + +SELECT LOWER('CAFÉ'); + +-- German characters +SELECT UPPER('äöüß'); + +-- German 
uppercase +SELECT LOWER('ÄÖÜ'); + +-- Greek letters +SELECT UPPER('αβγ'); + +SELECT LOWER('ΑΒΓ'); + +-- Test with empty string +SELECT UPPER(''); + +SELECT LOWER(''); + +SELECT INITCAP(''); + +-- Test with single characters +SELECT UPPER('a'), UPPER('A'), UPPER('1'), UPPER(' '); + +SELECT LOWER('a'), LOWER('A'), LOWER('1'), LOWER(' '); + +SELECT INITCAP('a'), INITCAP('A'), INITCAP('1'); + +-- Complex Unicode examples +CREATE TABLE unicode_case(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO unicode_case VALUES + ('hello 世界', 1000), + ('HELLO 世界', 2000), + ('café à paris', 3000), + ('CAFÉ À PARIS', 4000); + +SELECT s, UPPER(s), LOWER(s), INITCAP(s) FROM unicode_case ORDER BY ts; + +DROP TABLE case_test; + +DROP TABLE unicode_case; diff --git a/tests/cases/standalone/common/order/nulls_first_last.result b/tests/cases/standalone/common/order/nulls_first_last.result new file mode 100644 index 0000000000..3bf9570729 --- /dev/null +++ b/tests/cases/standalone/common/order/nulls_first_last.result @@ -0,0 +1,141 @@ +-- Migrated from DuckDB test: test/sql/order/test_nulls_first.test +-- Test NULLS FIRST/NULLS LAST +CREATE TABLE integers(i INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO integers VALUES (1, 1000), (NULL, 2000); + +Affected Rows: 2 + +-- Default NULL ordering (usually NULLS LAST in most systems) +SELECT i FROM integers ORDER BY i; + ++---+ +| i | ++---+ +| 1 | +| | ++---+ + +-- Explicit NULLS FIRST +SELECT i FROM integers ORDER BY i NULLS FIRST; + ++---+ +| i | ++---+ +| | +| 1 | ++---+ + +-- Explicit NULLS LAST +SELECT i FROM integers ORDER BY i NULLS LAST; + ++---+ +| i | ++---+ +| 1 | +| | ++---+ + +-- Multiple columns with mixed NULL handling +CREATE TABLE test(i INTEGER, j INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO test VALUES (1, 1, 1000), (NULL, 1, 2000), (1, NULL, 3000); + +Affected Rows: 3 + +SELECT i, j FROM test ORDER BY i NULLS FIRST, j NULLS LAST; + ++---+---+ +| i | j | ++---+---+ +| | 1 | +| 1 | 1 | +| 1 | | ++---+---+ + +SELECT i, j FROM test ORDER BY i NULLS FIRST, j NULLS FIRST; + ++---+---+ +| i | j | ++---+---+ +| | 1 | +| 1 | | +| 1 | 1 | ++---+---+ + +SELECT i, j FROM test ORDER BY i NULLS LAST, j NULLS FIRST; + ++---+---+ +| i | j | ++---+---+ +| 1 | | +| 1 | 1 | +| | 1 | ++---+---+ + +-- Test with DESC ordering +SELECT i, j FROM test ORDER BY i DESC NULLS FIRST, j DESC NULLS LAST; + ++---+---+ +| i | j | ++---+---+ +| | 1 | +| 1 | 1 | +| 1 | | ++---+---+ + +SELECT i, j FROM test ORDER BY i DESC NULLS LAST, j DESC NULLS FIRST; + ++---+---+ +| i | j | ++---+---+ +| 1 | | +| 1 | 1 | +| | 1 | ++---+---+ + +-- Test with strings +CREATE TABLE strings(s VARCHAR, i INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO strings VALUES ('apple', 1, 1000), (NULL, 2, 2000), ('banana', NULL, 3000); + +Affected Rows: 3 + +SELECT s, i FROM strings ORDER BY s NULLS FIRST, i NULLS LAST; + ++--------+---+ +| s | i | ++--------+---+ +| | 2 | +| apple | 1 | +| banana | | ++--------+---+ + +SELECT s, i FROM strings ORDER BY s NULLS LAST, i NULLS FIRST; + ++--------+---+ +| s | i | ++--------+---+ +| apple | 1 | +| banana | | +| | 2 | ++--------+---+ + +DROP TABLE integers; + +Affected Rows: 0 + +DROP TABLE test; + +Affected Rows: 0 + +DROP TABLE strings; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/order/nulls_first_last.sql b/tests/cases/standalone/common/order/nulls_first_last.sql new file mode 100644 index 0000000000..dca46b3d21 --- /dev/null +++ 
b/tests/cases/standalone/common/order/nulls_first_last.sql @@ -0,0 +1,46 @@ +-- Migrated from DuckDB test: test/sql/order/test_nulls_first.test +-- Test NULLS FIRST/NULLS LAST + +CREATE TABLE integers(i INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO integers VALUES (1, 1000), (NULL, 2000); + +-- Default NULL ordering (usually NULLS LAST in most systems) +SELECT i FROM integers ORDER BY i; + +-- Explicit NULLS FIRST +SELECT i FROM integers ORDER BY i NULLS FIRST; + +-- Explicit NULLS LAST +SELECT i FROM integers ORDER BY i NULLS LAST; + +-- Multiple columns with mixed NULL handling +CREATE TABLE test(i INTEGER, j INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO test VALUES (1, 1, 1000), (NULL, 1, 2000), (1, NULL, 3000); + +SELECT i, j FROM test ORDER BY i NULLS FIRST, j NULLS LAST; + +SELECT i, j FROM test ORDER BY i NULLS FIRST, j NULLS FIRST; + +SELECT i, j FROM test ORDER BY i NULLS LAST, j NULLS FIRST; + +-- Test with DESC ordering +SELECT i, j FROM test ORDER BY i DESC NULLS FIRST, j DESC NULLS LAST; + +SELECT i, j FROM test ORDER BY i DESC NULLS LAST, j DESC NULLS FIRST; + +-- Test with strings +CREATE TABLE strings(s VARCHAR, i INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO strings VALUES ('apple', 1, 1000), (NULL, 2, 2000), ('banana', NULL, 3000); + +SELECT s, i FROM strings ORDER BY s NULLS FIRST, i NULLS LAST; + +SELECT s, i FROM strings ORDER BY s NULLS LAST, i NULLS FIRST; + +DROP TABLE integers; + +DROP TABLE test; + +DROP TABLE strings; diff --git a/tests/cases/standalone/common/order/order_by_basic.result b/tests/cases/standalone/common/order/order_by_basic.result new file mode 100644 index 0000000000..747507f7a1 --- /dev/null +++ b/tests/cases/standalone/common/order/order_by_basic.result @@ -0,0 +1,134 @@ +-- Migrated from DuckDB test: test/sql/order/test_order_by.test +-- Test ORDER BY keyword +CREATE TABLE test(a INTEGER, b INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO test VALUES (11, 22, 1000), (12, 21, 2000), (13, 22, 3000); + +Affected Rows: 3 + +-- Simple ORDER BY +SELECT b FROM test ORDER BY a DESC; + ++----+ +| b | ++----+ +| 22 | +| 21 | +| 22 | ++----+ + +SELECT a, b FROM test ORDER BY a; + ++----+----+ +| a | b | ++----+----+ +| 11 | 22 | +| 12 | 21 | +| 13 | 22 | ++----+----+ + +SELECT a, b FROM test ORDER BY a DESC; + ++----+----+ +| a | b | ++----+----+ +| 13 | 22 | +| 12 | 21 | +| 11 | 22 | ++----+----+ + +-- ORDER BY on multiple columns +SELECT a, b FROM test ORDER BY b, a; + ++----+----+ +| a | b | ++----+----+ +| 12 | 21 | +| 11 | 22 | +| 13 | 22 | ++----+----+ + +-- ORDER BY using select indices +SELECT a, b FROM test ORDER BY 2, 1; + ++----+----+ +| a | b | ++----+----+ +| 12 | 21 | +| 11 | 22 | +| 13 | 22 | ++----+----+ + +SELECT a, b FROM test ORDER BY b DESC, a; + ++----+----+ +| a | b | ++----+----+ +| 11 | 22 | +| 13 | 22 | +| 12 | 21 | ++----+----+ + +SELECT a, b FROM test ORDER BY b, a DESC; + ++----+----+ +| a | b | ++----+----+ +| 12 | 21 | +| 13 | 22 | +| 11 | 22 | ++----+----+ + +-- TOP N queries with LIMIT +SELECT a, b FROM test ORDER BY b, a DESC LIMIT 1; + ++----+----+ +| a | b | ++----+----+ +| 12 | 21 | ++----+----+ + +-- OFFSET +SELECT a, b FROM test ORDER BY b, a DESC LIMIT 1 OFFSET 1; + ++----+----+ +| a | b | ++----+----+ +| 13 | 22 | ++----+----+ + +-- OFFSET without limit +SELECT a, b FROM test ORDER BY b, a DESC OFFSET 1; + ++----+----+ +| a | b | ++----+----+ +| 13 | 22 | +| 11 | 22 | ++----+----+ + +-- ORDER BY with WHERE +SELECT a, b FROM test WHERE a < 13 ORDER BY b; + ++----+----+ +| a | b | 
++----+----+ +| 12 | 21 | +| 11 | 22 | ++----+----+ + +SELECT a, b FROM test WHERE a < 13 ORDER BY 2; + ++----+----+ +| a | b | ++----+----+ +| 12 | 21 | +| 11 | 22 | ++----+----+ + +DROP TABLE test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/order/order_by_basic.sql b/tests/cases/standalone/common/order/order_by_basic.sql new file mode 100644 index 0000000000..68cba60911 --- /dev/null +++ b/tests/cases/standalone/common/order/order_by_basic.sql @@ -0,0 +1,39 @@ +-- Migrated from DuckDB test: test/sql/order/test_order_by.test +-- Test ORDER BY keyword + +CREATE TABLE test(a INTEGER, b INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO test VALUES (11, 22, 1000), (12, 21, 2000), (13, 22, 3000); + +-- Simple ORDER BY +SELECT b FROM test ORDER BY a DESC; + +SELECT a, b FROM test ORDER BY a; + +SELECT a, b FROM test ORDER BY a DESC; + +-- ORDER BY on multiple columns +SELECT a, b FROM test ORDER BY b, a; + +-- ORDER BY using select indices +SELECT a, b FROM test ORDER BY 2, 1; + +SELECT a, b FROM test ORDER BY b DESC, a; + +SELECT a, b FROM test ORDER BY b, a DESC; + +-- TOP N queries with LIMIT +SELECT a, b FROM test ORDER BY b, a DESC LIMIT 1; + +-- OFFSET +SELECT a, b FROM test ORDER BY b, a DESC LIMIT 1 OFFSET 1; + +-- OFFSET without limit +SELECT a, b FROM test ORDER BY b, a DESC OFFSET 1; + +-- ORDER BY with WHERE +SELECT a, b FROM test WHERE a < 13 ORDER BY b; + +SELECT a, b FROM test WHERE a < 13 ORDER BY 2; + +DROP TABLE test; diff --git a/tests/cases/standalone/common/order/order_by_expressions.result b/tests/cases/standalone/common/order/order_by_expressions.result new file mode 100644 index 0000000000..f121fac188 --- /dev/null +++ b/tests/cases/standalone/common/order/order_by_expressions.result @@ -0,0 +1,137 @@ +-- Migrated from DuckDB test: test/sql/order/test_order_by_expressions.test +-- Test ORDER BY with expressions +CREATE TABLE test(a INTEGER, b INTEGER, s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO test VALUES + (1, 10, 'apple', 1000), + (2, 20, 'banana', 2000), + (3, 15, 'cherry', 3000), + (4, 25, 'date', 4000); + +Affected Rows: 4 + +-- ORDER BY with arithmetic expressions +SELECT a, b, a + b as sum FROM test ORDER BY a + b; + ++---+----+-----+ +| a | b | sum | ++---+----+-----+ +| 1 | 10 | 11 | +| 3 | 15 | 18 | +| 2 | 20 | 22 | +| 4 | 25 | 29 | ++---+----+-----+ + +SELECT a, b, a * b as product FROM test ORDER BY a * b DESC; + ++---+----+---------+ +| a | b | product | ++---+----+---------+ +| 4 | 25 | 100 | +| 3 | 15 | 45 | +| 2 | 20 | 40 | +| 1 | 10 | 10 | ++---+----+---------+ + +-- ORDER BY with string functions +SELECT s, LENGTH(s) as len FROM test ORDER BY LENGTH(s); + ++--------+-----+ +| s | len | ++--------+-----+ +| date | 4 | +| apple | 5 | +| banana | 6 | +| cherry | 6 | ++--------+-----+ + +SELECT s, UPPER(s) as upper_s FROM test ORDER BY UPPER(s); + ++--------+---------+ +| s | upper_s | ++--------+---------+ +| apple | APPLE | +| banana | BANANA | +| cherry | CHERRY | +| date | DATE | ++--------+---------+ + +-- ORDER BY with CASE expressions +SELECT a, b, + CASE + WHEN a % 2 = 0 THEN 'even' + ELSE 'odd' + END as parity +FROM test +ORDER BY + CASE + WHEN a % 2 = 0 THEN 1 + ELSE 2 + END, a; + ++---+----+--------+ +| a | b | parity | ++---+----+--------+ +| 2 | 20 | even | +| 4 | 25 | even | +| 1 | 10 | odd | +| 3 | 15 | odd | ++---+----+--------+ + +-- ORDER BY with conditional expressions +SELECT a, b FROM test ORDER BY GREATEST(a, b) DESC; + ++---+----+ +| a | b | ++---+----+ +| 4 | 25 | +| 2 | 20 | +| 3 | 
15 | +| 1 | 10 | ++---+----+ + +SELECT a, b FROM test ORDER BY LEAST(a, b); + ++---+----+ +| a | b | ++---+----+ +| 1 | 10 | +| 2 | 20 | +| 3 | 15 | +| 4 | 25 | ++---+----+ + +-- ORDER BY with NULL-related expressions +INSERT INTO test VALUES (NULL, NULL, NULL, 5000); + +Affected Rows: 1 + +SELECT a, b, COALESCE(a, 999) as a_or_999 +FROM test +ORDER BY COALESCE(a, 999); + ++---+----+----------+ +| a | b | a_or_999 | ++---+----+----------+ +| 1 | 10 | 1 | +| 2 | 20 | 2 | +| 3 | 15 | 3 | +| 4 | 25 | 4 | +| | | 999 | ++---+----+----------+ + +-- ORDER BY with subqueries in expressions +SELECT a, b, + a - (SELECT MIN(a) FROM test WHERE a IS NOT NULL) as diff_from_min +FROM test +WHERE a IS NOT NULL +ORDER BY a - (SELECT MIN(a) FROM test WHERE a IS NOT NULL); + +Error: 1001(Unsupported), This feature is not implemented: Physical plan does not support logical expression ScalarSubquery() + +DROP TABLE test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/order/order_by_expressions.sql b/tests/cases/standalone/common/order/order_by_expressions.sql new file mode 100644 index 0000000000..d4467c9528 --- /dev/null +++ b/tests/cases/standalone/common/order/order_by_expressions.sql @@ -0,0 +1,54 @@ +-- Migrated from DuckDB test: test/sql/order/test_order_by_expressions.test +-- Test ORDER BY with expressions + +CREATE TABLE test(a INTEGER, b INTEGER, s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO test VALUES + (1, 10, 'apple', 1000), + (2, 20, 'banana', 2000), + (3, 15, 'cherry', 3000), + (4, 25, 'date', 4000); + +-- ORDER BY with arithmetic expressions +SELECT a, b, a + b as sum FROM test ORDER BY a + b; + +SELECT a, b, a * b as product FROM test ORDER BY a * b DESC; + +-- ORDER BY with string functions +SELECT s, LENGTH(s) as len FROM test ORDER BY LENGTH(s); + +SELECT s, UPPER(s) as upper_s FROM test ORDER BY UPPER(s); + +-- ORDER BY with CASE expressions +SELECT a, b, + CASE + WHEN a % 2 = 0 THEN 'even' + ELSE 'odd' + END as parity +FROM test +ORDER BY + CASE + WHEN a % 2 = 0 THEN 1 + ELSE 2 + END, a; + +-- ORDER BY with conditional expressions +SELECT a, b FROM test ORDER BY GREATEST(a, b) DESC; + +SELECT a, b FROM test ORDER BY LEAST(a, b); + +-- ORDER BY with NULL-related expressions +INSERT INTO test VALUES (NULL, NULL, NULL, 5000); + +SELECT a, b, COALESCE(a, 999) as a_or_999 +FROM test +ORDER BY COALESCE(a, 999); + +-- ORDER BY with subqueries in expressions +SELECT a, b, + a - (SELECT MIN(a) FROM test WHERE a IS NOT NULL) as diff_from_min +FROM test +WHERE a IS NOT NULL +ORDER BY a - (SELECT MIN(a) FROM test WHERE a IS NOT NULL); + +DROP TABLE test; diff --git a/tests/cases/standalone/common/sample/basic_sample.result b/tests/cases/standalone/common/sample/basic_sample.result new file mode 100644 index 0000000000..1691337cd4 --- /dev/null +++ b/tests/cases/standalone/common/sample/basic_sample.result @@ -0,0 +1,93 @@ +-- Migrated from DuckDB test: test/sql/sample/same_seed_same_sample.test +-- FIXME: the results are wrong in this test, waits for https://github.com/apache/datafusion/pull/16325 +-- Test basic SAMPLE functionality +-- Create test table +CREATE TABLE test(x INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +-- Insert test data +INSERT INTO test SELECT number, number * 1000 FROM numbers limit 10000; + +Affected Rows: 10000 + +-- Test TABLESAMPLE with percentage +SELECT COUNT(*) > 0 FROM test TABLESAMPLE (10 PERCENT); + ++---------------------+ +| count(*) > Int64(0) | ++---------------------+ +| true | ++---------------------+ + +-- Test TABLESAMPLE 
with row count +SELECT COUNT(*) FROM test TABLESAMPLE (100 ROWS); + ++----------+ +| count(*) | ++----------+ +| 10000 | ++----------+ + +-- Test TABLESAMPLE SYSTEM +SELECT COUNT(*) > 0 FROM test TABLESAMPLE SYSTEM (25 PERCENT); + ++---------------------+ +| count(*) > Int64(0) | ++---------------------+ +| true | ++---------------------+ + +-- Test TABLESAMPLE BERNOULLI +SELECT COUNT(*) > 0 FROM test TABLESAMPLE BERNOULLI (25 PERCENT); + ++---------------------+ +| count(*) > Int64(0) | ++---------------------+ +| true | ++---------------------+ + +-- Test with REPEATABLE for consistent results +SELECT COUNT(*) AS cnt1 FROM test TABLESAMPLE SYSTEM (10 PERCENT) REPEATABLE (42); + ++-------+ +| cnt1 | ++-------+ +| 10000 | ++-------+ + +SELECT COUNT(*) AS cnt2 FROM test TABLESAMPLE SYSTEM (10 PERCENT) REPEATABLE (42); + ++-------+ +| cnt2 | ++-------+ +| 10000 | ++-------+ + +-- Test sampling with WHERE clause +SELECT COUNT(*) FROM test TABLESAMPLE (10 PERCENT) WHERE x > 5000; + ++----------+ +| count(*) | ++----------+ +| 4999 | ++----------+ + +-- Test sampling with ORDER BY +SELECT x FROM test TABLESAMPLE (5 ROWS) ORDER BY x LIMIT 5; + ++---+ +| x | ++---+ +| 0 | +| 1 | +| 2 | +| 3 | +| 4 | ++---+ + +-- cleanup +DROP TABLE test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/sample/basic_sample.sql b/tests/cases/standalone/common/sample/basic_sample.sql new file mode 100644 index 0000000000..1e00de81af --- /dev/null +++ b/tests/cases/standalone/common/sample/basic_sample.sql @@ -0,0 +1,35 @@ +-- Migrated from DuckDB test: test/sql/sample/same_seed_same_sample.test +-- FIXME: the results are wrong in this test, waits for https://github.com/apache/datafusion/pull/16325 +-- Test basic SAMPLE functionality + +-- Create test table +CREATE TABLE test(x INTEGER, ts TIMESTAMP TIME INDEX); + +-- Insert test data +INSERT INTO test SELECT number, number * 1000 FROM numbers limit 10000; + +-- Test TABLESAMPLE with percentage +SELECT COUNT(*) > 0 FROM test TABLESAMPLE (10 PERCENT); + +-- Test TABLESAMPLE with row count +SELECT COUNT(*) FROM test TABLESAMPLE (100 ROWS); + +-- Test TABLESAMPLE SYSTEM +SELECT COUNT(*) > 0 FROM test TABLESAMPLE SYSTEM (25 PERCENT); + +-- Test TABLESAMPLE BERNOULLI +SELECT COUNT(*) > 0 FROM test TABLESAMPLE BERNOULLI (25 PERCENT); + +-- Test with REPEATABLE for consistent results +SELECT COUNT(*) AS cnt1 FROM test TABLESAMPLE SYSTEM (10 PERCENT) REPEATABLE (42); + +SELECT COUNT(*) AS cnt2 FROM test TABLESAMPLE SYSTEM (10 PERCENT) REPEATABLE (42); + +-- Test sampling with WHERE clause +SELECT COUNT(*) FROM test TABLESAMPLE (10 PERCENT) WHERE x > 5000; + +-- Test sampling with ORDER BY +SELECT x FROM test TABLESAMPLE (5 ROWS) ORDER BY x LIMIT 5; + +-- cleanup +DROP TABLE test; \ No newline at end of file diff --git a/tests/cases/standalone/common/types/date/test_date.result b/tests/cases/standalone/common/types/date/test_date.result new file mode 100644 index 0000000000..ed7f213742 --- /dev/null +++ b/tests/cases/standalone/common/types/date/test_date.result @@ -0,0 +1,135 @@ +-- Migrated from DuckDB test: test/sql/types/date/test_date.test +-- Test basic DATE functionality +-- Create and insert into table +CREATE TABLE dates(i DATE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO dates VALUES ('1993-08-14', 1000), (NULL, 2000); + +Affected Rows: 2 + +-- Check that we can select dates +SELECT * FROM dates ORDER BY ts; + ++------------+---------------------+ +| i | ts | ++------------+---------------------+ +| 1993-08-14 | 1970-01-01T00:00:01 | 
+| | 1970-01-01T00:00:02 | ++------------+---------------------+ + +-- extract function +SELECT extract(year FROM i) FROM dates ORDER BY ts; + ++---------------------------------+ +| date_part(Utf8("YEAR"),dates.i) | ++---------------------------------+ +| 1993 | +| | ++---------------------------------+ + +-- Check that we can convert dates to string +SELECT CAST(i AS VARCHAR) FROM dates ORDER BY ts; + ++------------+ +| dates.i | ++------------+ +| 1993-08-14 | +| | ++------------+ + +-- Check that we can add days to a date +SELECT i + INTERVAL '5 days' FROM dates ORDER BY ts; + ++-----------------------------------------------------------------------------------------------+ +| dates.i + IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }") | ++-----------------------------------------------------------------------------------------------+ +| 1993-08-19 | +| | ++-----------------------------------------------------------------------------------------------+ + +-- Check that we can subtract days from a date +SELECT i - INTERVAL '5 days' FROM dates ORDER BY ts; + ++-----------------------------------------------------------------------------------------------+ +| dates.i - IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }") | ++-----------------------------------------------------------------------------------------------+ +| 1993-08-09 | +| | ++-----------------------------------------------------------------------------------------------+ + +-- Test date subtraction resulting in interval +SELECT i - DATE '1993-08-14' FROM dates ORDER BY ts; + ++------------------------------+ +| dates.i - Utf8("1993-08-14") | ++------------------------------+ +| P0D | +| | ++------------------------------+ + +-- Test various date formats +CREATE TABLE date_formats(d DATE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO date_formats VALUES + ('2021-03-01', 1000), + ('2021-12-31', 2000), + ('2000-01-01', 3000), + ('1970-01-01', 4000); + +Affected Rows: 4 + +SELECT d, extract(year FROM d), extract(month FROM d), extract(day FROM d) FROM date_formats ORDER BY d; + ++------------+----------------------------------------+-----------------------------------------+---------------------------------------+ +| d | date_part(Utf8("YEAR"),date_formats.d) | date_part(Utf8("MONTH"),date_formats.d) | date_part(Utf8("DAY"),date_formats.d) | ++------------+----------------------------------------+-----------------------------------------+---------------------------------------+ +| 1970-01-01 | 1970 | 1 | 1 | +| 2000-01-01 | 2000 | 1 | 1 | +| 2021-03-01 | 2021 | 3 | 1 | +| 2021-12-31 | 2021 | 12 | 31 | ++------------+----------------------------------------+-----------------------------------------+---------------------------------------+ + +-- Test date comparison +SELECT d FROM date_formats WHERE d > '2000-01-01' ORDER BY d; + ++------------+ +| d | ++------------+ +| 2021-03-01 | +| 2021-12-31 | ++------------+ + +SELECT d FROM date_formats WHERE d BETWEEN '2000-01-01' AND '2021-06-01' ORDER BY d; + ++------------+ +| d | ++------------+ +| 2000-01-01 | +| 2021-03-01 | ++------------+ + +-- Test NULL handling +INSERT INTO date_formats VALUES (NULL, 5000); + +Affected Rows: 1 + +SELECT COUNT(*), COUNT(d) FROM date_formats; + ++----------+-----------------------+ +| count(*) | count(date_formats.d) | ++----------+-----------------------+ +| 5 | 4 | ++----------+-----------------------+ + +DROP TABLE dates; + +Affected Rows: 0 + +DROP TABLE 
date_formats; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/types/date/test_date.sql b/tests/cases/standalone/common/types/date/test_date.sql new file mode 100644 index 0000000000..5bf0db4b4b --- /dev/null +++ b/tests/cases/standalone/common/types/date/test_date.sql @@ -0,0 +1,50 @@ +-- Migrated from DuckDB test: test/sql/types/date/test_date.test +-- Test basic DATE functionality + +-- Create and insert into table +CREATE TABLE dates(i DATE, ts TIMESTAMP TIME INDEX); + +INSERT INTO dates VALUES ('1993-08-14', 1000), (NULL, 2000); + +-- Check that we can select dates +SELECT * FROM dates ORDER BY ts; + +-- extract function +SELECT extract(year FROM i) FROM dates ORDER BY ts; + +-- Check that we can convert dates to string +SELECT CAST(i AS VARCHAR) FROM dates ORDER BY ts; + +-- Check that we can add days to a date +SELECT i + INTERVAL '5 days' FROM dates ORDER BY ts; + +-- Check that we can subtract days from a date +SELECT i - INTERVAL '5 days' FROM dates ORDER BY ts; + +-- Test date subtraction resulting in interval +SELECT i - DATE '1993-08-14' FROM dates ORDER BY ts; + +-- Test various date formats +CREATE TABLE date_formats(d DATE, ts TIMESTAMP TIME INDEX); + +INSERT INTO date_formats VALUES + ('2021-03-01', 1000), + ('2021-12-31', 2000), + ('2000-01-01', 3000), + ('1970-01-01', 4000); + +SELECT d, extract(year FROM d), extract(month FROM d), extract(day FROM d) FROM date_formats ORDER BY d; + +-- Test date comparison +SELECT d FROM date_formats WHERE d > '2000-01-01' ORDER BY d; + +SELECT d FROM date_formats WHERE d BETWEEN '2000-01-01' AND '2021-06-01' ORDER BY d; + +-- Test NULL handling +INSERT INTO date_formats VALUES (NULL, 5000); + +SELECT COUNT(*), COUNT(d) FROM date_formats; + +DROP TABLE dates; + +DROP TABLE date_formats; diff --git a/tests/cases/standalone/common/types/float/ieee_floating_points.result b/tests/cases/standalone/common/types/float/ieee_floating_points.result new file mode 100644 index 0000000000..69198d490e --- /dev/null +++ b/tests/cases/standalone/common/types/float/ieee_floating_points.result @@ -0,0 +1,144 @@ +-- Migrated from DuckDB test: test/sql/types/float/ieee_floating_points.test +-- Test IEEE floating point behavior +-- Test special float values +CREATE TABLE float_special(f FLOAT, d DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +-- Insert special values +INSERT INTO float_special VALUES + (0.0, 0.0, 1000), + (-0.0, -0.0, 2000), + ('inf'::FLOAT, 'inf'::DOUBLE, 3000), + ('-inf'::FLOAT, '-inf'::DOUBLE, 4000), + ('nan'::FLOAT, 'nan'::DOUBLE, 5000); + +Affected Rows: 5 + +-- Test basic operations with special values +SELECT f, d FROM float_special ORDER BY ts; + ++------+------+ +| f | d | ++------+------+ +| 0.0 | 0.0 | +| -0.0 | -0.0 | +| inf | inf | +| -inf | -inf | +| NaN | NaN | ++------+------+ + +-- Test comparison with infinity +-- It doesn't follow the IEEE standard, but follows PG instead. 
+SELECT f, f > 1000000 FROM float_special ORDER BY ts; + ++------+----------------------------------+ +| f | float_special.f > Int64(1000000) | ++------+----------------------------------+ +| 0.0 | false | +| -0.0 | false | +| inf | true | +| -inf | false | +| NaN | true | ++------+----------------------------------+ + +SELECT d, d < -1000000 FROM float_special ORDER BY ts; + ++------+-----------------------------------+ +| d | float_special.d < Int64(-1000000) | ++------+-----------------------------------+ +| 0.0 | false | +| -0.0 | false | +| inf | false | +| -inf | true | +| NaN | false | ++------+-----------------------------------+ + +-- Test NaN behavior +-- NaN != NaN +SELECT f, f = f FROM float_special WHERE f != f ORDER BY ts; + +++ +++ + +SELECT d, d IS NULL FROM float_special ORDER BY ts; + ++------+-------------------------+ +| d | float_special.d IS NULL | ++------+-------------------------+ +| 0.0 | false | +| -0.0 | false | +| inf | false | +| -inf | false | +| NaN | false | ++------+-------------------------+ + +-- Test arithmetic with special values +SELECT f, f + 1 FROM float_special ORDER BY ts; + ++------+----------------------------+ +| f | float_special.f + Int64(1) | ++------+----------------------------+ +| 0.0 | 1.0 | +| -0.0 | 1.0 | +| inf | inf | +| -inf | -inf | +| NaN | NaN | ++------+----------------------------+ + +SELECT d, d * 2 FROM float_special ORDER BY ts; + ++------+----------------------------+ +| d | float_special.d * Int64(2) | ++------+----------------------------+ +| 0.0 | 0.0 | +| -0.0 | -0.0 | +| inf | inf | +| -inf | -inf | +| NaN | NaN | ++------+----------------------------+ + +-- Test normal floating point precision +CREATE TABLE float_precision(f FLOAT, d DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO float_precision VALUES + (1.23456789, 1.23456789012345, 1000), + (0.000001, 0.000000000001, 2000), + (1e10, 1e15, 3000), + (1e-10, 1e-15, 4000); + +Affected Rows: 4 + +SELECT f, d FROM float_precision ORDER BY ts; + ++---------------+--------------------+ +| f | d | ++---------------+--------------------+ +| 1.2345679 | 1.23456789012345 | +| 0.000001 | 1e-12 | +| 10000000000.0 | 1000000000000000.0 | +| 1e-10 | 1e-15 | ++---------------+--------------------+ + +-- Test rounding and precision +SELECT ROUND(f, 3), ROUND(d, 6) FROM float_precision ORDER BY ts; + ++-----------------------------------+-----------------------------------+ +| round(float_precision.f,Int64(3)) | round(float_precision.d,Int64(6)) | ++-----------------------------------+-----------------------------------+ +| 1.235 | 1.234568 | +| 0.0 | 0.0 | +| 10000000000.0 | 1000000000000000.0 | +| 0.0 | 0.0 | ++-----------------------------------+-----------------------------------+ + +DROP TABLE float_special; + +Affected Rows: 0 + +DROP TABLE float_precision; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/types/float/ieee_floating_points.sql b/tests/cases/standalone/common/types/float/ieee_floating_points.sql new file mode 100644 index 0000000000..755b206554 --- /dev/null +++ b/tests/cases/standalone/common/types/float/ieee_floating_points.sql @@ -0,0 +1,51 @@ +-- Migrated from DuckDB test: test/sql/types/float/ieee_floating_points.test +-- Test IEEE floating point behavior + +-- Test special float values +CREATE TABLE float_special(f FLOAT, d DOUBLE, ts TIMESTAMP TIME INDEX); + +-- Insert special values +INSERT INTO float_special VALUES + (0.0, 0.0, 1000), + (-0.0, -0.0, 2000), + ('inf'::FLOAT, 'inf'::DOUBLE, 3000), + ('-inf'::FLOAT, 
'-inf'::DOUBLE, 4000), + ('nan'::FLOAT, 'nan'::DOUBLE, 5000); + +-- Test basic operations with special values +SELECT f, d FROM float_special ORDER BY ts; + +-- Test comparison with infinity +-- It doesn't follow the IEEE standard, but follows PG instead. +SELECT f, f > 1000000 FROM float_special ORDER BY ts; + +SELECT d, d < -1000000 FROM float_special ORDER BY ts; + +-- Test NaN behavior +-- NaN != NaN +SELECT f, f = f FROM float_special WHERE f != f ORDER BY ts; + +SELECT d, d IS NULL FROM float_special ORDER BY ts; + +-- Test arithmetic with special values +SELECT f, f + 1 FROM float_special ORDER BY ts; + +SELECT d, d * 2 FROM float_special ORDER BY ts; + +-- Test normal floating point precision +CREATE TABLE float_precision(f FLOAT, d DOUBLE, ts TIMESTAMP TIME INDEX); + +INSERT INTO float_precision VALUES + (1.23456789, 1.23456789012345, 1000), + (0.000001, 0.000000000001, 2000), + (1e10, 1e15, 3000), + (1e-10, 1e-15, 4000); + +SELECT f, d FROM float_precision ORDER BY ts; + +-- Test rounding and precision +SELECT ROUND(f, 3), ROUND(d, 6) FROM float_precision ORDER BY ts; + +DROP TABLE float_special; + +DROP TABLE float_precision; diff --git a/tests/cases/standalone/common/types/float/infinity_nan.result b/tests/cases/standalone/common/types/float/infinity_nan.result new file mode 100644 index 0000000000..dcfbdd81ce --- /dev/null +++ b/tests/cases/standalone/common/types/float/infinity_nan.result @@ -0,0 +1,184 @@ +-- Migrated from DuckDB test: test/sql/types/float/infinity_test.test, nan_aggregate.test +-- Test infinity and NaN handling +-- Note: it doesn't follow the IEEE standard, but follows PG instead: https://www.postgresql.org/docs/current/datatype-numeric.html +-- Test infinity operations +CREATE TABLE inf_test(val DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO inf_test VALUES + ('inf'::DOUBLE, 1000), + ('-inf'::DOUBLE, 2000), + (1.0, 3000), + (-1.0, 4000), + (0.0, 5000); + +Affected Rows: 5 + +-- Test infinity comparisons +SELECT val, val > 0 FROM inf_test ORDER BY ts; + ++------+-------------------------+ +| val | inf_test.val > Int64(0) | ++------+-------------------------+ +| inf | true | +| -inf | false | +| 1.0 | true | +| -1.0 | false | +| 0.0 | false | ++------+-------------------------+ + +SELECT val, val < 0 FROM inf_test ORDER BY ts; + ++------+-------------------------+ +| val | inf_test.val < Int64(0) | ++------+-------------------------+ +| inf | false | +| -inf | true | +| 1.0 | false | +| -1.0 | true | +| 0.0 | false | ++------+-------------------------+ + +SELECT val, val = 'inf'::DOUBLE FROM inf_test ORDER BY ts; + ++------+----------------------------+ +| val | inf_test.val = Utf8("inf") | ++------+----------------------------+ +| inf | true | +| -inf | false | +| 1.0 | false | +| -1.0 | false | +| 0.0 | false | ++------+----------------------------+ + +-- Test infinity in aggregates +SELECT MAX(val), MIN(val) FROM inf_test; + ++-------------------+-------------------+ +| max(inf_test.val) | min(inf_test.val) | ++-------------------+-------------------+ +| inf | -inf | ++-------------------+-------------------+ + +SELECT SUM(val), AVG(val) FROM inf_test; + ++-------------------+-------------------+ +| sum(inf_test.val) | avg(inf_test.val) | ++-------------------+-------------------+ +| NaN | NaN | ++-------------------+-------------------+ + +-- Test NaN behavior +CREATE TABLE nan_test(val DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO nan_test VALUES + ('nan'::DOUBLE, 1000), + (1.0, 2000), + (2.0, 3000), + 
('nan'::DOUBLE, 4000), + (3.0, 5000); + +Affected Rows: 5 + +-- Test NaN in aggregates +SELECT COUNT(*), COUNT(val) FROM nan_test; + ++----------+---------------------+ +| count(*) | count(nan_test.val) | ++----------+---------------------+ +| 5 | 5 | ++----------+---------------------+ + +SELECT MAX(val), MIN(val) FROM nan_test; + ++-------------------+-------------------+ +| max(nan_test.val) | min(nan_test.val) | ++-------------------+-------------------+ +| NaN | 1.0 | ++-------------------+-------------------+ + +SELECT SUM(val), AVG(val) FROM nan_test; + ++-------------------+-------------------+ +| sum(nan_test.val) | avg(nan_test.val) | ++-------------------+-------------------+ +| NaN | NaN | ++-------------------+-------------------+ + +-- Test NaN comparisons +SELECT val, val = val FROM nan_test ORDER BY ts; + ++-----+-----------------------------+ +| val | nan_test.val = nan_test.val | ++-----+-----------------------------+ +| NaN | true | +| 1.0 | true | +| 2.0 | true | +| NaN | true | +| 3.0 | true | ++-----+-----------------------------+ + +SELECT val, val IS NULL FROM nan_test ORDER BY ts; + ++-----+----------------------+ +| val | nan_test.val IS NULL | ++-----+----------------------+ +| NaN | false | +| 1.0 | false | +| 2.0 | false | +| NaN | false | +| 3.0 | false | ++-----+----------------------+ + +-- Test arithmetic with infinity and NaN +SELECT 'inf'::DOUBLE + 1; + ++------------------------+ +| Utf8("inf") + Int64(1) | ++------------------------+ +| inf | ++------------------------+ + +SELECT 'inf'::DOUBLE - 'inf'::DOUBLE; + ++---------------------------+ +| Utf8("inf") - Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'inf'::DOUBLE * 0; + ++------------------------+ +| Utf8("inf") * Int64(0) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::DOUBLE + 1; + ++------------------------+ +| Utf8("nan") + Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::DOUBLE * 0; + ++------------------------+ +| Utf8("nan") * Int64(0) | ++------------------------+ +| NaN | ++------------------------+ + +DROP TABLE inf_test; + +Affected Rows: 0 + +DROP TABLE nan_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/types/float/infinity_nan.sql b/tests/cases/standalone/common/types/float/infinity_nan.sql new file mode 100644 index 0000000000..5f495170e1 --- /dev/null +++ b/tests/cases/standalone/common/types/float/infinity_nan.sql @@ -0,0 +1,61 @@ +-- Migrated from DuckDB test: test/sql/types/float/infinity_test.test, nan_aggregate.test +-- Test infinity and NaN handling +-- Note: it doesn't follow the IEEE standard, but follows PG instead: https://www.postgresql.org/docs/current/datatype-numeric.html +-- Test infinity operations +CREATE TABLE inf_test(val DOUBLE, ts TIMESTAMP TIME INDEX); + +INSERT INTO inf_test VALUES + ('inf'::DOUBLE, 1000), + ('-inf'::DOUBLE, 2000), + (1.0, 3000), + (-1.0, 4000), + (0.0, 5000); + +-- Test infinity comparisons +SELECT val, val > 0 FROM inf_test ORDER BY ts; + +SELECT val, val < 0 FROM inf_test ORDER BY ts; + +SELECT val, val = 'inf'::DOUBLE FROM inf_test ORDER BY ts; + +-- Test infinity in aggregates +SELECT MAX(val), MIN(val) FROM inf_test; + +SELECT SUM(val), AVG(val) FROM inf_test; + +-- Test NaN behavior +CREATE TABLE nan_test(val DOUBLE, ts TIMESTAMP TIME INDEX); + +INSERT INTO nan_test VALUES + ('nan'::DOUBLE, 1000), + (1.0, 2000), + (2.0, 3000), + ('nan'::DOUBLE, 4000), + (3.0, 5000); + +-- Test NaN in 
aggregates +SELECT COUNT(*), COUNT(val) FROM nan_test; + +SELECT MAX(val), MIN(val) FROM nan_test; + +SELECT SUM(val), AVG(val) FROM nan_test; + +-- Test NaN comparisons +SELECT val, val = val FROM nan_test ORDER BY ts; + +SELECT val, val IS NULL FROM nan_test ORDER BY ts; + +-- Test arithmetic with infinity and NaN +SELECT 'inf'::DOUBLE + 1; + +SELECT 'inf'::DOUBLE - 'inf'::DOUBLE; + +SELECT 'inf'::DOUBLE * 0; + +SELECT 'nan'::DOUBLE + 1; + +SELECT 'nan'::DOUBLE * 0; + +DROP TABLE inf_test; + +DROP TABLE nan_test; diff --git a/tests/cases/standalone/common/types/float/nan_arithmetic_extended.result b/tests/cases/standalone/common/types/float/nan_arithmetic_extended.result new file mode 100644 index 0000000000..392b79fa4a --- /dev/null +++ b/tests/cases/standalone/common/types/float/nan_arithmetic_extended.result @@ -0,0 +1,317 @@ +-- Migrated from DuckDB test: test/sql/types/float/nan_arithmetic.test +-- Test arithmetic on NaN values +-- Test NaN arithmetic with FLOAT +-- Any arithmetic on a NaN value will result in a NaN value +SELECT 'nan'::FLOAT + 1; + ++------------------------+ +| Utf8("nan") + Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::FLOAT + 'inf'::FLOAT; + ++---------------------------+ +| Utf8("nan") + Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'nan'::FLOAT - 1; + ++------------------------+ +| Utf8("nan") - Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::FLOAT - 'inf'::FLOAT; + ++---------------------------+ +| Utf8("nan") - Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'nan'::FLOAT * 1; + ++------------------------+ +| Utf8("nan") * Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::FLOAT * 'inf'::FLOAT; + ++---------------------------+ +| Utf8("nan") * Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'nan'::FLOAT / 1; + ++------------------------+ +| Utf8("nan") / Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::FLOAT / 'inf'::FLOAT; + ++---------------------------+ +| Utf8("nan") / Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'nan'::FLOAT % 1; + ++------------------------+ +| Utf8("nan") % Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::FLOAT % 'inf'::FLOAT; + ++---------------------------+ +| Utf8("nan") % Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT -('nan'::FLOAT); + ++-----------------+ +| (- Utf8("nan")) | ++-----------------+ +| NaN | ++-----------------+ + +-- Test NaN arithmetic with DOUBLE +SELECT 'nan'::DOUBLE + 1; + ++------------------------+ +| Utf8("nan") + Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::DOUBLE + 'inf'::DOUBLE; + ++---------------------------+ +| Utf8("nan") + Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'nan'::DOUBLE - 1; + ++------------------------+ +| Utf8("nan") - Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::DOUBLE - 'inf'::DOUBLE; + ++---------------------------+ +| Utf8("nan") - Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'nan'::DOUBLE * 1; + ++------------------------+ +| 
Utf8("nan") * Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::DOUBLE * 'inf'::DOUBLE; + ++---------------------------+ +| Utf8("nan") * Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'nan'::DOUBLE / 1; + ++------------------------+ +| Utf8("nan") / Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::DOUBLE / 'inf'::DOUBLE; + ++---------------------------+ +| Utf8("nan") / Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'nan'::DOUBLE % 1; + ++------------------------+ +| Utf8("nan") % Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::DOUBLE % 'inf'::DOUBLE; + ++---------------------------+ +| Utf8("nan") % Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT -('nan'::DOUBLE); + ++-----------------+ +| (- Utf8("nan")) | ++-----------------+ +| NaN | ++-----------------+ + +-- Test infinity arithmetic +SELECT 'inf'::FLOAT + 1; + ++------------------------+ +| Utf8("inf") + Int64(1) | ++------------------------+ +| inf | ++------------------------+ + +SELECT 'inf'::FLOAT - 1; + ++------------------------+ +| Utf8("inf") - Int64(1) | ++------------------------+ +| inf | ++------------------------+ + +SELECT 'inf'::FLOAT * 2; + ++------------------------+ +| Utf8("inf") * Int64(2) | ++------------------------+ +| inf | ++------------------------+ + +SELECT 'inf'::FLOAT / 2; + ++------------------------+ +| Utf8("inf") / Int64(2) | ++------------------------+ +| inf | ++------------------------+ + +SELECT -('inf'::FLOAT); + ++-----------------+ +| (- Utf8("inf")) | ++-----------------+ +| -inf | ++-----------------+ + +SELECT 'inf'::DOUBLE + 1; + ++------------------------+ +| Utf8("inf") + Int64(1) | ++------------------------+ +| inf | ++------------------------+ + +SELECT 'inf'::DOUBLE - 1; + ++------------------------+ +| Utf8("inf") - Int64(1) | ++------------------------+ +| inf | ++------------------------+ + +SELECT 'inf'::DOUBLE * 2; + ++------------------------+ +| Utf8("inf") * Int64(2) | ++------------------------+ +| inf | ++------------------------+ + +SELECT 'inf'::DOUBLE / 2; + ++------------------------+ +| Utf8("inf") / Int64(2) | ++------------------------+ +| inf | ++------------------------+ + +SELECT -('inf'::DOUBLE); + ++-----------------+ +| (- Utf8("inf")) | ++-----------------+ +| -inf | ++-----------------+ + +-- Test special infinity cases +-- Should be NaN +SELECT 'inf'::FLOAT - 'inf'::FLOAT; + ++---------------------------+ +| Utf8("inf") - Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +-- Should be NaN +SELECT 'inf'::FLOAT / 'inf'::FLOAT; + ++---------------------------+ +| Utf8("inf") / Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +-- Should be NaN +SELECT 'inf'::FLOAT * 0; + ++------------------------+ +| Utf8("inf") * Int64(0) | ++------------------------+ +| NaN | ++------------------------+ + +-- Should be NaN +SELECT 'inf'::DOUBLE - 'inf'::DOUBLE; + ++---------------------------+ +| Utf8("inf") - Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +-- Should be NaN +SELECT 'inf'::DOUBLE / 'inf'::DOUBLE; + ++---------------------------+ +| Utf8("inf") / Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +-- Should be NaN +SELECT 'inf'::DOUBLE * 0; + 
++------------------------+ +| Utf8("inf") * Int64(0) | ++------------------------+ +| NaN | ++------------------------+ + diff --git a/tests/cases/standalone/common/types/float/nan_arithmetic_extended.sql b/tests/cases/standalone/common/types/float/nan_arithmetic_extended.sql new file mode 100644 index 0000000000..fe3d24c35c --- /dev/null +++ b/tests/cases/standalone/common/types/float/nan_arithmetic_extended.sql @@ -0,0 +1,91 @@ +-- Migrated from DuckDB test: test/sql/types/float/nan_arithmetic.test +-- Test arithmetic on NaN values + +-- Test NaN arithmetic with FLOAT +-- Any arithmetic on a NaN value will result in a NaN value + +SELECT 'nan'::FLOAT + 1; + +SELECT 'nan'::FLOAT + 'inf'::FLOAT; + +SELECT 'nan'::FLOAT - 1; + +SELECT 'nan'::FLOAT - 'inf'::FLOAT; + +SELECT 'nan'::FLOAT * 1; + +SELECT 'nan'::FLOAT * 'inf'::FLOAT; + +SELECT 'nan'::FLOAT / 1; + +SELECT 'nan'::FLOAT / 'inf'::FLOAT; + +SELECT 'nan'::FLOAT % 1; + +SELECT 'nan'::FLOAT % 'inf'::FLOAT; + +SELECT -('nan'::FLOAT); + +-- Test NaN arithmetic with DOUBLE +SELECT 'nan'::DOUBLE + 1; + +SELECT 'nan'::DOUBLE + 'inf'::DOUBLE; + +SELECT 'nan'::DOUBLE - 1; + +SELECT 'nan'::DOUBLE - 'inf'::DOUBLE; + +SELECT 'nan'::DOUBLE * 1; + +SELECT 'nan'::DOUBLE * 'inf'::DOUBLE; + +SELECT 'nan'::DOUBLE / 1; + +SELECT 'nan'::DOUBLE / 'inf'::DOUBLE; + +SELECT 'nan'::DOUBLE % 1; + +SELECT 'nan'::DOUBLE % 'inf'::DOUBLE; + +SELECT -('nan'::DOUBLE); + +-- Test infinity arithmetic +SELECT 'inf'::FLOAT + 1; + +SELECT 'inf'::FLOAT - 1; + +SELECT 'inf'::FLOAT * 2; + +SELECT 'inf'::FLOAT / 2; + +SELECT -('inf'::FLOAT); + +SELECT 'inf'::DOUBLE + 1; + +SELECT 'inf'::DOUBLE - 1; + +SELECT 'inf'::DOUBLE * 2; + +SELECT 'inf'::DOUBLE / 2; + +SELECT -('inf'::DOUBLE); + +-- Test special infinity cases +-- Should be NaN +SELECT 'inf'::FLOAT - 'inf'::FLOAT; + +-- Should be NaN +SELECT 'inf'::FLOAT / 'inf'::FLOAT; + +-- Should be NaN +SELECT 'inf'::FLOAT * 0; + +-- Should be NaN +SELECT 'inf'::DOUBLE - 'inf'::DOUBLE; + +-- Should be NaN +SELECT 'inf'::DOUBLE / 'inf'::DOUBLE; + +-- Should be NaN +SELECT 'inf'::DOUBLE * 0; + diff --git a/tests/cases/standalone/common/types/float/nan_cast_extended.result b/tests/cases/standalone/common/types/float/nan_cast_extended.result new file mode 100644 index 0000000000..11098a1001 --- /dev/null +++ b/tests/cases/standalone/common/types/float/nan_cast_extended.result @@ -0,0 +1,252 @@ +-- Migrated from DuckDB test: test/sql/types/float/nan_cast.test +-- Test casting of NaN and inf values +-- Test valid casts between FLOAT, DOUBLE, and VARCHAR +-- NaN casts +SELECT 'nan'::FLOAT::DOUBLE; + ++-------------+ +| Utf8("nan") | ++-------------+ +| NaN | ++-------------+ + +SELECT 'nan'::FLOAT::VARCHAR; + ++-------------+ +| Utf8("nan") | ++-------------+ +| NaN | ++-------------+ + +SELECT 'nan'::DOUBLE::FLOAT; + ++-------------+ +| Utf8("nan") | ++-------------+ +| NaN | ++-------------+ + +SELECT 'nan'::DOUBLE::VARCHAR; + ++-------------+ +| Utf8("nan") | ++-------------+ +| NaN | ++-------------+ + +SELECT 'nan'::VARCHAR::FLOAT; + ++-------------+ +| Utf8("nan") | ++-------------+ +| NaN | ++-------------+ + +SELECT 'nan'::VARCHAR::DOUBLE; + ++-------------+ +| Utf8("nan") | ++-------------+ +| NaN | ++-------------+ + +-- Infinity casts +SELECT 'inf'::FLOAT::DOUBLE; + ++-------------+ +| Utf8("inf") | ++-------------+ +| inf | ++-------------+ + +SELECT 'inf'::FLOAT::VARCHAR; + ++-------------+ +| Utf8("inf") | ++-------------+ +| inf | ++-------------+ + +SELECT 'inf'::DOUBLE::FLOAT; + ++-------------+ +| Utf8("inf") | 
++-------------+ +| inf | ++-------------+ + +SELECT 'inf'::DOUBLE::VARCHAR; + ++-------------+ +| Utf8("inf") | ++-------------+ +| inf | ++-------------+ + +SELECT 'inf'::VARCHAR::FLOAT; + ++-------------+ +| Utf8("inf") | ++-------------+ +| inf | ++-------------+ + +SELECT 'inf'::VARCHAR::DOUBLE; + ++-------------+ +| Utf8("inf") | ++-------------+ +| inf | ++-------------+ + +-- Negative infinity casts +SELECT '-inf'::FLOAT::DOUBLE; + ++--------------+ +| Utf8("-inf") | ++--------------+ +| -inf | ++--------------+ + +SELECT '-inf'::FLOAT::VARCHAR; + ++--------------+ +| Utf8("-inf") | ++--------------+ +| -inf | ++--------------+ + +SELECT '-inf'::DOUBLE::FLOAT; + ++--------------+ +| Utf8("-inf") | ++--------------+ +| -inf | ++--------------+ + +SELECT '-inf'::DOUBLE::VARCHAR; + ++--------------+ +| Utf8("-inf") | ++--------------+ +| -inf | ++--------------+ + +SELECT '-inf'::VARCHAR::FLOAT; + ++--------------+ +| Utf8("-inf") | ++--------------+ +| -inf | ++--------------+ + +SELECT '-inf'::VARCHAR::DOUBLE; + ++--------------+ +| Utf8("-inf") | ++--------------+ +| -inf | ++--------------+ + +-- Test TRY_CAST for invalid conversions (should return NULL) +SELECT TRY_CAST('nan'::FLOAT AS INTEGER); + ++-------------+ +| Utf8("nan") | ++-------------+ +| | ++-------------+ + +SELECT TRY_CAST('inf'::FLOAT AS INTEGER); + ++-------------+ +| Utf8("inf") | ++-------------+ +| | ++-------------+ + +SELECT TRY_CAST('-inf'::FLOAT AS INTEGER); + ++--------------+ +| Utf8("-inf") | ++--------------+ +| | ++--------------+ + +SELECT TRY_CAST('nan'::DOUBLE AS BIGINT); + ++-------------+ +| Utf8("nan") | ++-------------+ +| | ++-------------+ + +SELECT TRY_CAST('inf'::DOUBLE AS BIGINT); + ++-------------+ +| Utf8("inf") | ++-------------+ +| | ++-------------+ + +SELECT TRY_CAST('-inf'::DOUBLE AS BIGINT); + ++--------------+ +| Utf8("-inf") | ++--------------+ +| | ++--------------+ + +-- Test with table data +CREATE TABLE cast_test(f FLOAT, d DOUBLE, s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO cast_test VALUES + ('nan'::FLOAT, 'nan'::DOUBLE, 'nan', 1000), + ('inf'::FLOAT, 'inf'::DOUBLE, 'inf', 2000), + ('-inf'::FLOAT, '-inf'::DOUBLE, '-inf', 3000), + (1.5, 2.5, '3.5', 4000); + +Affected Rows: 4 + +-- Cast between float types +SELECT f, f::DOUBLE AS fd, d, d::FLOAT AS df FROM cast_test ORDER BY ts; + ++------+------+------+------+ +| f | fd | d | df | ++------+------+------+------+ +| NaN | NaN | NaN | NaN | +| inf | inf | inf | inf | +| -inf | -inf | -inf | -inf | +| 1.5 | 1.5 | 2.5 | 2.5 | ++------+------+------+------+ + +-- Cast to string +SELECT f::VARCHAR, d::VARCHAR FROM cast_test ORDER BY ts; + ++-------------+-------------+ +| cast_test.f | cast_test.d | ++-------------+-------------+ +| NaN | NaN | +| inf | inf | +| -inf | -inf | +| 1.5 | 2.5 | ++-------------+-------------+ + +-- Cast from string +SELECT s, TRY_CAST(s AS FLOAT) AS sf, TRY_CAST(s AS DOUBLE) AS sd FROM cast_test ORDER BY ts; + ++------+------+------+ +| s | sf | sd | ++------+------+------+ +| nan | NaN | NaN | +| inf | inf | inf | +| -inf | -inf | -inf | +| 3.5 | 3.5 | 3.5 | ++------+------+------+ + +DROP TABLE cast_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/types/float/nan_cast_extended.sql b/tests/cases/standalone/common/types/float/nan_cast_extended.sql new file mode 100644 index 0000000000..5c5caeeec9 --- /dev/null +++ b/tests/cases/standalone/common/types/float/nan_cast_extended.sql @@ -0,0 +1,76 @@ +-- Migrated from DuckDB test: 
test/sql/types/float/nan_cast.test +-- Test casting of NaN and inf values + +-- Test valid casts between FLOAT, DOUBLE, and VARCHAR + +-- NaN casts +SELECT 'nan'::FLOAT::DOUBLE; + +SELECT 'nan'::FLOAT::VARCHAR; + +SELECT 'nan'::DOUBLE::FLOAT; + +SELECT 'nan'::DOUBLE::VARCHAR; + +SELECT 'nan'::VARCHAR::FLOAT; + +SELECT 'nan'::VARCHAR::DOUBLE; + +-- Infinity casts +SELECT 'inf'::FLOAT::DOUBLE; + +SELECT 'inf'::FLOAT::VARCHAR; + +SELECT 'inf'::DOUBLE::FLOAT; + +SELECT 'inf'::DOUBLE::VARCHAR; + +SELECT 'inf'::VARCHAR::FLOAT; + +SELECT 'inf'::VARCHAR::DOUBLE; + +-- Negative infinity casts +SELECT '-inf'::FLOAT::DOUBLE; + +SELECT '-inf'::FLOAT::VARCHAR; + +SELECT '-inf'::DOUBLE::FLOAT; + +SELECT '-inf'::DOUBLE::VARCHAR; + +SELECT '-inf'::VARCHAR::FLOAT; + +SELECT '-inf'::VARCHAR::DOUBLE; + +-- Test TRY_CAST for invalid conversions (should return NULL) +SELECT TRY_CAST('nan'::FLOAT AS INTEGER); + +SELECT TRY_CAST('inf'::FLOAT AS INTEGER); + +SELECT TRY_CAST('-inf'::FLOAT AS INTEGER); + +SELECT TRY_CAST('nan'::DOUBLE AS BIGINT); + +SELECT TRY_CAST('inf'::DOUBLE AS BIGINT); + +SELECT TRY_CAST('-inf'::DOUBLE AS BIGINT); + +-- Test with table data +CREATE TABLE cast_test(f FLOAT, d DOUBLE, s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO cast_test VALUES + ('nan'::FLOAT, 'nan'::DOUBLE, 'nan', 1000), + ('inf'::FLOAT, 'inf'::DOUBLE, 'inf', 2000), + ('-inf'::FLOAT, '-inf'::DOUBLE, '-inf', 3000), + (1.5, 2.5, '3.5', 4000); + +-- Cast between float types +SELECT f, f::DOUBLE AS fd, d, d::FLOAT AS df FROM cast_test ORDER BY ts; + +-- Cast to string +SELECT f::VARCHAR, d::VARCHAR FROM cast_test ORDER BY ts; + +-- Cast from string +SELECT s, TRY_CAST(s AS FLOAT) AS sf, TRY_CAST(s AS DOUBLE) AS sd FROM cast_test ORDER BY ts; + +DROP TABLE cast_test; diff --git a/tests/cases/standalone/common/types/null/null_handling.result b/tests/cases/standalone/common/types/null/null_handling.result new file mode 100644 index 0000000000..320ced424c --- /dev/null +++ b/tests/cases/standalone/common/types/null/null_handling.result @@ -0,0 +1,171 @@ +-- Migrated from DuckDB test: test/sql/types/null/test_null.test +-- Test NULL value handling across different contexts +-- Test NULL in basic operations +CREATE TABLE null_test(i INTEGER, s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO null_test VALUES + (1, 'hello', 1000), + (NULL, 'world', 2000), + (3, NULL, 3000), + (NULL, NULL, 4000); + +Affected Rows: 4 + +-- Test NULL comparisons +SELECT i, s FROM null_test WHERE i IS NULL ORDER BY ts; + ++---+-------+ +| i | s | ++---+-------+ +| | world | +| | | ++---+-------+ + +SELECT i, s FROM null_test WHERE i IS NOT NULL ORDER BY ts; + ++---+-------+ +| i | s | ++---+-------+ +| 1 | hello | +| 3 | | ++---+-------+ + +SELECT i, s FROM null_test WHERE s IS NULL ORDER BY ts; + ++---+---+ +| i | s | ++---+---+ +| 3 | | +| | | ++---+---+ + +SELECT i, s FROM null_test WHERE s IS NOT NULL ORDER BY ts; + ++---+-------+ +| i | s | ++---+-------+ +| 1 | hello | +| | world | ++---+-------+ + +-- Test NULL in arithmetic +SELECT i, i + 1, i * 2, i - 5 FROM null_test ORDER BY ts; + ++---+------------------------+------------------------+------------------------+ +| i | null_test.i + Int64(1) | null_test.i * Int64(2) | null_test.i - Int64(5) | ++---+------------------------+------------------------+------------------------+ +| 1 | 2 | 2 | -4 | +| | | | | +| 3 | 4 | 6 | -2 | +| | | | | ++---+------------------------+------------------------+------------------------+ + +-- Test NULL in string operations +SELECT s, 
CONCAT(s, ' test'), UPPER(s), LENGTH(s) FROM null_test ORDER BY ts; + ++-------+-----------------------------------+--------------------+---------------------+ +| s | concat(null_test.s,Utf8(" test")) | upper(null_test.s) | length(null_test.s) | ++-------+-----------------------------------+--------------------+---------------------+ +| hello | hello test | HELLO | 5 | +| world | world test | WORLD | 5 | +| | test | | | +| | test | | | ++-------+-----------------------------------+--------------------+---------------------+ + +-- Test NULL with COALESCE +SELECT i, s, COALESCE(i, -1), COALESCE(s, 'missing') FROM null_test ORDER BY ts; + ++---+-------+---------------------------------+---------------------------------------+ +| i | s | coalesce(null_test.i,Int64(-1)) | coalesce(null_test.s,Utf8("missing")) | ++---+-------+---------------------------------+---------------------------------------+ +| 1 | hello | 1 | hello | +| | world | -1 | world | +| 3 | | 3 | missing | +| | | -1 | missing | ++---+-------+---------------------------------+---------------------------------------+ + +-- Test NULL in aggregates +SELECT COUNT(*), COUNT(i), COUNT(s) FROM null_test; + ++----------+--------------------+--------------------+ +| count(*) | count(null_test.i) | count(null_test.s) | ++----------+--------------------+--------------------+ +| 4 | 2 | 2 | ++----------+--------------------+--------------------+ + +SELECT SUM(i), AVG(i), MAX(i), MIN(i) FROM null_test; + ++------------------+------------------+------------------+------------------+ +| sum(null_test.i) | avg(null_test.i) | max(null_test.i) | min(null_test.i) | ++------------------+------------------+------------------+------------------+ +| 4 | 2.0 | 3 | 1 | ++------------------+------------------+------------------+------------------+ + +-- Test NULL in CASE expressions +SELECT i, s, + CASE + WHEN i IS NULL THEN 'no number' + WHEN i > 2 THEN 'big number' + ELSE 'small number' + END as category +FROM null_test ORDER BY ts; + ++---+-------+--------------+ +| i | s | category | ++---+-------+--------------+ +| 1 | hello | small number | +| | world | no number | +| 3 | | big number | +| | | no number | ++---+-------+--------------+ + +-- Test NULL in GROUP BY +SELECT i, COUNT(*) FROM null_test GROUP BY i ORDER BY i; + ++---+----------+ +| i | count(*) | ++---+----------+ +| 1 | 1 | +| 3 | 1 | +| | 2 | ++---+----------+ + +SELECT s, COUNT(*) FROM null_test GROUP BY s ORDER BY s; + ++-------+----------+ +| s | count(*) | ++-------+----------+ +| hello | 1 | +| world | 1 | +| | 2 | ++-------+----------+ + +-- Test NULLIF function +SELECT i, NULLIF(i, 1) FROM null_test ORDER BY ts; + ++---+------------------------------+ +| i | nullif(null_test.i,Int64(1)) | ++---+------------------------------+ +| 1 | | +| | | +| 3 | 3 | +| | | ++---+------------------------------+ + +SELECT s, NULLIF(s, 'hello') FROM null_test ORDER BY ts; + ++-------+-----------------------------------+ +| s | nullif(null_test.s,Utf8("hello")) | ++-------+-----------------------------------+ +| hello | | +| world | world | +| | | +| | | ++-------+-----------------------------------+ + +DROP TABLE null_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/types/null/null_handling.sql b/tests/cases/standalone/common/types/null/null_handling.sql new file mode 100644 index 0000000000..e0fb460778 --- /dev/null +++ b/tests/cases/standalone/common/types/null/null_handling.sql @@ -0,0 +1,49 @@ +-- Migrated from DuckDB test: test/sql/types/null/test_null.test +-- Test 
NULL value handling across different contexts + +-- Test NULL in basic operations +CREATE TABLE null_test(i INTEGER, s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO null_test VALUES + (1, 'hello', 1000), + (NULL, 'world', 2000), + (3, NULL, 3000), + (NULL, NULL, 4000); + +-- Test NULL comparisons +SELECT i, s FROM null_test WHERE i IS NULL ORDER BY ts; +SELECT i, s FROM null_test WHERE i IS NOT NULL ORDER BY ts; +SELECT i, s FROM null_test WHERE s IS NULL ORDER BY ts; +SELECT i, s FROM null_test WHERE s IS NOT NULL ORDER BY ts; + +-- Test NULL in arithmetic +SELECT i, i + 1, i * 2, i - 5 FROM null_test ORDER BY ts; + +-- Test NULL in string operations +SELECT s, CONCAT(s, ' test'), UPPER(s), LENGTH(s) FROM null_test ORDER BY ts; + +-- Test NULL with COALESCE +SELECT i, s, COALESCE(i, -1), COALESCE(s, 'missing') FROM null_test ORDER BY ts; + +-- Test NULL in aggregates +SELECT COUNT(*), COUNT(i), COUNT(s) FROM null_test; +SELECT SUM(i), AVG(i), MAX(i), MIN(i) FROM null_test; + +-- Test NULL in CASE expressions +SELECT i, s, + CASE + WHEN i IS NULL THEN 'no number' + WHEN i > 2 THEN 'big number' + ELSE 'small number' + END as category +FROM null_test ORDER BY ts; + +-- Test NULL in GROUP BY +SELECT i, COUNT(*) FROM null_test GROUP BY i ORDER BY i; +SELECT s, COUNT(*) FROM null_test GROUP BY s ORDER BY s; + +-- Test NULLIF function +SELECT i, NULLIF(i, 1) FROM null_test ORDER BY ts; +SELECT s, NULLIF(s, 'hello') FROM null_test ORDER BY ts; + +DROP TABLE null_test; diff --git a/tests/cases/standalone/common/types/string/big_strings.result b/tests/cases/standalone/common/types/string/big_strings.result new file mode 100644 index 0000000000..a81ff17cf5 --- /dev/null +++ b/tests/cases/standalone/common/types/string/big_strings.result @@ -0,0 +1,116 @@ +-- Migrated from DuckDB test: test/sql/types/string/test_big_strings.test +-- Test handling of large strings +-- Test large string creation and manipulation +CREATE TABLE big_strings("id" INTEGER, s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +-- Insert strings of various sizes +INSERT INTO big_strings VALUES + (1, REPEAT('a', 100), 1000), + (2, REPEAT('Hello World! ', 50), 2000), + (3, REPEAT('Unicode 世界 ', 100), 3000), + (4, REPEAT('x', 1000), 4000); + +Affected Rows: 4 + +-- Test length of big strings +SELECT "id", LENGTH(s) FROM big_strings ORDER BY "id"; + ++----+-----------------------+ +| id | length(big_strings.s) | ++----+-----------------------+ +| 1 | 100 | +| 2 | 650 | +| 3 | 1100 | +| 4 | 1000 | ++----+-----------------------+ + +-- Test substring operations on big strings +SELECT "id", SUBSTRING(s, 1, 20) FROM big_strings ORDER BY "id"; + ++----+------------------------------------------+ +| id | substr(big_strings.s,Int64(1),Int64(20)) | ++----+------------------------------------------+ +| 1 | aaaaaaaaaaaaaaaaaaaa | +| 2 | Hello World! Hello W | +| 3 | Unicode 世界 Unicode 世 | +| 4 | xxxxxxxxxxxxxxxxxxxx | ++----+------------------------------------------+ + +SELECT "id", RIGHT(s, 10) FROM big_strings ORDER BY "id"; + ++----+--------------------------------+ +| id | right(big_strings.s,Int64(10)) | ++----+--------------------------------+ +| 1 | aaaaaaaaaa | +| 2 | lo World! 
| +| 3 | nicode 世界 | +| 4 | xxxxxxxxxx | ++----+--------------------------------+ + +-- Test concatenation with big strings +SELECT "id", LENGTH(s || s) FROM big_strings WHERE "id" = 1; + ++----+----------------------------------------+ +| id | length(big_strings.s || big_strings.s) | ++----+----------------------------------------+ +| 1 | 200 | ++----+----------------------------------------+ + +-- Test pattern matching on big strings +SELECT "id", s LIKE '%World%' FROM big_strings ORDER BY "id"; + ++----+------------------------------------+ +| id | big_strings.s LIKE Utf8("%World%") | ++----+------------------------------------+ +| 1 | false | +| 2 | true | +| 3 | false | +| 4 | false | ++----+------------------------------------+ + +-- Test comparison with big strings +SELECT COUNT(*) FROM big_strings WHERE s = REPEAT('a', 100); + ++----------+ +| count(*) | ++----------+ +| 1 | ++----------+ + +-- Test UPPER/LOWER on big strings +SELECT "id", LENGTH(UPPER(s)) FROM big_strings WHERE "id" <= 2 ORDER BY "id"; + ++----+------------------------------+ +| id | length(upper(big_strings.s)) | ++----+------------------------------+ +| 1 | 100 | +| 2 | 650 | ++----+------------------------------+ + +-- Test trimming big strings +CREATE TABLE padded_strings(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO padded_strings VALUES (CONCAT(' ', REPEAT('test', 100), ' '), 1000); + +Affected Rows: 1 + +SELECT LENGTH(s), LENGTH(TRIM(s)) FROM padded_strings; + ++--------------------------+---------------------------------+ +| length(padded_strings.s) | length(btrim(padded_strings.s)) | ++--------------------------+---------------------------------+ +| 406 | 400 | ++--------------------------+---------------------------------+ + +DROP TABLE big_strings; + +Affected Rows: 0 + +DROP TABLE padded_strings; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/types/string/big_strings.sql b/tests/cases/standalone/common/types/string/big_strings.sql new file mode 100644 index 0000000000..0c654ecb01 --- /dev/null +++ b/tests/cases/standalone/common/types/string/big_strings.sql @@ -0,0 +1,43 @@ +-- Migrated from DuckDB test: test/sql/types/string/test_big_strings.test +-- Test handling of large strings + +-- Test large string creation and manipulation +CREATE TABLE big_strings("id" INTEGER, s VARCHAR, ts TIMESTAMP TIME INDEX); + +-- Insert strings of various sizes +INSERT INTO big_strings VALUES + (1, REPEAT('a', 100), 1000), + (2, REPEAT('Hello World! 
', 50), 2000), + (3, REPEAT('Unicode 世界 ', 100), 3000), + (4, REPEAT('x', 1000), 4000); + +-- Test length of big strings +SELECT "id", LENGTH(s) FROM big_strings ORDER BY "id"; + +-- Test substring operations on big strings +SELECT "id", SUBSTRING(s, 1, 20) FROM big_strings ORDER BY "id"; + +SELECT "id", RIGHT(s, 10) FROM big_strings ORDER BY "id"; + +-- Test concatenation with big strings +SELECT "id", LENGTH(s || s) FROM big_strings WHERE "id" = 1; + +-- Test pattern matching on big strings +SELECT "id", s LIKE '%World%' FROM big_strings ORDER BY "id"; + +-- Test comparison with big strings +SELECT COUNT(*) FROM big_strings WHERE s = REPEAT('a', 100); + +-- Test UPPER/LOWER on big strings +SELECT "id", LENGTH(UPPER(s)) FROM big_strings WHERE "id" <= 2 ORDER BY "id"; + +-- Test trimming big strings +CREATE TABLE padded_strings(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO padded_strings VALUES (CONCAT(' ', REPEAT('test', 100), ' '), 1000); + +SELECT LENGTH(s), LENGTH(TRIM(s)) FROM padded_strings; + +DROP TABLE big_strings; + +DROP TABLE padded_strings; diff --git a/tests/cases/standalone/common/types/string/unicode_extended.result b/tests/cases/standalone/common/types/string/unicode_extended.result new file mode 100644 index 0000000000..6a1ad83b85 --- /dev/null +++ b/tests/cases/standalone/common/types/string/unicode_extended.result @@ -0,0 +1,103 @@ +-- Migrated from DuckDB test: test/sql/types/string/test_unicode.test +-- Test Unicode string handling +-- Test basic Unicode strings +CREATE TABLE unicode_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO unicode_test VALUES + ('Hello 世界', 1000), + ('Ññññ', 2000), + ('🚀🎉🌟', 3000), + ('Здравствуй мир', 4000), + ('مرحبا بالعالم', 5000), + ('こんにちは世界', 6000); + +Affected Rows: 6 + +-- Test basic selection +SELECT s FROM unicode_test ORDER BY ts; + ++----------------+ +| s | ++----------------+ +| Hello 世界 | +| Ññññ | +| 🚀🎉🌟 | +| Здравствуй мир | +| مرحبا بالعالم | +| こんにちは世界 | ++----------------+ + +-- Test length function with Unicode +SELECT s, LENGTH(s) AS a, CHAR_LENGTH(s) AS b FROM unicode_test ORDER BY ts; + ++----------------+----+----+ +| s | a | b | ++----------------+----+----+ +| Hello 世界 | 8 | 8 | +| Ññññ | 4 | 4 | +| 🚀🎉🌟 | 3 | 3 | +| Здравствуй мир | 14 | 14 | +| مرحبا بالعالم | 13 | 13 | +| こんにちは世界 | 7 | 7 | ++----------------+----+----+ + +-- Test substring with Unicode +SELECT s, SUBSTRING(s, 1, 5) FROM unicode_test ORDER BY ts; + ++----------------+------------------------------------------+ +| s | substr(unicode_test.s,Int64(1),Int64(5)) | ++----------------+------------------------------------------+ +| Hello 世界 | Hello | +| Ññññ | Ññññ | +| 🚀🎉🌟 | 🚀🎉🌟 | +| Здравствуй мир | Здрав | +| مرحبا بالعالم | مرحبا | +| こんにちは世界 | こんにちは | ++----------------+------------------------------------------+ + +-- Test UPPER/LOWER with Unicode +SELECT s, UPPER(s), LOWER(s) FROM unicode_test WHERE s = 'Hello 世界'; + ++------------+-----------------------+-----------------------+ +| s | upper(unicode_test.s) | lower(unicode_test.s) | ++------------+-----------------------+-----------------------+ +| Hello 世界 | HELLO 世界 | hello 世界 | ++------------+-----------------------+-----------------------+ + +-- Test comparison with Unicode +SELECT COUNT(*) FROM unicode_test WHERE s LIKE '%世界%'; + ++----------+ +| count(*) | ++----------+ +| 2 | ++----------+ + +SELECT COUNT(*) FROM unicode_test WHERE s LIKE '%🚀%'; + ++----------+ +| count(*) | ++----------+ +| 1 | ++----------+ + +-- Test concatenation with Unicode +SELECT 
CONCAT(s, ' - test') FROM unicode_test ORDER BY ts; + ++----------------------------------------+ +| concat(unicode_test.s,Utf8(" - test")) | ++----------------------------------------+ +| Hello 世界 - test | +| Ññññ - test | +| 🚀🎉🌟 - test | +| Здравствуй мир - test | +| مرحبا بالعالم - test | +| こんにちは世界 - test | ++----------------------------------------+ + +DROP TABLE unicode_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/types/string/unicode_extended.sql b/tests/cases/standalone/common/types/string/unicode_extended.sql new file mode 100644 index 0000000000..3e6f47f3c8 --- /dev/null +++ b/tests/cases/standalone/common/types/string/unicode_extended.sql @@ -0,0 +1,35 @@ +-- Migrated from DuckDB test: test/sql/types/string/test_unicode.test +-- Test Unicode string handling + +-- Test basic Unicode strings +CREATE TABLE unicode_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO unicode_test VALUES + ('Hello 世界', 1000), + ('Ññññ', 2000), + ('🚀🎉🌟', 3000), + ('Здравствуй мир', 4000), + ('مرحبا بالعالم', 5000), + ('こんにちは世界', 6000); + +-- Test basic selection +SELECT s FROM unicode_test ORDER BY ts; + +-- Test length function with Unicode +SELECT s, LENGTH(s) AS a, CHAR_LENGTH(s) AS b FROM unicode_test ORDER BY ts; + +-- Test substring with Unicode +SELECT s, SUBSTRING(s, 1, 5) FROM unicode_test ORDER BY ts; + +-- Test UPPER/LOWER with Unicode +SELECT s, UPPER(s), LOWER(s) FROM unicode_test WHERE s = 'Hello 世界'; + +-- Test comparison with Unicode +SELECT COUNT(*) FROM unicode_test WHERE s LIKE '%世界%'; + +SELECT COUNT(*) FROM unicode_test WHERE s LIKE '%🚀%'; + +-- Test concatenation with Unicode +SELECT CONCAT(s, ' - test') FROM unicode_test ORDER BY ts; + +DROP TABLE unicode_test;