diff --git a/Cargo.lock b/Cargo.lock
index f0ca46b271..0254f51607 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -7985,6 +7985,7 @@ version = "1.0.0-rc.2"
 dependencies = [
  "api",
  "aquamarine",
+ "arrow-schema 57.3.0",
  "async-channel 1.9.0",
  "async-stream",
  "async-trait",
diff --git a/src/api/src/helper.rs b/src/api/src/helper.rs
index 4664c0434b..6d5ea13461 100644
--- a/src/api/src/helper.rs
+++ b/src/api/src/helper.rs
@@ -129,7 +129,7 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
                         };
                         ConcreteDataType::json_native_datatype(inner_type.into())
                     }
-                    None => ConcreteDataType::Json(JsonType::null()),
+                    None => ConcreteDataType::Json(JsonType::new(JsonFormat::Json2)),
                     _ => {
                         // invalid state, type extension is missing or invalid
                         ConcreteDataType::null_datatype()
@@ -461,6 +461,7 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
                                 })
                             }
                         }
+                        JsonFormat::Json2 => Some(ColumnDataTypeExtension { type_ext: None }),
                     }
                 } else {
                     None
diff --git a/src/cmd/src/datanode/objbench.rs b/src/cmd/src/datanode/objbench.rs
index 1f3591635f..3599f84fa5 100644
--- a/src/cmd/src/datanode/objbench.rs
+++ b/src/cmd/src/datanode/objbench.rs
@@ -247,6 +247,7 @@ impl ObjbenchCommand {
             op_type: OperationType::Flush,
             metadata: region_meta,
             source: FlatSource::Stream(reader_stream),
+            schema: None,
             cache_manager,
             storage: None,
             max_sequence: None,
diff --git a/src/common/function/src/scalars/json.rs b/src/common/function/src/scalars/json.rs
index e9d638c6cb..f92c849332 100644
--- a/src/common/function/src/scalars/json.rs
+++ b/src/common/function/src/scalars/json.rs
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+pub mod json2_get;
+mod json2_get_rewriter;
 pub mod json_get;
 mod json_get_rewriter;
 mod json_is;
@@ -26,6 +28,8 @@ use json_is::{
     JsonIsArray, JsonIsBool, JsonIsFloat, JsonIsInt, JsonIsNull, JsonIsObject, JsonIsString,
 };
 use json_to_string::JsonToStringFunction;
+use json2_get::Json2GetFunction;
+use json2_get_rewriter::Json2GetRewriter;
 use parse_json::ParseJsonFunction;
 
 use crate::function_registry::FunctionRegistry;
@@ -44,6 +48,7 @@ impl JsonFunction {
         registry.register_scalar(JsonGetBool::default());
         registry.register_scalar(JsonGetObject::default());
         registry.register_scalar(JsonGetWithType::default());
+        registry.register_scalar(Json2GetFunction::default());
 
         registry.register_scalar(JsonIsNull::default());
         registry.register_scalar(JsonIsInt::default());
@@ -57,5 +62,6 @@ impl JsonFunction {
         registry.register_scalar(json_path_match::JsonPathMatchFunction::default());
 
         registry.register_function_rewrite(JsonGetRewriter);
+        registry.register_function_rewrite(Json2GetRewriter);
     }
 }
diff --git a/src/common/function/src/scalars/json/json2_get.rs b/src/common/function/src/scalars/json/json2_get.rs
new file mode 100644
index 0000000000..9b71fb59fb
--- /dev/null
+++ b/src/common/function/src/scalars/json/json2_get.rs
@@ -0,0 +1,166 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use arrow::array::StringViewBuilder;
+use arrow_cast::display::ArrayFormatter;
+use datafusion_common::arrow::array::{Array, ArrayRef, StructArray, new_null_array};
+use datafusion_common::arrow::datatypes::{DataType, Field};
+use datafusion_common::{Result, ScalarValue, exec_err, internal_err};
+use datafusion_expr::{
+    ColumnarValue, Expr, ReturnFieldArgs, ScalarFunctionArgs, Signature, Volatility,
+};
+use derive_more::Display;
+
+use crate::function::Function;
+
+/// Scalar function `json2_get(json, path, type_hint)`.
+///
+/// Walks the dot-separated literal `path` through the struct-typed `json` input and returns the
+/// leaf values converted to the data type of `type_hint`. Only the data type of the third
+/// argument matters, its value is never read. The result is all NULL when the path cannot be
+/// resolved, or when the leaf can neither be cast nor formatted into the requested type.
+#[derive(Display, Debug)]
+#[display("{}", Self::NAME.to_ascii_uppercase())]
+pub struct Json2GetFunction {
+    signature: Signature,
+}
+
+impl Json2GetFunction {
+    /// Name under which the function is registered.
+    pub const NAME: &'static str = "json2_get";
+}
+
+impl Function for Json2GetFunction {
+    fn name(&self) -> &str {
+        Self::NAME
+    }
+
+    /// Always errors: the return type depends on the third argument, so it is only derived in
+    /// [`Self::return_field_from_args`].
+    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
+        internal_err!("this method isn't meant to be called")
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+        if args.args.len() != 3 {
+            return exec_err!("json2_get expects 3 arguments, got {}", args.args.len());
+        }
+
+        let input = args.args[0].to_array(args.number_rows)?;
+        let path = path_from_arg(&args.args[1])?;
+        let return_type = args.return_field.data_type();
+
+        let segments = path.split('.').collect::<Vec<_>>();
+        let Some(leaf) = resolve_leaf_path(&input, &segments) else {
+            return Ok(ColumnarValue::Array(new_null_array(
+                return_type,
+                input.len(),
+            )));
+        };
+
+        let casted = if arrow_cast::can_cast_types(leaf.data_type(), return_type) {
+            arrow_cast::cast(leaf.as_ref(), return_type)?
+        } else if return_type.is_string() {
+            // `cast_array_to_string` always builds a `Utf8View` array, so cast it to the
+            // requested string type to keep the output consistent with the return field.
+            arrow_cast::cast(cast_array_to_string(&leaf)?.as_ref(), return_type)?
+        } else {
+            return Ok(ColumnarValue::Array(new_null_array(
+                return_type,
+                input.len(),
+            )));
+        };
+
+        Ok(ColumnarValue::Array(casted))
+    }
+
+    /// Derives the (nullable) return field from the data type of the scalar third argument.
+    fn return_field_from_args(&self, args: ReturnFieldArgs<'_>) -> Result<Arc<Field>> {
+        let Some(Some(value)) = args.scalar_arguments.get(2) else {
+            return internal_err!(
+                "third argument of function {} must be present and is scalar",
+                self.name()
+            );
+        };
+        Ok(Arc::new(Field::new(
+            "json2_get expected type",
+            value.data_type(),
+            true,
+        )))
+    }
+}
+
+impl Default for Json2GetFunction {
+    fn default() -> Self {
+        Self {
+            signature: Signature::any(3, Volatility::Immutable),
+        }
+    }
+}
+
+/// Extracts the JSON path from a string scalar argument; arrays and other scalars are rejected.
+fn path_from_arg(arg: &ColumnarValue) -> Result<&str> {
+    match arg {
+        ColumnarValue::Scalar(ScalarValue::Utf8(Some(path)))
+        | ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(path)))
+        | ColumnarValue::Scalar(ScalarValue::Utf8View(Some(path))) => Ok(path.as_str()),
+        ColumnarValue::Scalar(_) => exec_err!("json2_get expects a string path"),
+        ColumnarValue::Array(_) => exec_err!("json2_get expects a literal path"),
+    }
+}
+
+/// Follows `segments` through nested struct arrays and returns the array found at the end.
+///
+/// Returns `None` when `segments` is empty, when an intermediate array is not a struct, or when
+/// a segment names no field of the current struct.
+///
+/// NOTE(review): the validity of the parent structs is not merged into the returned leaf;
+/// confirm that writers always propagate nulls down to the child arrays.
+fn resolve_leaf_path(array: &ArrayRef, segments: &[&str]) -> Option<ArrayRef> {
+    if segments.is_empty() {
+        return None;
+    }
+
+    let mut current = array.clone();
+    for segment in segments {
+        let struct_array = current.as_any().downcast_ref::<StructArray>()?;
+        // `column_by_name` yields the first column with a matching name, or `None`.
+        let child = struct_array.column_by_name(segment)?.clone();
+        current = child;
+    }
+    Some(current)
+}
+
+/// Renders every non-null value of `array` with arrow's display formatter into a `Utf8View`
+/// array; nulls stay null.
+fn cast_array_to_string(array: &ArrayRef) -> Result<ArrayRef> {
+    let mut builder = StringViewBuilder::with_capacity(array.len());
+    let formatter = ArrayFormatter::try_new(array, &Default::default())?;
+    for i in 0..array.len() {
+        let value = array.is_valid(i).then(|| formatter.value(i).to_string());
+        builder.append_option(value);
+    }
+    Ok(Arc::new(builder.finish()))
+}
+
+/// Builds the typed NULL literal that carries `data_type` as `json2_get`'s third argument.
+pub fn datatype_expr(data_type: &DataType) -> Result<Expr> {
+    ScalarValue::try_new_null(data_type).map(|x| Expr::Literal(x, None))
+}
diff --git a/src/common/function/src/scalars/json/json2_get_rewriter.rs b/src/common/function/src/scalars/json/json2_get_rewriter.rs
new file mode 100644
index 0000000000..fb807c1fca
--- /dev/null
+++ b/src/common/function/src/scalars/json/json2_get_rewriter.rs
@@ -0,0 +1,100 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use arrow_schema::DataType;
+use datafusion::common::config::ConfigOptions;
+use datafusion::common::tree_node::Transformed;
+use datafusion::common::{DFSchema, Result};
+use datafusion::logical_expr::expr_rewriter::FunctionRewrite;
+use datafusion::scalar::ScalarValue;
+use datafusion_common::{exec_err, internal_err};
+use datafusion_expr::Expr;
+
+use crate::scalars::json::json2_get::{Json2GetFunction, datatype_expr};
+
+/// Function rewrite that folds an `arrow_cast` wrapped around `json2_get` into the type hint
+/// (third argument) of `json2_get`.
+#[derive(Debug)]
+pub(crate) struct Json2GetRewriter;
+
+impl FunctionRewrite for Json2GetRewriter {
+    fn name(&self) -> &'static str {
+        "Json2GetRewriter"
+    }
+
+    fn rewrite(
+        &self,
+        expr: Expr,
+        _schema: &DFSchema,
+        _config: &ConfigOptions,
+    ) -> Result<Transformed<Expr>> {
+        let (expr, rewritten) = reduce_arrow_cast(expr)?;
+        if rewritten {
+            Ok(Transformed::yes(expr))
+        } else {
+            Ok(Transformed::no(expr))
+        }
+    }
+}
+
+/// Rewrites `arrow_cast(json2_get(_, _, _), "<type>")` to `json2_get(_, _, <typed NULL>)`.
+///
+/// Returns the possibly rewritten expression and whether a rewrite happened. Any other
+/// expression, including an `arrow_cast` whose first argument is not `json2_get`, is handed
+/// back untouched.
+///
+/// # Errors
+///
+/// Errors when the matched `arrow_cast` does not have exactly 2 arguments, when its 2nd argument
+/// is not a `Utf8` literal holding a parsable arrow data type, or when the inner `json2_get`
+/// does not have exactly 3 arguments.
+fn reduce_arrow_cast(expr: Expr) -> Result<(Expr, bool)> {
+    let mut f = match expr {
+        Expr::ScalarFunction(f) => f,
+        expr => return Ok((expr, false)),
+    };
+    if f.name() != "arrow_cast" {
+        return Ok((Expr::ScalarFunction(f), false));
+    }
+    // Use `first()` rather than indexing: the argument count is only validated below, so an
+    // `arrow_cast` without arguments must not panic here.
+    let wraps_json2_get = matches!(
+        f.args.first(),
+        Some(Expr::ScalarFunction(inner)) if inner.name() == Json2GetFunction::NAME
+    );
+    if !wraps_json2_get {
+        return Ok((Expr::ScalarFunction(f), false));
+    }
+
+    if f.args.len() != 2 {
+        return internal_err!("arrow_cast must have 2 arguments");
+    }
+    let target_type = match &f.args[1] {
+        Expr::Literal(ScalarValue::Utf8(Some(target_type)), _) => target_type
+            .parse::<DataType>()
+            .map_err(Into::into)
+            .and_then(|x| datatype_expr(&x))?,
+        x => return exec_err!("arrow_cast expects 2nd argument a string, got: {:?}", x),
+    };
+
+    let Expr::ScalarFunction(mut json2_get) = f.args.remove(0) else {
+        // checked in above "matches!"
+        unreachable!()
+    };
+    if json2_get.args.len() != 3 {
+        return internal_err!("function {} must have 3 arguments", Json2GetFunction::NAME);
+    }
+    json2_get.args[2] = target_type;
+    Ok((Expr::ScalarFunction(json2_get), true))
+}
diff --git a/src/common/recordbatch/src/error.rs b/src/common/recordbatch/src/error.rs
index 6d794463a0..00d4291ead 100644
--- a/src/common/recordbatch/src/error.rs
+++ b/src/common/recordbatch/src/error.rs
@@ -188,13 +188,6 @@ pub enum Error {
         #[snafu(implicit)]
         location: Location,
     },
-
-    #[snafu(display("Failed to align JSON array, reason: {reason}"))]
-    AlignJsonArray {
-        reason: String,
-        #[snafu(implicit)]
-        location: Location,
-    },
 }
 
 impl ErrorExt for Error {
@@ -210,8 +203,7 @@ impl ErrorExt for Error {
             | Error::ToArrowScalar { .. }
             | Error::ProjectArrowRecordBatch { .. }
             | Error::PhysicalExpr { .. }
-            | Error::RecordBatchSliceIndexOverflow { .. }
-            | Error::AlignJsonArray { .. } => StatusCode::Internal,
+            | Error::RecordBatchSliceIndexOverflow { .. } => StatusCode::Internal,
 
             Error::PollStream { .. } => StatusCode::EngineExecuteQuery,
 
diff --git a/src/common/recordbatch/src/recordbatch.rs b/src/common/recordbatch/src/recordbatch.rs
index 2e92b9e87a..413d89fb78 100644
--- a/src/common/recordbatch/src/recordbatch.rs
+++ b/src/common/recordbatch/src/recordbatch.rs
@@ -20,10 +20,11 @@ use datafusion::arrow::util::pretty::pretty_format_batches;
 use datafusion_common::arrow::array::ArrayRef;
 use datafusion_common::arrow::compute;
 use datafusion_common::arrow::datatypes::{DataType as ArrowDataType, SchemaRef as ArrowSchemaRef};
-use datatypes::arrow::array::{Array, AsArray, RecordBatchOptions, StructArray, new_null_array};
+use datatypes::arrow::array::{Array, AsArray, RecordBatchOptions};
 use datatypes::extension::json::is_json_extension_type;
 use datatypes::prelude::DataType;
 use datatypes::schema::SchemaRef;
+use datatypes::vectors::json::array::JsonArray;
 use datatypes::vectors::{Helper, VectorRef};
 use serde::ser::{Error, SerializeStruct};
 use serde::{Serialize, Serializer};
@@ -31,8 +32,8 @@ use snafu::{OptionExt, ResultExt, ensure};
 
 use crate::DfRecordBatch;
 use crate::error::{
-    self, AlignJsonArraySnafu, ArrowComputeSnafu, ColumnNotExistsSnafu, DataTypesSnafu,
-    NewDfRecordBatchSnafu, ProjectArrowRecordBatchSnafu, Result,
+    self, ArrowComputeSnafu, ColumnNotExistsSnafu, DataTypesSnafu, ProjectArrowRecordBatchSnafu,
+    Result,
 };
 
 /// A two-dimensional batch of column-oriented data with a defined schema.
@@ -354,81 +355,7 @@ pub fn merge_record_batches(schema: SchemaRef, batches: &[RecordBatch]) -> Resul
     Ok(RecordBatch::from_df_record_batch(schema, record_batch))
 }
 
-/// Align a json array `json_array` to the json type `schema_type`. The `schema_type` is often the
-/// "largest" json type after some insertions in the table schema, while the json array previously
-/// written in the SST could be lagged behind it. So it's important to "amend" the json array's
-/// missing fields with null arrays, to align the array's data type with the provided one.
-///
-/// # Panics
-///
-/// - The json array is not an Arrow [StructArray], or the provided data type `schema_type` is not
-///   of Struct type. Both of which shouldn't happen unless we switch our implementation of how
-///   json array is physically stored.
-pub fn align_json_array(json_array: &ArrayRef, schema_type: &ArrowDataType) -> Result<ArrayRef> {
-    let json_type = json_array.data_type();
-    if json_type == schema_type {
-        return Ok(json_array.clone());
-    }
-
-    let json_array = json_array.as_struct();
-    let array_fields = json_array.fields();
-    let array_columns = json_array.columns();
-    let ArrowDataType::Struct(schema_fields) = schema_type else {
-        unreachable!()
-    };
-    let mut aligned = Vec::with_capacity(schema_fields.len());
-
-    // Compare the fields in the json array and the to-be-aligned schema, amending with null arrays
-    // on the way. It's very important to note that fields in the json array and in the json type
-    // are both SORTED.
-
-    let mut i = 0; // point to the schema fields
-    let mut j = 0; // point to the array fields
-    while i < schema_fields.len() && j < array_fields.len() {
-        let schema_field = &schema_fields[i];
-        let array_field = &array_fields[j];
-        if schema_field.name() == array_field.name() {
-            if matches!(schema_field.data_type(), ArrowDataType::Struct(_)) {
-                // A `StructArray`s in a json array must be another json array. (Like a nested json
-                // object in a json value.)
-                aligned.push(align_json_array(
-                    &array_columns[j],
-                    schema_field.data_type(),
-                )?);
-            } else {
-                aligned.push(array_columns[j].clone());
-            }
-            j += 1;
-        } else {
-            aligned.push(new_null_array(schema_field.data_type(), json_array.len()));
-        }
-        i += 1;
-    }
-    if i < schema_fields.len() {
-        for field in &schema_fields[i..] {
-            aligned.push(new_null_array(field.data_type(), json_array.len()));
-        }
-    }
-    ensure!(
-        j == array_fields.len(),
-        AlignJsonArraySnafu {
-            reason: format!(
-                "this json array has more fields {:?}",
-                array_fields[j..]
-                    .iter()
-                    .map(|x| x.name())
-                    .collect::<Vec<_>>(),
-            )
-        }
-    );
-
-    let json_array =
-        StructArray::try_new(schema_fields.clone(), aligned, json_array.nulls().cloned())
-            .context(NewDfRecordBatchSnafu)?;
-    Ok(Arc::new(json_array))
-}
-
-fn maybe_align_json_array_with_schema(
+pub fn maybe_align_json_array_with_schema(
     schema: &ArrowSchemaRef,
     arrays: Vec<ArrayRef>,
 ) -> Result<Vec<ArrayRef>> {
@@ -443,7 +370,9 @@ fn maybe_align_json_array_with_schema(
             continue;
         }
 
-        let json_array = align_json_array(&array, field.data_type())?;
+        let json_array = JsonArray::from(&array)
+            .try_align(field.data_type())
+            .context(DataTypesSnafu)?;
         aligned.push(json_array);
     }
     Ok(aligned)
@@ -453,12 +382,8 @@ fn maybe_align_json_array_with_schema(
 mod tests {
     use std::sync::Arc;
 
-    use datatypes::arrow::array::{
-        AsArray, BooleanArray, Float64Array, Int64Array, ListArray, UInt32Array,
-    };
-    use datatypes::arrow::datatypes::{
-        DataType, Field, Fields, Int64Type, Schema as ArrowSchema, UInt32Type,
-    };
+    use datatypes::arrow::array::{AsArray, UInt32Array};
+    use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema, UInt32Type};
     use datatypes::arrow_array::StringArray;
     use datatypes::data_type::ConcreteDataType;
     use datatypes::schema::{ColumnSchema, Schema};
@@ -466,165 +391,6 @@ mod tests {
 
     use super::*;
 
-    #[test]
-    fn test_align_json_array() -> Result<()> {
-        struct TestCase {
-            json_array: ArrayRef,
-            schema_type: DataType,
-            expected: std::result::Result<ArrayRef, String>,
-        }
-
-        impl TestCase {
-            fn new(
-                json_array: StructArray,
-                schema_type: Fields,
-                expected: std::result::Result<Vec<ArrayRef>, String>,
-            ) -> Self {
-                Self {
-                    json_array: Arc::new(json_array),
-                    schema_type: DataType::Struct(schema_type.clone()),
-                    expected: expected
-                        .map(|x| Arc::new(StructArray::new(schema_type, x, None)) as ArrayRef),
-                }
-            }
-
-            fn test(self) -> Result<()> {
-                let result = align_json_array(&self.json_array, &self.schema_type);
-                match (result, self.expected) {
-                    (Ok(json_array), Ok(expected)) => assert_eq!(&json_array, &expected),
-                    (Ok(json_array), Err(e)) => {
-                        panic!("expecting error {e} but actually get: {json_array:?}")
-                    }
-                    (Err(e), Err(expected)) => assert_eq!(e.to_string(), expected),
-                    (Err(e), Ok(_)) => return Err(e),
-                }
-                Ok(())
-            }
-        }
-
-        // Test empty json array can be aligned with a complex json type.
-        TestCase::new(
-            StructArray::new_empty_fields(2, None),
-            Fields::from(vec![
-                Field::new("int", DataType::Int64, true),
-                Field::new_struct(
-                    "nested",
-                    vec![Field::new("bool", DataType::Boolean, true)],
-                    true,
-                ),
-                Field::new("string", DataType::Utf8, true),
-            ]),
-            Ok(vec![
-                Arc::new(Int64Array::new_null(2)) as ArrayRef,
-                Arc::new(StructArray::new_null(
-                    Fields::from(vec![Arc::new(Field::new("bool", DataType::Boolean, true))]),
-                    2,
-                )),
-                Arc::new(StringArray::new_null(2)),
-            ]),
-        )
-        .test()?;
-
-        // Test simple json array alignment.
-        TestCase::new(
-            StructArray::from(vec![(
-                Arc::new(Field::new("float", DataType::Float64, true)),
-                Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0])) as ArrayRef,
-            )]),
-            Fields::from(vec![
-                Field::new("float", DataType::Float64, true),
-                Field::new("string", DataType::Utf8, true),
-            ]),
-            Ok(vec![
-                Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0])) as ArrayRef,
-                Arc::new(StringArray::new_null(3)),
-            ]),
-        )
-        .test()?;
-
-        // Test complex json array alignment.
-        TestCase::new(
-            StructArray::from(vec![
-                (
-                    Arc::new(Field::new_list(
-                        "list",
-                        Field::new_list_field(DataType::Int64, true),
-                        true,
-                    )),
-                    Arc::new(ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
-                        Some(vec![Some(1)]),
-                        None,
-                        Some(vec![Some(2), Some(3)]),
-                    ])) as ArrayRef,
-                ),
-                (
-                    Arc::new(Field::new_struct(
-                        "nested",
-                        vec![Field::new("int", DataType::Int64, true)],
-                        true,
-                    )),
-                    Arc::new(StructArray::from(vec![(
-                        Arc::new(Field::new("int", DataType::Int64, true)),
-                        Arc::new(Int64Array::from(vec![-1, -2, -3])) as ArrayRef,
-                    )])),
-                ),
-                (
-                    Arc::new(Field::new("string", DataType::Utf8, true)),
-                    Arc::new(StringArray::from(vec!["a", "b", "c"])),
-                ),
-            ]),
-            Fields::from(vec![
-                Field::new("bool", DataType::Boolean, true),
-                Field::new_list("list", Field::new_list_field(DataType::Int64, true), true),
-                Field::new_struct(
-                    "nested",
-                    vec![
-                        Field::new("float", DataType::Float64, true),
-                        Field::new("int", DataType::Int64, true),
-                    ],
-                    true,
-                ),
-                Field::new("string", DataType::Utf8, true),
-            ]),
-            Ok(vec![
-                Arc::new(BooleanArray::new_null(3)) as ArrayRef,
-                Arc::new(ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
-                    Some(vec![Some(1)]),
-                    None,
-                    Some(vec![Some(2), Some(3)]),
-                ])),
-                Arc::new(StructArray::from(vec![
-                    (
-                        Arc::new(Field::new("float", DataType::Float64, true)),
-                        Arc::new(Float64Array::new_null(3)) as ArrayRef,
-                    ),
-                    (
-                        Arc::new(Field::new("int", DataType::Int64, true)),
-                        Arc::new(Int64Array::from(vec![-1, -2, -3])),
-                    ),
-                ])),
-                Arc::new(StringArray::from(vec!["a", "b", "c"])),
-            ]),
-        )
-        .test()?;
-
-        // Test align failed.
-        TestCase::new(
-            StructArray::try_from(vec![
-                ("i", Arc::new(Int64Array::from(vec![1])) as ArrayRef),
-                ("j", Arc::new(Int64Array::from(vec![2])) as ArrayRef),
-            ])
-            .unwrap(),
-            Fields::from(vec![Field::new("i", DataType::Int64, true)]),
-            Err(
-                r#"Failed to align JSON array, reason: this json array has more fields ["j"]"#
-                    .to_string(),
-            ),
-        )
-        .test()?;
-        Ok(())
-    }
-
     #[test]
     fn test_record_batch() {
         let arrow_schema = Arc::new(ArrowSchema::new(vec![
diff --git a/src/common/sql/src/convert.rs b/src/common/sql/src/convert.rs
index bd9a1d0769..32a2407db4 100644
--- a/src/common/sql/src/convert.rs
+++ b/src/common/sql/src/convert.rs
@@ -306,7 +306,7 @@ pub(crate) fn parse_string_to_value(
                 let v = parse_string_to_jsonb(&s).context(DatatypeSnafu)?;
                 Ok(Value::Binary(v.into()))
             }
-            JsonFormat::Native(_) => {
+            JsonFormat::Native(_) | JsonFormat::Json2 => {
                 let extension_type: Option<JsonExtensionType> =
                     column_schema.extension_type().context(DatatypeSnafu)?;
                 let json_structure_settings = extension_type
diff --git a/src/datanode/src/lib.rs b/src/datanode/src/lib.rs
index 7e0db3cabc..f1c33a4d3b 100644
--- a/src/datanode/src/lib.rs
+++ b/src/datanode/src/lib.rs
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#![recursion_limit = "256"]
+
 pub mod alive_keeper;
 pub mod config;
 pub mod datanode;
diff --git a/src/datatypes/src/error.rs b/src/datatypes/src/error.rs
index 65aca699ec..f75459ea86 100644
--- a/src/datatypes/src/error.rs
+++ b/src/datatypes/src/error.rs
@@ -274,6 +274,13 @@ pub enum Error {
         #[snafu(implicit)]
         location: Location,
     },
+
+    #[snafu(display("Failed to align JSON array, reason: {reason}"))]
+    AlignJsonArray {
+        reason: String,
+        #[snafu(implicit)]
+        location: Location,
+    },
 }
 
 impl ErrorExt for Error {
@@ -316,7 +323,8 @@ impl ErrorExt for Error {
             | ConvertScalarToArrowArray { .. }
             | ParseExtendedType { .. }
             | InconsistentStructFieldsAndItems { .. }
-            | ArrowMetadata { .. } => StatusCode::Internal,
+            | ArrowMetadata { .. }
+            | AlignJsonArray { .. } => StatusCode::Internal,
         }
     }
 
diff --git a/src/datatypes/src/json.rs b/src/datatypes/src/json.rs
index db657abbcb..64b6d6b132 100644
--- a/src/datatypes/src/json.rs
+++ b/src/datatypes/src/json.rs
@@ -19,6 +19,7 @@
 //! The struct will carry all the fields of the Json object. We will not flatten any json object in this implementation.
 //!
 
+pub mod requirement;
 pub mod value;
 
 use std::collections::{BTreeMap, HashSet};
diff --git a/src/datatypes/src/json/requirement.rs b/src/datatypes/src/json/requirement.rs
new file mode 100644
index 0000000000..5572aa0b66
--- /dev/null
+++ b/src/datatypes/src/json/requirement.rs
@@ -0,0 +1,82 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::BTreeMap;
+use std::sync::Arc;
+
+use crate::data_type::ConcreteDataType;
+use crate::types::{StructField, StructType};
+
+/// A set of dot-separated JSON paths, each paired with a required data type, stored as a tree.
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+pub struct JsonPathTarget {
+    root: JsonPathTargetNode,
+}
+
+/// One path segment: its sub-segments keyed by name, plus the data type required at this node.
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+struct JsonPathTargetNode {
+    children: BTreeMap<String, JsonPathTargetNode>,
+    leaf_type: Option<ConcreteDataType>,
+}
+
+impl JsonPathTarget {
+    /// Records that `path` (segments separated by `.`) is required with type `data_type`.
+    ///
+    /// Missing nodes along the path are created; a type recorded earlier for the same path is
+    /// replaced.
+    pub fn require_typed_path(&mut self, path: &str, data_type: ConcreteDataType) {
+        let node = path.split('.').fold(&mut self.root, |node, segment| {
+            node.children.entry(segment.to_string()).or_default()
+        });
+        node.require_leaf_type(data_type);
+    }
+
+    /// Returns `true` when the root has no children.
+    pub fn is_empty(&self) -> bool {
+        self.root.children.is_empty()
+    }
+
+    /// Builds the struct type described by the tree, or `None` when [`Self::is_empty`] holds.
+    pub fn build_type(&self) -> Option<ConcreteDataType> {
+        (!self.is_empty()).then(|| ConcreteDataType::Struct(self.root.build_struct_type()))
+    }
+}
+
+impl JsonPathTargetNode {
+    /// Sets the type required at this node, replacing any previous one.
+    fn require_leaf_type(&mut self, data_type: ConcreteDataType) {
+        self.leaf_type = Some(data_type);
+    }
+
+    /// Data type of this node: a struct of its children if it has any, otherwise the recorded
+    /// leaf type, falling back to string when none was recorded.
+    fn build_data_type(&self) -> ConcreteDataType {
+        if !self.children.is_empty() {
+            return ConcreteDataType::Struct(self.build_struct_type());
+        }
+        self.leaf_type
+            .clone()
+            .unwrap_or_else(ConcreteDataType::string_datatype)
+    }
+
+    /// Struct type with one field per child, in the `BTreeMap`'s sorted key order.
+    fn build_struct_type(&self) -> StructType {
+        let mut fields = Vec::with_capacity(self.children.len());
+        for (name, child) in &self.children {
+            fields.push(StructField::new(name.clone(), child.build_data_type(), true));
+        }
+        StructType::new(Arc::new(fields))
+    }
+}
diff --git a/src/datatypes/src/json/value.rs b/src/datatypes/src/json/value.rs
index f8cf71e936..c666796b7d 100644
--- a/src/datatypes/src/json/value.rs
+++ b/src/datatypes/src/json/value.rs
@@ -160,12 +160,18 @@ impl JsonVariant {
                 };
                 JsonNativeType::Array(Box::new(item_type))
             }
-            JsonVariant::Object(object) => JsonNativeType::Object(
-                object
-                    .iter()
-                    .map(|(k, v)| (k.clone(), v.native_type()))
-                    .collect(),
-            ),
+            JsonVariant::Object(object) => {
+                if object.is_empty() {
+                    JsonNativeType::Null
+                } else {
+                    JsonNativeType::Object(
+                        object
+                            .iter()
+                            .map(|(k, v)| (k.clone(), v.native_type()))
+                            .collect(),
+                    )
+                }
+            }
         }
     }
 
@@ -518,12 +524,18 @@ impl JsonVariantRef<'_> {
                     };
                     JsonNativeType::Array(Box::new(item_type))
                 }
-                JsonVariantRef::Object(object) => JsonNativeType::Object(
-                    object
-                        .iter()
-                        .map(|(k, v)| (k.to_string(), native_type(v)))
-                        .collect(),
-                ),
+                JsonVariantRef::Object(object) => {
+                    if object.is_empty() {
+                        JsonNativeType::Null
+                    } else {
+                        JsonNativeType::Object(
+                            object
+                                .iter()
+                                .map(|(k, v)| (k.to_string(), native_type(v)))
+                                .collect(),
+                        )
+                    }
+                }
             }
         }
         JsonType::new_native(native_type(self))
diff --git a/src/datatypes/src/schema.rs b/src/datatypes/src/schema.rs
index 50f2dba270..d53b42e3e7 100644
--- a/src/datatypes/src/schema.rs
+++ b/src/datatypes/src/schema.rs
@@ -14,6 +14,7 @@
 
 mod column_schema;
 pub mod constraint;
+pub mod ext;
 
 use std::collections::HashMap;
 use std::sync::Arc;
diff --git a/src/datatypes/src/schema/ext.rs b/src/datatypes/src/schema/ext.rs
new file mode 100644
index 0000000000..d36e6f13d8
--- /dev/null
+++ b/src/datatypes/src/schema/ext.rs
@@ -0,0 +1,29 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use crate::extension::json;
+
+/// Extension methods for Arrow schemas.
+pub trait ArrowSchemaExt {
+    /// Returns `true` if at least one field of the schema passes
+    /// [`json::is_json_extension_type`].
+    fn has_json_extension_field(&self) -> bool;
+}
+
+impl ArrowSchemaExt for arrow_schema::Schema {
+    fn has_json_extension_field(&self) -> bool {
+        let fields = self.fields();
+        fields.iter().any(json::is_json_extension_type)
+    }
+}
diff --git a/src/datatypes/src/types/json_type.rs b/src/datatypes/src/types/json_type.rs
index 13aeffb26c..6283ce7fb4 100644
--- a/src/datatypes/src/types/json_type.rs
+++ b/src/datatypes/src/types/json_type.rs
@@ -12,12 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::borrow::Cow;
 use std::collections::BTreeMap;
 use std::fmt::{Debug, Display, Formatter};
 use std::str::FromStr;
 use std::sync::{Arc, LazyLock};
 
 use arrow::datatypes::DataType as ArrowDataType;
+use arrow_schema::Fields;
 use common_base::bytes::Bytes;
 use regex::{Captures, Regex};
 use serde::{Deserialize, Serialize};
@@ -33,6 +35,7 @@ use crate::type_id::LogicalTypeId;
 use crate::types::{ListType, StructField, StructType};
 use crate::value::Value;
 use crate::vectors::json::builder::JsonVectorBuilder;
+use crate::vectors::json::builder2::Json2VectorBuilder;
 use crate::vectors::{BinaryVectorBuilder, MutableVector};
 
 pub const JSON_TYPE_NAME: &str = "Json";
@@ -164,6 +167,7 @@ pub enum JsonFormat {
     #[default]
     Jsonb,
     Native(Box<JsonNativeType>),
+    Json2,
 }
 
 /// JsonType is a data type for JSON data. It is stored as binary data of jsonb format.
@@ -188,10 +192,17 @@ impl JsonType {
         matches!(self.format, JsonFormat::Native(_))
     }
 
+    /// Returns `true` if this JSON type is in the [`JsonFormat::Json2`] format.
+    pub fn is_json2(&self) -> bool {
+        matches!(self.format, JsonFormat::Json2)
+    }
+
     pub fn native_type(&self) -> &JsonNativeType {
         match &self.format {
             JsonFormat::Jsonb => &JsonNativeType::String,
             JsonFormat::Native(x) => x.as_ref(),
+            // Not implemented for the Json2 format: reaching this arm panics.
+            JsonFormat::Json2 => unimplemented!(),
         }
     }
 
@@ -212,15 +221,28 @@ impl JsonType {
                 ConcreteDataType::Struct(t) => t.clone(),
                 x => plain_json_struct_type(x),
             },
+            JsonFormat::Json2 => unimplemented!(),
         }
     }
 
     /// Try to merge this json type with others, error on datatype conflict.
     pub fn merge(&mut self, other: &JsonType) -> Result<()> {
+        self.merge_with(other, false)
+    }
+
+    /// Same as [`Self::merge`], but conflicting native datatypes are lifted to string instead
+    /// of being reported as an error.
+    pub fn merge_with_lifting(&mut self, other: &JsonType) -> Result<()> {
+        self.merge_with(other, true)
+    }
+
+    /// Merges `other` into this JSON type. `lift` decides what happens when two native
+    /// datatypes conflict: they are lifted to string if it is `true`, otherwise it's an error.
+    fn merge_with(&mut self, other: &JsonType, lift: bool) -> Result<()> {
         match (&self.format, &other.format) {
             (JsonFormat::Jsonb, JsonFormat::Jsonb) => Ok(()),
             (JsonFormat::Native(this), JsonFormat::Native(that)) => {
-                let merged = merge(this.as_ref(), that.as_ref())?;
+                let merged = merge(this.as_ref(), that.as_ref(), lift)?;
                 self.format = JsonFormat::Native(Box::new(merged));
                 Ok(())
             }
@@ -313,13 +331,17 @@ fn is_mergeable(this: &JsonNativeType, that: &JsonNativeType) -> bool {
     }
 }
 
-fn merge(this: &JsonNativeType, that: &JsonNativeType) -> Result<JsonNativeType> {
-    fn merge_object(this: &JsonObjectType, that: &JsonObjectType) -> Result<JsonObjectType> {
+fn merge(this: &JsonNativeType, that: &JsonNativeType, lift: bool) -> Result<JsonNativeType> {
+    fn merge_object(
+        this: &JsonObjectType,
+        that: &JsonObjectType,
+        lift: bool,
+    ) -> Result<JsonObjectType> {
         let mut this = this.clone();
         // merge "that" into "this" directly:
         for (type_name, that_type) in that {
             if let Some(this_type) = this.get_mut(type_name) {
-                let merged_type = merge(this_type, that_type)?;
+                let merged_type = merge(this_type, that_type, lift)?;
                 *this_type = merged_type;
             } else {
                 this.insert(type_name.clone(), that_type.clone());
@@ -331,16 +353,52 @@ fn merge(this: &JsonNativeType, that: &JsonNativeType) -> Result<JsonNativeType>
     match (this, that) {
         (this, that) if this == that => Ok(this.clone()),
         (JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
-            merge(this.as_ref(), that.as_ref()).map(|x| JsonNativeType::Array(Box::new(x)))
+            merge(this.as_ref(), that.as_ref(), lift).map(|x| JsonNativeType::Array(Box::new(x)))
         }
         (JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
-            merge_object(this, that).map(JsonNativeType::Object)
+            merge_object(this, that, lift).map(JsonNativeType::Object)
         }
         (JsonNativeType::Null, x) | (x, JsonNativeType::Null) => Ok(x.clone()),
-        _ => MergeJsonDatatypeSnafu {
-            reason: format!("datatypes have conflict, this: {this}, that: {that}"),
+        _ => {
+            if lift {
+                // Lift the conflicting datatypes to string rather than fail the merge.
+                Ok(JsonNativeType::String)
+            } else {
+                MergeJsonDatatypeSnafu {
+                    reason: format!("datatypes have conflict, this: {this}, that: {that}"),
+                }
+                .fail()
+            }
         }
-        .fail(),
+    }
+}
+
+/// Merges two Arrow datatypes as JSON types, lifting conflicting datatypes to string.
+///
+/// Borrows `left` when the two datatypes are equal. Otherwise both are converted to
+/// [JsonType]s and merged, and the Arrow datatype of the merged JSON type is returned; should
+/// the merge fail, the result is [ArrowDataType::Utf8].
+pub fn merge_as_json_type<'a>(
+    left: &'a ArrowDataType,
+    right: &ArrowDataType,
+) -> Cow<'a, ArrowDataType> {
+    if left == right {
+        return Cow::Borrowed(left);
+    }
+
+    let mut left = JsonType::from(left);
+    let right = JsonType::from(right);
+    Cow::Owned(if left.merge_with_lifting(&right).is_ok() {
+        left.as_arrow_type()
+    } else {
+        ArrowDataType::Utf8
+    })
+}
+
+impl From<&ArrowDataType> for JsonType {
+    /// Builds a native JSON type from the [ConcreteDataType] equivalent of the Arrow datatype.
+    fn from(t: &ArrowDataType) -> Self {
+        JsonType::new_native(JsonNativeType::from(&ConcreteDataType::from_arrow_type(t)))
     }
 }
 
@@ -349,6 +400,7 @@ impl DataType for JsonType {
         match &self.format {
             JsonFormat::Jsonb => JSON_TYPE_NAME.to_string(),
             JsonFormat::Native(x) => format!("Json<{x}>"),
+            JsonFormat::Json2 => "JSON2".to_string(),
         }
     }
 
@@ -364,6 +416,7 @@ impl DataType for JsonType {
         match self.format {
             JsonFormat::Jsonb => ArrowDataType::Binary,
             JsonFormat::Native(_) => self.as_struct_type().as_arrow_type(),
+            JsonFormat::Json2 => ArrowDataType::Struct(Fields::empty()),
         }
     }
 
@@ -371,6 +424,7 @@ impl DataType for JsonType {
         match &self.format {
             JsonFormat::Jsonb => Box::new(BinaryVectorBuilder::with_capacity(capacity)),
             JsonFormat::Native(x) => Box::new(JsonVectorBuilder::new(*x.clone(), capacity)),
+            JsonFormat::Json2 => Box::new(Json2VectorBuilder::new(JsonNativeType::Null, capacity)),
         }
     }
 
diff --git a/src/datatypes/src/value.rs b/src/datatypes/src/value.rs
index 8cfb8da7ad..fd27d6e0c6 100644
--- a/src/datatypes/src/value.rs
+++ b/src/datatypes/src/value.rs
@@ -3206,7 +3206,7 @@ pub(crate) mod tests {
                 ]
                 .into(),
             )),
-            48,
+            56,
         );
     }
 
diff --git a/src/datatypes/src/vectors.rs b/src/datatypes/src/vectors.rs
index 7c7d2a4ad6..5f116e0952 100644
--- a/src/datatypes/src/vectors.rs
+++ b/src/datatypes/src/vectors.rs
@@ -35,7 +35,7 @@ mod duration;
 mod eq;
 mod helper;
 mod interval;
-pub(crate) mod json;
+pub mod json;
 mod list;
 mod null;
 pub(crate) mod operations;
diff --git a/src/datatypes/src/vectors/json.rs b/src/datatypes/src/vectors/json.rs
index 83aa1dd2aa..4bcc32e3b7 100644
--- a/src/datatypes/src/vectors/json.rs
+++ b/src/datatypes/src/vectors/json.rs
@@ -12,4 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+pub mod array;
 pub(crate) mod builder;
+pub(crate) mod builder2;
diff --git a/src/datatypes/src/vectors/json/array.rs b/src/datatypes/src/vectors/json/array.rs
new file mode 100644
index 0000000000..ee60c47d3f
--- /dev/null
+++ b/src/datatypes/src/vectors/json/array.rs
@@ -0,0 +1,353 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::cmp::Ordering;
+use std::sync::Arc;
+
+use arrow::compute;
+use arrow::util::display::{ArrayFormatter, FormatOptions};
+use arrow_array::cast::AsArray;
+use arrow_array::{Array, ArrayRef, GenericListArray, StructArray, new_null_array};
+use arrow_schema::DataType;
+use snafu::ResultExt;
+
+use crate::arrow_array::StringArray;
+use crate::error::{AlignJsonArraySnafu, ArrowComputeSnafu, Result};
+
+/// A borrowed view of an Arrow array holding JSON data, with helpers to reshape it into
+/// another JSON Arrow data type.
+pub struct JsonArray<'a> {
+    inner: &'a ArrayRef,
+}
+
+impl JsonArray<'_> {
+    /// Align a JSON array to the `expect` data type. The `expect` data type is often the
+    /// "largest" JSON type after some insertions in the table schema, while the JSON array previously
+    /// written in the SST could be lagged behind it. So it's important to "align" the JSON array by
+    /// setting the missing fields with null arrays, or casting the data.
+    ///
+    /// # Errors
+    ///
+    /// - The JSON array has a field that is absent from `expect`. Such a field is never dropped
+    ///   silently, because that would lose data.
+    /// - A field can't be converted to its expected type, or the aligned array can't be built.
+    ///
+    /// # Panics
+    ///
+    /// - The JSON array is not an Arrow [StructArray], or the provided `expect` data type is not
+    ///   of Struct type. Both of which shouldn't happen unless we switch our implementation of how
+    ///   JSON array is physically stored.
+    pub fn try_align(&self, expect: &DataType) -> Result<ArrayRef> {
+        let json_type = self.inner.data_type();
+        if json_type == expect {
+            return Ok(self.inner.clone());
+        }
+
+        let struct_array = self.inner.as_struct();
+        let array_fields = struct_array.fields();
+        let array_columns = struct_array.columns();
+        let DataType::Struct(expect_fields) = expect else {
+            unreachable!()
+        };
+        let mut aligned = Vec::with_capacity(expect_fields.len());
+
+        // Compare the fields in the JSON array and the to-be-aligned schema, amending with null arrays
+        // on the way. It's very important to note that fields in the JSON array and those in the JSON type
+        // are both **SORTED**.
+        debug_assert!(expect_fields.iter().map(|f| f.name()).is_sorted());
+        debug_assert!(array_fields.iter().map(|f| f.name()).is_sorted());
+
+        let mut i = 0; // point to the expect fields
+        let mut j = 0; // point to the array fields
+        while i < expect_fields.len() && j < array_fields.len() {
+            let expect_field = &expect_fields[i];
+            let array_field = &array_fields[j];
+            match expect_field.name().cmp(array_field.name()) {
+                Ordering::Equal => {
+                    if expect_field.data_type() == array_field.data_type() {
+                        aligned.push(array_columns[j].clone());
+                    } else {
+                        let expect_type = expect_field.data_type();
+                        let array_type = array_field.data_type();
+                        let array = match (expect_type, array_type) {
+                            (DataType::Struct(_), DataType::Struct(_)) => {
+                                JsonArray::from(&array_columns[j]).try_align(expect_type)?
+                            }
+                            (DataType::List(expect_item), DataType::List(array_item)) => {
+                                let list_array = array_columns[j].as_list::<i32>();
+                                let item_aligned =
+                                    match (expect_item.data_type(), array_item.data_type()) {
+                                        (DataType::Struct(_), DataType::Struct(_)) => {
+                                            JsonArray::from(list_array.values())
+                                                .try_align(expect_item.data_type())?
+                                        }
+                                        _ => JsonArray::from(list_array.values())
+                                            .try_cast(expect_item.data_type())?,
+                                    };
+                                Arc::new(
+                                    GenericListArray::<i32>::try_new(
+                                        expect_item.clone(),
+                                        list_array.offsets().clone(),
+                                        item_aligned,
+                                        list_array.nulls().cloned(),
+                                    )
+                                    .context(ArrowComputeSnafu)?,
+                                )
+                            }
+                            _ => JsonArray::from(&array_columns[j]).try_cast(expect_type)?,
+                        };
+                        aligned.push(array);
+                    }
+                    i += 1;
+                    j += 1;
+                }
+                Ordering::Less => {
+                    aligned.push(new_null_array(expect_field.data_type(), struct_array.len()));
+                    i += 1;
+                }
+                Ordering::Greater => {
+                    // The array field has no counterpart in the expected type. Stop here and
+                    // let the check below report every field that is left in the array.
+                    break;
+                }
+            }
+        }
+
+        // Any array field that has not been consumed at this point is absent from `expect`.
+        // Report it instead of dropping its data silently.
+        if j < array_fields.len() {
+            let extra_fields = array_fields[j..]
+                .iter()
+                .map(|f| f.name())
+                .collect::<Vec<_>>();
+            return AlignJsonArraySnafu {
+                reason: format!("this json array has more fields {extra_fields:?}"),
+            }
+            .fail();
+        }
+
+        // The expected fields that are left are all missing from the array, fill them with nulls.
+        for field in &expect_fields[i..] {
+            aligned.push(new_null_array(field.data_type(), struct_array.len()));
+        }
+
+        let json_array = StructArray::try_new(
+            expect_fields.clone(),
+            aligned,
+            struct_array.nulls().cloned(),
+        )
+        .map_err(|e| {
+            AlignJsonArraySnafu {
+                reason: e.to_string(),
+            }
+            .build()
+        })?;
+        Ok(Arc::new(json_array))
+    }
+
+    /// Cast the inner array to `to_type`. When Arrow has no cast between the two types, every
+    /// valid value is rendered to its display string instead (nulls are kept), which produces
+    /// a string array regardless of `to_type`.
+    fn try_cast(&self, to_type: &DataType) -> Result<ArrayRef> {
+        if compute::can_cast_types(self.inner.data_type(), to_type) {
+            return compute::cast(&self.inner, to_type).context(ArrowComputeSnafu);
+        }
+
+        let formatter = ArrayFormatter::try_new(&self.inner, &FormatOptions::default())
+            .context(ArrowComputeSnafu)?;
+        let values = (0..self.inner.len())
+            .map(|i| {
+                self.inner
+                    .is_valid(i)
+                    .then(|| formatter.value(i).to_string())
+            })
+            .collect::<Vec<_>>();
+        Ok(Arc::new(StringArray::from(values)))
+    }
+}
+
+impl<'a> From<&'a ArrayRef> for JsonArray<'a> {
+    /// Wraps the borrowed array; no data is copied.
+    fn from(inner: &'a ArrayRef) -> Self {
+        Self { inner }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use arrow_array::types::Int64Type;
+    use arrow_array::{BooleanArray, Float64Array, Int64Array, ListArray};
+    use arrow_schema::{Field, Fields};
+
+    use super::*;
+
+    #[test]
+    fn test_align_json_array() -> Result<()> {
+        struct TestCase {
+            json_array: ArrayRef,
+            schema_type: DataType,
+            expected: std::result::Result<ArrayRef, String>,
+        }
+
+        impl TestCase {
+            fn new(
+                json_array: StructArray,
+                schema_type: Fields,
+                expected: std::result::Result<Vec<ArrayRef>, String>,
+            ) -> Self {
+                Self {
+                    json_array: Arc::new(json_array),
+                    schema_type: DataType::Struct(schema_type.clone()),
+                    expected: expected
+                        .map(|x| Arc::new(StructArray::new(schema_type, x, None)) as ArrayRef),
+                }
+            }
+
+            fn test(self) -> Result<()> {
+                let result = JsonArray::from(&self.json_array).try_align(&self.schema_type);
+                match (result, self.expected) {
+                    (Ok(json_array), Ok(expected)) => assert_eq!(&json_array, &expected),
+                    (Ok(json_array), Err(e)) => {
+                        panic!("expecting error {e} but actually get: {json_array:?}")
+                    }
+                    (Err(e), Err(expected)) => assert_eq!(e.to_string(), expected),
+                    (Err(e), Ok(_)) => return Err(e),
+                }
+                Ok(())
+            }
+        }
+
+        // Test empty json array can be aligned with a complex json type.
+        TestCase::new(
+            StructArray::new_empty_fields(2, None),
+            Fields::from(vec![
+                Field::new("int", DataType::Int64, true),
+                Field::new_struct(
+                    "nested",
+                    vec![Field::new("bool", DataType::Boolean, true)],
+                    true,
+                ),
+                Field::new("string", DataType::Utf8, true),
+            ]),
+            Ok(vec![
+                Arc::new(Int64Array::new_null(2)) as ArrayRef,
+                Arc::new(StructArray::new_null(
+                    Fields::from(vec![Arc::new(Field::new("bool", DataType::Boolean, true))]),
+                    2,
+                )),
+                Arc::new(StringArray::new_null(2)),
+            ]),
+        )
+        .test()?;
+
+        // Test simple json array alignment.
+        TestCase::new(
+            StructArray::from(vec![(
+                Arc::new(Field::new("float", DataType::Float64, true)),
+                Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0])) as ArrayRef,
+            )]),
+            Fields::from(vec![
+                Field::new("float", DataType::Float64, true),
+                Field::new("string", DataType::Utf8, true),
+            ]),
+            Ok(vec![
+                Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0])) as ArrayRef,
+                Arc::new(StringArray::new_null(3)),
+            ]),
+        )
+        .test()?;
+
+        // Test complex json array alignment.
+        TestCase::new(
+            StructArray::from(vec![
+                (
+                    Arc::new(Field::new_list(
+                        "list",
+                        Field::new_list_field(DataType::Int64, true),
+                        true,
+                    )),
+                    Arc::new(ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
+                        Some(vec![Some(1)]),
+                        None,
+                        Some(vec![Some(2), Some(3)]),
+                    ])) as ArrayRef,
+                ),
+                (
+                    Arc::new(Field::new_struct(
+                        "nested",
+                        vec![Field::new("int", DataType::Int64, true)],
+                        true,
+                    )),
+                    Arc::new(StructArray::from(vec![(
+                        Arc::new(Field::new("int", DataType::Int64, true)),
+                        Arc::new(Int64Array::from(vec![-1, -2, -3])) as ArrayRef,
+                    )])),
+                ),
+                (
+                    Arc::new(Field::new("string", DataType::Utf8, true)),
+                    Arc::new(StringArray::from(vec!["a", "b", "c"])),
+                ),
+            ]),
+            Fields::from(vec![
+                Field::new("bool", DataType::Boolean, true),
+                Field::new_list("list", Field::new_list_field(DataType::Int64, true), true),
+                Field::new_struct(
+                    "nested",
+                    vec![
+                        Field::new("float", DataType::Float64, true),
+                        Field::new("int", DataType::Int64, true),
+                    ],
+                    true,
+                ),
+                Field::new("string", DataType::Utf8, true),
+            ]),
+            Ok(vec![
+                Arc::new(BooleanArray::new_null(3)) as ArrayRef,
+                Arc::new(ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
+                    Some(vec![Some(1)]),
+                    None,
+                    Some(vec![Some(2), Some(3)]),
+                ])),
+                Arc::new(StructArray::from(vec![
+                    (
+                        Arc::new(Field::new("float", DataType::Float64, true)),
+                        Arc::new(Float64Array::new_null(3)) as ArrayRef,
+                    ),
+                    (
+                        Arc::new(Field::new("int", DataType::Int64, true)),
+                        Arc::new(Int64Array::from(vec![-1, -2, -3])),
+                    ),
+                ])),
+                Arc::new(StringArray::from(vec!["a", "b", "c"])),
+            ]),
+        )
+        .test()?;
+
+        // Test align failed.
+        TestCase::new(
+            StructArray::try_from(vec![
+                ("i", Arc::new(Int64Array::from(vec![1])) as ArrayRef),
+                ("j", Arc::new(Int64Array::from(vec![2])) as ArrayRef),
+            ])
+            .unwrap(),
+            Fields::from(vec![Field::new("i", DataType::Int64, true)]),
+            Err(
+                r#"Failed to align JSON array, reason: this json array has more fields ["j"]"#
+                    .to_string(),
+            ),
+        )
+        .test()?;
+        Ok(())
+    }
+}
diff --git a/src/datatypes/src/vectors/json/builder2.rs b/src/datatypes/src/vectors/json/builder2.rs
new file mode 100644
index 0000000000..5fff890dfc
--- /dev/null
+++ b/src/datatypes/src/vectors/json/builder2.rs
@@ -0,0 +1,191 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::any::Any;
+use std::borrow::Cow;
+use std::sync::LazyLock;
+
+use crate::data_type::ConcreteDataType;
+use crate::error::{Result, TryFromValueSnafu, UnsupportedOperationSnafu};
+use crate::json::value::{JsonValue, JsonValueRef, JsonVariant};
+use crate::prelude::{ValueRef, Vector, VectorRef};
+use crate::types::JsonType;
+use crate::types::json_type::JsonNativeType;
+use crate::vectors::{MutableVector, StructVectorBuilder};
+
+/// A [MutableVector] for JSON values whose types may differ from one value to the next.
+///
+/// Pushed values are buffered as they are, while their JSON types are merged (with lifting)
+/// into a single type. The buffered values are aligned to that merged type only when the
+/// vector is built.
+pub(crate) struct Json2VectorBuilder {
+    /// The merge of the JSON types of all values pushed so far.
+    merged_type: JsonType,
+    /// Capacity given to the underlying struct vector builder when the vector is built.
+    capacity: usize,
+    /// The pushed values, in insertion order.
+    values: Vec<JsonValue>,
+}
+
+impl Json2VectorBuilder {
+    /// Creates an empty builder whose merged type starts as `json_type`.
+    pub(crate) fn new(json_type: JsonNativeType, capacity: usize) -> Self {
+        let merged_type = JsonType::new_native(json_type);
+        Self {
+            merged_type,
+            capacity,
+            values: Vec::new(),
+        }
+    }
+
+    /// Builds a struct vector from the buffered values, aligning each of them to the merged
+    /// type first. The buffered values are left untouched.
+    fn build(&self) -> VectorRef {
+        let struct_type = self.merged_type.as_struct_type();
+        let mut builder = StructVectorBuilder::with_type_and_capacity(struct_type, self.capacity);
+        for value in &self.values {
+            let value = align_json_value_with_type(&self.merged_type, value);
+            let pushed = builder.try_push_value_ref(&(*value).as_ref().as_value_ref());
+            // Safety: after the `align_json_value_with_type`, the values to push must have
+            // the same types with the builder, so it's not expected to meet any errors here.
+            if let Err(e) = pushed {
+                panic!("Failed to push JSON value {value}: {e:?}");
+            }
+        }
+        builder.to_vector()
+    }
+}
+
+impl MutableVector for Json2VectorBuilder {
+    /// Reports the JSON type merged from the values pushed so far.
+    fn data_type(&self) -> ConcreteDataType {
+        let json_type = self.merged_type.clone();
+        ConcreteDataType::Json(json_type)
+    }
+
+    /// Number of values pushed so far, nulls included.
+    fn len(&self) -> usize {
+        self.values.len()
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn as_mut_any(&mut self) -> &mut dyn Any {
+        self
+    }
+
+    // NOTE(review): this does not clear the buffered values; confirm that is what callers of
+    // `MutableVector::to_vector` expect.
+    fn to_vector(&mut self) -> VectorRef {
+        self.build()
+    }
+
+    fn to_vector_cloned(&self) -> VectorRef {
+        self.build()
+    }
+
+    /// Buffers a JSON value and merges its type into the builder's type.
+    ///
+    /// Fails if `value` is not a JSON value, or if its type can't be merged. On failure the
+    /// value is not buffered.
+    fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> {
+        let ValueRef::Json(value) = value else {
+            return TryFromValueSnafu {
+                reason: format!("expected json value, got {value:?}"),
+            }
+            .fail();
+        };
+        // Merge the type before buffering, so a value whose type is rejected is never stored.
+        self.merged_type.merge_with_lifting(value.json_type())?;
+        self.values.push(JsonValue::from(value.clone().into_variant()));
+        Ok(())
+    }
+
+    /// Pushes a JSON null.
+    fn push_null(&mut self) {
+        static NULL_JSON: LazyLock<ValueRef> =
+            LazyLock::new(|| ValueRef::Json(Box::new(JsonValueRef::null())));
+        // Safety: learning from the method "try_push_value_ref", a null json value should be
+        // always able to push into any json vectors.
+        if let Err(e) = self.try_push_value_ref(&NULL_JSON) {
+            panic!("failed to push null json value, error: {e}");
+        }
+    }
+
+    /// Not supported by this builder, always fails.
+    fn extend_slice_of(&mut self, _: &dyn Vector, _: usize, _: usize) -> Result<()> {
+        UnsupportedOperationSnafu {
+            op: "extend_slice_of",
+            vector_type: "JsonVector",
+        }
+        .fail()
+    }
+}
+
+/// Returns `value` reshaped to `expected_type`, borrowing it when its type already matches.
+///
+/// Object fields missing from `value` become nulls, fields not in `expected_type` are dropped,
+/// and a value expected to be a string is rendered as its JSON text.
+///
+/// # Panics
+///
+/// Panics on a type mismatch other than the ones described above.
+fn align_json_value_with_type<'a>(
+    expected_type: &JsonType,
+    value: &'a JsonValue,
+) -> Cow<'a, JsonValue> {
+    fn align_variant(expected: &JsonNativeType, variant: JsonVariant) -> JsonVariant {
+        match (expected, variant) {
+            // Nulls stay null, and anything expected to be null becomes null.
+            (_, JsonVariant::Null) | (JsonNativeType::Null, _) => JsonVariant::Null,
+            (JsonNativeType::Bool, JsonVariant::Bool(v)) => JsonVariant::Bool(v),
+            (JsonNativeType::Number(_), JsonVariant::Number(v)) => JsonVariant::Number(v),
+            (JsonNativeType::String, JsonVariant::String(v)) => JsonVariant::String(v),
+
+            (JsonNativeType::Array(item_type), JsonVariant::Array(items)) => {
+                let aligned_items = items
+                    .into_iter()
+                    .map(|item| align_variant(item_type.as_ref(), item));
+                JsonVariant::Array(aligned_items.collect())
+            }
+
+            (JsonNativeType::Object(expected_fields), JsonVariant::Object(object)) => {
+                // Walk the expected fields, not the object's own: missing fields turn into
+                // nulls, and fields unknown to the expected type are left out.
+                let aligned_fields = expected_fields.iter().map(|(name, field_type)| {
+                    let field = object.get(name).cloned().unwrap_or(JsonVariant::Null);
+                    (name.clone(), align_variant(field_type, field))
+                });
+                JsonVariant::Object(aligned_fields.collect())
+            }
+
+            // Any other value that is expected to be a string is stored as its JSON text.
+            (JsonNativeType::String, v) => {
+                let json: serde_json::Value = JsonValue::from(v).into();
+                JsonVariant::String(json.to_string())
+            }
+
+            (t, v) => panic!("unsupported json alignment cast from {v} to {t}"),
+        }
+    }
+
+    if value.json_type() == expected_type {
+        Cow::Borrowed(value)
+    } else {
+        let aligned = align_variant(expected_type.native_type(), value.clone().into_variant());
+        Cow::Owned(JsonValue::from(aligned))
+    }
+}
diff --git a/src/mito2/Cargo.toml b/src/mito2/Cargo.toml
index a78bf079b0..e2b63d6548 100644
--- a/src/mito2/Cargo.toml
+++ b/src/mito2/Cargo.toml
@@ -16,6 +16,7 @@ workspace = true
 [dependencies]
 api.workspace = true
 aquamarine.workspace = true
+arrow-schema.workspace = true
 async-channel = "1.9"
 common-stat.workspace = true
 async-stream.workspace = true
diff --git a/src/mito2/benches/memtable_bench.rs b/src/mito2/benches/memtable_bench.rs
index 8336625e3c..e6881a766a 100644
--- a/src/mito2/benches/memtable_bench.rs
+++ b/src/mito2/benches/memtable_bench.rs
@@ -229,6 +229,7 @@ fn bulk_part_converter(c: &mut Criterion) {
                     &FlatSchemaOptions {
                         raw_pk_columns: false,
                         string_pk_use_dict: false,
+                        ..Default::default()
                     },
                 );
                 let mut converter = BulkPartConverter::new(&metadata, schema, rows, codec, false);
@@ -255,6 +256,7 @@ fn bulk_part_converter(c: &mut Criterion) {
                         &FlatSchemaOptions {
                             raw_pk_columns: true,
                             string_pk_use_dict: true,
+                            ..Default::default()
                         },
                     );
                     let mut converter =
diff --git a/src/mito2/src/access_layer.rs b/src/mito2/src/access_layer.rs
index 33180ebf46..5b53af988b 100644
--- a/src/mito2/src/access_layer.rs
+++ b/src/mito2/src/access_layer.rs
@@ -15,6 +15,7 @@
 use std::sync::Arc;
 use std::time::{Duration, Instant};
 
+use arrow_schema::SchemaRef;
 use async_stream::try_stream;
 use common_time::Timestamp;
 use futures::{Stream, TryStreamExt};
@@ -403,7 +404,12 @@ impl AccessLayer {
                 }
                 FormatType::Flat => {
                     writer
-                        .write_all_flat(request.source, request.max_sequence, write_opts)
+                        .write_all_flat(
+                            request.source,
+                            request.schema,
+                            request.max_sequence,
+                            write_opts,
+                        )
                         .await?
                 }
             }
@@ -526,6 +532,8 @@ pub struct SstWriteRequest {
     pub op_type: OperationType,
     pub metadata: RegionMetadataRef,
     pub source: FlatSource,
+    // FIXME(LFC): this schema is actually the "merged json2 datatype in `source` parquets", rename it
+    pub schema: Option<SchemaRef>,
     pub cache_manager: CacheManagerRef,
     #[allow(dead_code)]
     pub storage: Option<String>,
diff --git a/src/mito2/src/cache/write_cache.rs b/src/mito2/src/cache/write_cache.rs
index e2483ed4e4..ad4cae8bda 100644
--- a/src/mito2/src/cache/write_cache.rs
+++ b/src/mito2/src/cache/write_cache.rs
@@ -256,7 +256,12 @@ impl WriteCache {
             }
             crate::sst::FormatType::Flat => {
                 writer
-                    .write_all_flat(write_request.source, write_request.max_sequence, write_opts)
+                    .write_all_flat(
+                        write_request.source,
+                        write_request.schema,
+                        write_request.max_sequence,
+                        write_opts,
+                    )
                     .await?
             }
         };
@@ -561,6 +566,7 @@ mod tests {
             bloom_filter_index_config: Default::default(),
             #[cfg(feature = "vector_index")]
             vector_index_config: Default::default(),
+            schema: None,
         };
 
         let upload_request = SstUploadRequest {
@@ -664,6 +670,7 @@ mod tests {
             bloom_filter_index_config: Default::default(),
             #[cfg(feature = "vector_index")]
             vector_index_config: Default::default(),
+            schema: None,
         };
         let write_opts = WriteOptions {
             row_group_size: 512,
@@ -755,6 +762,7 @@ mod tests {
             bloom_filter_index_config: Default::default(),
             #[cfg(feature = "vector_index")]
             vector_index_config: Default::default(),
+            schema: None,
         };
         let write_opts = WriteOptions {
             row_group_size: 512,
diff --git a/src/mito2/src/compaction.rs b/src/mito2/src/compaction.rs
index 944c51ebd6..b02675be0b 100644
--- a/src/mito2/src/compaction.rs
+++ b/src/mito2/src/compaction.rs
@@ -29,6 +29,7 @@ use std::time::Instant;
 
 use api::v1::region::compact_request;
 use api::v1::region::compact_request::Options;
+use arrow_schema::SchemaRef;
 use common_base::Plugins;
 use common_memory_manager::OnExhaustedPolicy;
 use common_meta::key::SchemaMetadataManagerRef;
@@ -38,6 +39,7 @@ use common_time::timestamp::TimeUnit;
 use common_time::{TimeToLive, Timestamp};
 use datafusion_common::ScalarValue;
 use datafusion_expr::Expr;
+use datatypes::schema::ext::ArrowSchemaExt;
 use serde::{Deserialize, Serialize};
 use snafu::{OptionExt, ResultExt};
 use store_api::metadata::RegionMetadataRef;
@@ -57,6 +59,7 @@ use crate::error::{
     RegionClosedSnafu, RegionDroppedSnafu, RegionTruncatedSnafu, RemoteCompactionSnafu, Result,
     TimeRangePredicateOverflowSnafu, TimeoutSnafu,
 };
+use crate::memtable::merge_json_extension_fields;
 use crate::metrics::{COMPACTION_STAGE_ELAPSED, INFLIGHT_COMPACTION_COUNT};
 use crate::read::BoxedRecordBatchStream;
 use crate::read::projection::ProjectionMapper;
@@ -839,12 +842,29 @@ struct CompactionSstReaderBuilder<'a> {
 
 impl CompactionSstReaderBuilder<'_> {
     /// Builds [BoxedRecordBatchStream] that reads all SST files and yields batches in flat format for compaction.
-    async fn build_flat_sst_reader(self) -> Result<BoxedRecordBatchStream> {
+    async fn build_flat_sst_reader(self) -> Result<(Option<SchemaRef>, BoxedRecordBatchStream)> {
         let scan_input = self.build_scan_input()?.with_compaction(true);
 
-        SeqScan::new(scan_input)
-            .build_flat_reader_for_compaction()
-            .await
+        // The merged schema is only computed when the output schema has a JSON extension
+        // field, and stays `None` when no parquet record batch schema is collected.
+        let has_json_field = scan_input
+            .mapper
+            .output_schema()
+            .arrow_schema()
+            .has_json_extension_field();
+        let json_concretized_schema = if has_json_field {
+            let parquet_schemas = scan_input.collect_parquet_record_batch_schemas().await?;
+            parquet_schemas
+                .split_first()
+                .map(|(base, others)| merge_json_extension_fields(base, others))
+        } else {
+            None
+        };
+
+        let reader = SeqScan::new(scan_input)
+            .build_flat_reader_for_compaction(json_concretized_schema.clone())
+            .await?;
+        Ok((json_concretized_schema, reader))
     }
 
     fn build_scan_input(self) -> Result<ScanInput> {
diff --git a/src/mito2/src/compaction/compactor.rs b/src/mito2/src/compaction/compactor.rs
index ff4317331f..744604f5ba 100644
--- a/src/mito2/src/compaction/compactor.rs
+++ b/src/mito2/src/compaction/compactor.rs
@@ -352,7 +352,7 @@ impl DefaultCompactor {
             time_range: output.output_time_range,
             merge_mode,
         };
-        let reader = builder.build_flat_sst_reader().await?;
+        let (schema, reader) = builder.build_flat_sst_reader().await?;
         let source = FlatSource::Stream(reader);
         let mut metrics = Metrics::new(WriteType::Compaction);
         let region_metadata = compaction_region.region_metadata.clone();
@@ -363,6 +363,7 @@ impl DefaultCompactor {
                     op_type: OperationType::Compact,
                     metadata: region_metadata.clone(),
                     source,
+                    schema,
                     cache_manager: compaction_region.cache_manager.clone(),
                     storage,
                     max_sequence: max_sequence.map(NonZero::get),
diff --git a/src/mito2/src/flush.rs b/src/mito2/src/flush.rs
index 7be81dec8d..92d0468ee3 100644
--- a/src/mito2/src/flush.rs
+++ b/src/mito2/src/flush.rs
@@ -543,14 +543,14 @@ impl RegionFlushTask {
         write_opts: &WriteOptions,
         mem_ranges: MemtableRanges,
     ) -> Result<FlushFlatMemResult> {
-        let batch_schema = to_flat_sst_arrow_schema(
-            &version.metadata,
-            &FlatSchemaOptions::from_encoding(version.metadata.primary_key_encoding),
-        );
+        let mut options = FlatSchemaOptions::from_encoding(version.metadata.primary_key_encoding);
+        options.override_schema = mem_ranges.schema();
+
+        let batch_schema = to_flat_sst_arrow_schema(&version.metadata, &options);
         let field_column_start =
             flat_format::field_column_start(&version.metadata, batch_schema.fields().len());
         let flat_sources = memtable_flat_sources(
-            batch_schema,
+            batch_schema.clone(),
             mem_ranges,
             &version.options,
             field_column_start,
@@ -558,7 +558,8 @@ impl RegionFlushTask {
         let mut tasks = Vec::with_capacity(flat_sources.encoded.len() + flat_sources.sources.len());
         let num_encoded = flat_sources.encoded.len();
         for (source, max_sequence) in flat_sources.sources {
-            let write_request = self.new_write_request(version, max_sequence, source);
+            let write_request =
+                self.new_write_request(version, max_sequence, source, batch_schema.clone());
             let access_layer = self.access_layer.clone();
             let write_opts = write_opts.clone();
             let semaphore = self.flush_semaphore.clone();
@@ -629,6 +630,7 @@ impl RegionFlushTask {
         version: &VersionRef,
         max_sequence: u64,
         source: FlatSource,
+        schema: SchemaRef,
     ) -> SstWriteRequest {
         let flat_format = version
             .options
@@ -639,6 +641,7 @@ impl RegionFlushTask {
             op_type: OperationType::Flush,
             metadata: version.metadata.clone(),
             source,
+            schema: Some(schema),
             cache_manager: self.cache_manager.clone(),
             storage: version.options.storage.clone(),
             max_sequence: Some(max_sequence),
diff --git a/src/mito2/src/memtable.rs b/src/mito2/src/memtable.rs
index e1494aa47b..3218b3c2ca 100644
--- a/src/mito2/src/memtable.rs
+++ b/src/mito2/src/memtable.rs
@@ -14,6 +14,7 @@
 
 //! Memtables are write buffers for regions.
 
+use std::borrow::Cow;
 use std::collections::BTreeMap;
 use std::fmt;
 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
@@ -62,6 +63,10 @@ pub use bulk::part::{
     BulkPart, BulkPartEncoder, BulkPartMeta, UnorderedPart, record_batch_estimated_size,
     sort_primary_key_record_batch,
 };
+use datatypes::arrow::datatypes::{Schema, SchemaRef};
+use datatypes::extension::json;
+use datatypes::schema::ext::ArrowSchemaExt;
+use datatypes::types::json_type;
 #[cfg(any(test, feature = "test"))]
 pub use time_partition::filter_record_batch;
 
@@ -228,6 +233,55 @@ impl MemtableRanges {
             .max()
             .unwrap_or(0)
     }
+
+    /// Returns a single schema describing the record batches of all ranges, or `None`
+    /// when no range reports a record batch schema.
+    pub(crate) fn schema(&self) -> Option<SchemaRef> {
+        let schemas: Vec<_> = self
+            .ranges
+            .values()
+            .filter_map(|range| range.record_batch_schema())
+            .collect();
+        let (first, rest) = schemas.split_first()?;
+
+        if schemas.iter().any(|schema| schema.has_json_extension_field()) {
+            // By convention only the concrete data types (Arrow's Struct) of JSON extension
+            // fields may differ between schemas; the metadata and the field count are the
+            // same. So merging those data types is enough to produce the final schema.
+            Some(merge_json_extension_fields(first, rest))
+        } else {
+            // Without JSON extension fields every schema derives from the same region
+            // metadata and must be identical, so the first one stands for all of them.
+            Some(first.clone())
+        }
+    }
+}
+
+/// Builds a schema from `base` in which every JSON extension field carries the data type
+/// merged from `base` and from the fields at the same index in `others`.
+///
+/// All other fields and the schema metadata are taken from `base` unchanged.
+pub(crate) fn merge_json_extension_fields(base: &SchemaRef, others: &[SchemaRef]) -> SchemaRef {
+    let mut merged_fields = base.fields().to_vec();
+    for (index, slot) in merged_fields.iter_mut().enumerate() {
+        if json::is_json_extension_type(slot) {
+            // Merge the types found in `others` first; `None` means `others` is empty.
+            let others_type = others
+                .iter()
+                .map(|schema| Cow::Borrowed(schema.field(index).data_type()))
+                .reduce(|a, b| {
+                    Cow::Owned(json_type::merge_as_json_type(a.as_ref(), b.as_ref()).into_owned())
+                });
+            // The field is rebuilt only when that type differs from the one in `base`.
+            if let Some(others_type) = others_type.filter(|ty| slot.data_type() != ty.as_ref()) {
+                let merged = json_type::merge_as_json_type(slot.data_type(), others_type.as_ref());
+                let mut updated = slot.as_ref().clone();
+                updated.set_data_type(merged.into_owned());
+                *slot = Arc::new(updated);
+            }
+        }
+    }
+    Arc::new(Schema::new_with_metadata(merged_fields, base.metadata().clone()))
 }
 
 impl IterBuilder for MemtableRanges {
@@ -558,6 +612,11 @@ pub trait IterBuilder: Send + Sync {
         .fail()
     }
 
+    /// Returns the schema of the record batches this builder produces; `None` by default.
+    fn record_batch_schema(&self) -> Option<SchemaRef> {
+        None
+    }
+
     /// Returns the [EncodedRange] if the range is already encoded into SST.
     fn encoded_range(&self) -> Option<EncodedRange> {
         None
@@ -735,6 +794,11 @@ impl MemtableRange {
         self.context.builder.is_record_batch()
     }
 
+    /// Returns the schema of record batches if this range supports record batch iteration.
+    pub fn record_batch_schema(&self) -> Option<SchemaRef> {
+        self.context.builder.record_batch_schema()
+    }
+
     pub fn num_rows(&self) -> usize {
         self.stats.num_rows
     }
diff --git a/src/mito2/src/memtable/bulk.rs b/src/mito2/src/memtable/bulk.rs
index 9d25d0c39f..91dd1f662f 100644
--- a/src/mito2/src/memtable/bulk.rs
+++ b/src/mito2/src/memtable/bulk.rs
@@ -810,6 +810,10 @@ impl IterBuilder for BulkRangeIterBuilder {
     fn encoded_range(&self) -> Option<EncodedRange> {
         None
     }
+
+    fn record_batch_schema(&self) -> Option<SchemaRef> {
+        Some(self.part.batch.schema())
+    }
 }
 
 impl IterBuilder for MultiBulkRangeIterBuilder {
@@ -842,6 +846,10 @@ impl IterBuilder for MultiBulkRangeIterBuilder {
     fn encoded_range(&self) -> Option<EncodedRange> {
         None
     }
+
+    fn record_batch_schema(&self) -> Option<SchemaRef> {
+        self.part.record_batch_schema()
+    }
 }
 
 /// Iterator builder for encoded bulk range
diff --git a/src/mito2/src/memtable/bulk/part.rs b/src/mito2/src/memtable/bulk/part.rs
index 986e9409ee..0f34fd4c7b 100644
--- a/src/mito2/src/memtable/bulk/part.rs
+++ b/src/mito2/src/memtable/bulk/part.rs
@@ -14,6 +14,7 @@
 
 //! Bulk part encoder/decoder.
 
+use std::borrow::Cow;
 use std::collections::{HashMap, HashSet};
 use std::sync::Arc;
 use std::time::{Duration, Instant};
@@ -32,9 +33,13 @@ use datatypes::arrow::datatypes::{
     DataType as ArrowDataType, Field, Schema, SchemaRef, UInt32Type,
 };
 use datatypes::data_type::DataType;
+use datatypes::extension::json::is_json_extension_type;
 use datatypes::prelude::{MutableVector, Vector};
+use datatypes::schema::ext::ArrowSchemaExt;
+use datatypes::types::json_type;
 use datatypes::value::ValueRef;
 use datatypes::vectors::Helper;
+use datatypes::vectors::json::array::JsonArray;
 use mito_codec::key_values::{KeyValue, KeyValues};
 use mito_codec::row_converter::PrimaryKeyCodec;
 use parquet::arrow::ArrowWriter;
@@ -49,9 +54,9 @@ use store_api::storage::consts::PRIMARY_KEY_COLUMN_NAME;
 use store_api::storage::{FileId, SequenceNumber, SequenceRange};
 
 use crate::error::{
-    self, ColumnNotFoundSnafu, ComputeArrowSnafu, CreateDefaultSnafu, DataTypeMismatchSnafu,
-    EncodeMemtableSnafu, EncodeSnafu, InvalidMetadataSnafu, InvalidRequestSnafu,
-    NewRecordBatchSnafu, Result,
+    self, ColumnNotFoundSnafu, ComputeArrowSnafu, ConvertValueSnafu, CreateDefaultSnafu,
+    DataTypeMismatchSnafu, EncodeMemtableSnafu, EncodeSnafu, InvalidMetadataSnafu,
+    InvalidRequestSnafu, NewRecordBatchSnafu, Result,
 };
 use crate::memtable::bulk::context::BulkIterContextRef;
 use crate::memtable::bulk::part_reader::EncodedBulkPartIter;
@@ -425,13 +430,15 @@ impl UnorderedPart {
             return Ok(Some(self.parts[0].batch.clone()));
         }
 
-        // Get the schema from the first part
+        // Get the schema from the first part and normalize JSON2 columns across all parts.
         let schema = self.parts[0].batch.schema();
-
-        // Concatenate all record batches
-        let batches: Vec<RecordBatch> = self.parts.iter().map(|p| p.batch.clone()).collect();
-        let concatenated =
-            arrow::compute::concat_batches(&schema, &batches).context(ComputeArrowSnafu)?;
+        let concatenated = if schema.has_json_extension_field() {
+            let (schema, batches) = normalize_json_columns_for_concat(schema, &self.parts)?;
+            arrow::compute::concat_batches(&schema, &batches).context(ComputeArrowSnafu)?
+        } else {
+            arrow::compute::concat_batches(&schema, self.parts.iter().map(|x| &x.batch))
+                .context(ComputeArrowSnafu)?
+        };
 
         // Sort the concatenated batch
         let sorted_batch = sort_primary_key_record_batch(&concatenated)?;
@@ -468,6 +475,77 @@ impl UnorderedPart {
     }
 }
 
+/// Aligns the JSON extension columns of `parts` to a common data type for concatenation.
+///
+/// The data types found in all parts are merged per JSON extension field of `base_schema`.
+/// Returns the normalized schema, which keeps the metadata of `base_schema`, and the batches.
+fn normalize_json_columns_for_concat(
+    base_schema: SchemaRef,
+    parts: &[BulkPart],
+) -> Result<(SchemaRef, Vec<RecordBatch>)> {
+    debug_assert!(
+        parts
+            .iter()
+            .all(|x| x.batch.schema().fields().len() == base_schema.fields().len())
+    );
+
+    let mut merged_json_types = HashMap::new();
+    for (index, field) in base_schema.fields().iter().enumerate() {
+        if !is_json_extension_type(field) {
+            continue;
+        }
+
+        let merged = parts
+            .iter()
+            .map(|x| Cow::Borrowed(x.batch.schema_ref().field(index).data_type()))
+            .reduce(|acc, e| {
+                Cow::Owned(json_type::merge_as_json_type(acc.as_ref(), e.as_ref()).into_owned())
+            });
+        if let Some(merged) = merged {
+            merged_json_types.insert(index, merged.into_owned());
+        }
+    }
+
+    if merged_json_types.is_empty() {
+        let batches = parts.iter().map(|p| p.batch.clone()).collect();
+        return Ok((base_schema, batches));
+    }
+
+    let fields = base_schema
+        .fields()
+        .iter()
+        .enumerate()
+        .map(|(index, field)| match merged_json_types.get(&index) {
+            Some(data_type) => Arc::new(field.as_ref().clone().with_data_type(data_type.clone())),
+            None => field.clone(),
+        })
+        .collect::<Vec<_>>();
+    // Keep the schema metadata of `base_schema` instead of dropping it.
+    let normalized_schema = Arc::new(Schema::new_with_metadata(
+        fields,
+        base_schema.metadata().clone(),
+    ));
+
+    let mut normalized_batches = Vec::with_capacity(parts.len());
+    for part in parts {
+        let mut columns = Vec::with_capacity(part.batch.num_columns());
+        for (index, column) in part.batch.columns().iter().enumerate() {
+            let column = match merged_json_types.get(&index) {
+                Some(target_type) => JsonArray::from(column)
+                    .try_align(target_type)
+                    .context(ConvertValueSnafu)?,
+                None => column.clone(),
+            };
+            columns.push(column);
+        }
+        let batch = RecordBatch::try_new(normalized_schema.clone(), columns)
+            .context(NewRecordBatchSnafu)?;
+        normalized_batches.push(batch);
+    }
+
+    Ok((normalized_schema, normalized_batches))
+}
+
 /// More accurate estimation of the size of a record batch.
 pub fn record_batch_estimated_size(batch: &RecordBatch) -> usize {
     batch
@@ -679,7 +757,8 @@ impl BulkPartConverter {
         columns.push(values.sequence.to_arrow_array());
         columns.push(values.op_type.to_arrow_array());
 
-        let batch = RecordBatch::try_new(self.schema, columns).context(NewRecordBatchSnafu)?;
+        let schema = align_schema_with_json_array(self.schema, &columns);
+        let batch = RecordBatch::try_new(schema, columns).context(NewRecordBatchSnafu)?;
         // Sorts the record batch.
         let batch = sort_primary_key_record_batch(&batch)?;
 
@@ -694,6 +773,26 @@ impl BulkPartConverter {
     }
 }
 
+/// Replaces the data type of each JSON extension field in `schema` with the data type of
+/// the array at the same position in `columns`. A schema without JSON extension fields is
+/// returned untouched.
+fn align_schema_with_json_array(schema: SchemaRef, columns: &[ArrayRef]) -> SchemaRef {
+    if !schema.fields().iter().any(|f| is_json_extension_type(f)) {
+        return schema;
+    }
+
+    let aligned = schema.fields().iter().zip(columns).map(|(field, array)| {
+        if is_json_extension_type(field) {
+            let mut json_field = field.as_ref().clone();
+            json_field.set_data_type(array.data_type().clone());
+            Arc::new(json_field)
+        } else {
+            field.clone()
+        }
+    });
+    Arc::new(Schema::new_with_metadata(aligned.collect::<Vec<_>>(), schema.metadata().clone()))
+}
+
 fn new_primary_key_column_builders(
     metadata: &RegionMetadata,
     capacity: usize,
@@ -1326,6 +1425,11 @@ impl MultiBulkPart {
         self.series_count
     }
 
+    /// Returns the schema of the first batch in this part, or `None` if it has no batches.
+    pub(crate) fn record_batch_schema(&self) -> Option<SchemaRef> {
+        self.batches.first().map(|batch| batch.schema())
+    }
+
     /// Returns the number of record batches in this part.
     pub fn num_batches(&self) -> usize {
         self.batches.len()
@@ -1796,6 +1900,7 @@ mod tests {
             &FlatSchemaOptions {
                 raw_pk_columns: false,
                 string_pk_use_dict: true,
+                ..Default::default()
             },
         );
 
@@ -2233,6 +2338,7 @@ mod tests {
             &FlatSchemaOptions {
                 raw_pk_columns: false,
                 string_pk_use_dict: true,
+                ..Default::default()
             },
         );
 
diff --git a/src/mito2/src/memtable/time_series.rs b/src/mito2/src/memtable/time_series.rs
index 9666bee51c..f5b772b076 100644
--- a/src/mito2/src/memtable/time_series.rs
+++ b/src/mito2/src/memtable/time_series.rs
@@ -893,7 +893,9 @@ impl ValueBuilder {
             size += field_value.data_size();
             if !field_value.is_null() || self.fields[idx].is_some() {
                 if let Some(field) = self.fields[idx].as_mut() {
-                    let _ = field.push(field_value);
+                    field
+                        .push(field_value)
+                        .unwrap_or_else(|e| panic!("Failed to push field value: {e:?}"));
                 } else {
                     let mut mutable_vector =
                         if let ConcreteDataType::String(_) = &self.field_types[idx] {
diff --git a/src/mito2/src/read/batch_adapter.rs b/src/mito2/src/read/batch_adapter.rs
index 4698229c5b..360f591658 100644
--- a/src/mito2/src/read/batch_adapter.rs
+++ b/src/mito2/src/read/batch_adapter.rs
@@ -682,7 +682,7 @@ mod tests {
         let rb = adapter.into_iter().next().unwrap().unwrap();
 
         let mapper = FlatProjectionMapper::new(&metadata, [0, 3].into_iter()).unwrap();
-        assert_eq!(rb.schema(), mapper.input_arrow_schema(false));
+        assert_eq!(rb.schema(), mapper.input_arrow_schema(false, None));
         // tag_0 + field_1 + ts + 3 internal columns.
         assert_eq!(6, rb.num_columns());
         assert_eq!(3, rb.num_rows());
diff --git a/src/mito2/src/read/compat.rs b/src/mito2/src/read/compat.rs
index 90d664a4bd..1c81f7341a 100644
--- a/src/mito2/src/read/compat.rs
+++ b/src/mito2/src/read/compat.rs
@@ -18,7 +18,6 @@ use std::collections::HashMap;
 use std::sync::Arc;
 
 use api::v1::SemanticType;
-use common_recordbatch::recordbatch::align_json_array;
 use datatypes::arrow::array::{
     Array, ArrayRef, BinaryArray, BinaryBuilder, DictionaryArray, UInt32Array,
 };
@@ -28,6 +27,7 @@ use datatypes::arrow::record_batch::RecordBatch;
 use datatypes::data_type::ConcreteDataType;
 use datatypes::prelude::DataType;
 use datatypes::value::Value;
+use datatypes::vectors::json::array::JsonArray;
 use datatypes::vectors::{Helper, VectorRef};
 use mito_codec::row_converter::{
     CompositeValues, PrimaryKeyCodec, SortField, build_primary_key_codec,
@@ -39,8 +39,8 @@ use store_api::metadata::{RegionMetadata, RegionMetadataRef};
 use store_api::storage::ColumnId;
 
 use crate::error::{
-    CastVectorSnafu, CompatReaderSnafu, ComputeArrowSnafu, ConvertVectorSnafu, CreateDefaultSnafu,
-    DecodeSnafu, EncodeSnafu, NewRecordBatchSnafu, RecordBatchSnafu, Result, UnexpectedSnafu,
+    CastVectorSnafu, CompatReaderSnafu, ComputeArrowSnafu, ConvertValueSnafu, ConvertVectorSnafu,
+    CreateDefaultSnafu, DecodeSnafu, EncodeSnafu, NewRecordBatchSnafu, Result, UnexpectedSnafu,
     UnsupportedOperationSnafu,
 };
 use crate::read::flat_projection::{FlatProjectionMapper, flat_projected_columns};
@@ -355,8 +355,9 @@ impl FlatCompatBatch {
 
                     if let Some(ty) = cast_type {
                         let casted = if let Some(json_type) = ty.as_json() {
-                            align_json_array(old_column, &json_type.as_arrow_type())
-                                .context(RecordBatchSnafu)?
+                            JsonArray::from(old_column)
+                                .try_align(&json_type.as_arrow_type())
+                                .context(ConvertValueSnafu)?
                         } else {
                             datatypes::arrow::compute::cast(old_column, &ty.as_arrow_type())
                                 .context(ComputeArrowSnafu)?
@@ -475,10 +476,9 @@ impl CompatFields {
 
                     let data = if let Some(ty) = cast_type {
                         if let Some(json_type) = ty.as_json() {
-                            let json_array = old_column.data.to_arrow_array();
-                            let json_array =
-                                align_json_array(&json_array, &json_type.as_arrow_type())
-                                    .context(RecordBatchSnafu)?;
+                            let json_array = JsonArray::from(&old_column.data.to_arrow_array())
+                                .try_align(&json_type.as_arrow_type())
+                                .context(ConvertValueSnafu)?;
                             Helper::try_into_vector(&json_array).context(ConvertVectorSnafu)?
                         } else {
                             old_column.data.cast(ty).with_context(|_| CastVectorSnafu {
diff --git a/src/mito2/src/read/flat_merge.rs b/src/mito2/src/read/flat_merge.rs
index 946f2a610c..dc3905735c 100644
--- a/src/mito2/src/read/flat_merge.rs
+++ b/src/mito2/src/read/flat_merge.rs
@@ -25,12 +25,14 @@ use datatypes::arrow::datatypes::{ArrowNativeType, BinaryType, DataType, SchemaR
 use datatypes::arrow::error::ArrowError;
 use datatypes::arrow::record_batch::RecordBatch;
 use datatypes::arrow_array::BinaryArray;
+use datatypes::extension::json::is_json_extension_type;
 use datatypes::timestamp::timestamp_array_to_primitive;
+use datatypes::vectors::json::array::JsonArray;
 use futures::{Stream, TryStreamExt};
 use snafu::ResultExt;
 use store_api::storage::SequenceNumber;
 
-use crate::error::{ComputeArrowSnafu, Result};
+use crate::error::{ComputeArrowSnafu, DataTypeMismatchSnafu, Result};
 use crate::memtable::BoxedRecordBatchIterator;
 use crate::metrics::READ_STAGE_ELAPSED;
 use crate::read::BoxedRecordBatchStream;
@@ -180,14 +182,29 @@ impl BatchBuilder {
 
         check_interleave_overflow(&self.batches, &self.schema, &self.indices)?;
 
-        let columns = (0..self.schema.fields.len())
-            .map(|column_idx| {
-                let arrays: Vec<_> = self
+        let columns = self
+            .schema
+            .fields()
+            .iter()
+            .enumerate()
+            .map(|(column_idx, field)| {
+                let arrays = self
                     .batches
                     .iter()
-                    .map(|(_, batch)| batch.column(column_idx).as_ref())
-                    .collect();
-                interleave(&arrays, &self.indices).context(ComputeArrowSnafu)
+                    .map(|(_, batch)| {
+                        let column = batch.column(column_idx);
+                        let column = if is_json_extension_type(field) {
+                            JsonArray::from(column)
+                                .try_align(field.data_type())
+                                .context(DataTypeMismatchSnafu)?
+                        } else {
+                            column.clone()
+                        };
+                        Ok(column)
+                    })
+                    .collect::<Result<Vec<_>>>()?;
+                let aligned = arrays.iter().map(|x| x.as_ref()).collect::<Vec<_>>();
+                interleave(&aligned, &self.indices).context(ComputeArrowSnafu)
             })
             .collect::<Result<Vec<_>>>()?;
 
diff --git a/src/mito2/src/read/flat_projection.rs b/src/mito2/src/read/flat_projection.rs
index 02b4c6b3c1..8552c04f7d 100644
--- a/src/mito2/src/read/flat_projection.rs
+++ b/src/mito2/src/read/flat_projection.rs
@@ -17,15 +17,21 @@
 use std::sync::Arc;
 
 use api::v1::SemanticType;
+use arrow_schema::SchemaRef as ArrowSchemaRef;
+use arrow_schema::extension::ExtensionType;
 use common_error::ext::BoxedError;
-use common_recordbatch::error::{ArrowComputeSnafu, ExternalSnafu, NewDfRecordBatchSnafu};
+use common_recordbatch::error::{
+    ArrowComputeSnafu, DataTypesSnafu, ExternalSnafu, NewDfRecordBatchSnafu,
+};
 use common_recordbatch::{DfRecordBatch, RecordBatch};
 use datatypes::arrow::array::Array;
 use datatypes::arrow::datatypes::{DataType as ArrowDataType, Field};
+use datatypes::extension::json::JsonExtensionType;
 use datatypes::prelude::{ConcreteDataType, DataType};
 use datatypes::schema::{Schema, SchemaRef};
 use datatypes::value::Value;
 use datatypes::vectors::Helper;
+use datatypes::vectors::json::array::JsonArray;
 use snafu::{OptionExt, ResultExt};
 use store_api::metadata::{RegionMetadata, RegionMetadataRef};
 use store_api::storage::ColumnId;
@@ -43,6 +49,7 @@ use crate::sst::{
 ///
 /// This mapper support duplicate and unsorted projection indices.
 /// The output schema is determined by the projection indices.
+#[derive(Clone)]
 pub struct FlatProjectionMapper {
     /// Metadata of the region.
     metadata: RegionMetadataRef,
@@ -221,15 +228,15 @@ impl FlatProjectionMapper {
     pub(crate) fn input_arrow_schema(
         &self,
         compaction: bool,
+        json_concretized_schema: Option<ArrowSchemaRef>,
     ) -> datatypes::arrow::datatypes::SchemaRef {
         if !compaction {
             self.input_arrow_schema.clone()
         } else {
             // For compaction, we need to build a different schema from encoding.
-            to_flat_sst_arrow_schema(
-                &self.metadata,
-                &FlatSchemaOptions::from_encoding(self.metadata.primary_key_encoding),
-            )
+            let mut options = FlatSchemaOptions::from_encoding(self.metadata.primary_key_encoding);
+            options.override_schema = json_concretized_schema;
+            to_flat_sst_arrow_schema(&self.metadata, &options)
         }
     }
 
@@ -240,6 +247,10 @@ impl FlatProjectionMapper {
         self.output_schema.clone()
     }
 
+    pub(crate) fn with_output_schema(&mut self, output_schema: SchemaRef) {
+        self.output_schema = output_schema;
+    }
+
     /// Returns an empty [RecordBatch].
     pub(crate) fn empty_record_batch(&self) -> RecordBatch {
         RecordBatch::new_empty(self.output_schema.clone())
@@ -290,6 +301,13 @@ impl FlatProjectionMapper {
                     array = casted;
                 }
             }
+
+            let field = self.output_schema.arrow_schema().field(output_idx);
+            if field.extension_type_name() == Some(JsonExtensionType::NAME) {
+                array = JsonArray::from(&array)
+                    .try_align(field.data_type())
+                    .context(DataTypesSnafu)?;
+            }
             arrays.push(array);
         }
 
diff --git a/src/mito2/src/read/projection.rs b/src/mito2/src/read/projection.rs
index d22c87bcc2..9f5d5d31ef 100644
--- a/src/mito2/src/read/projection.rs
+++ b/src/mito2/src/read/projection.rs
@@ -40,6 +40,7 @@ use crate::read::flat_projection::FlatProjectionMapper;
 pub(crate) const MAX_VECTOR_LENGTH_TO_CACHE: usize = 16384;
 
 /// Wrapper enum for different projection mapper implementations.
+#[derive(Clone)]
 pub enum ProjectionMapper {
     /// Projection mapper for primary key format.
     PrimaryKey(PrimaryKeyProjectionMapper),
@@ -124,6 +125,12 @@ impl ProjectionMapper {
         }
     }
 
+    /// Overrides the output schema of a flat mapper; does nothing for other mappers.
+    pub(crate) fn with_flat_output_schema(&mut self, output_schema: SchemaRef) {
+        let Self::Flat(mapper) = self else { return };
+        mapper.with_output_schema(output_schema)
+    }
+
     /// Returns an empty [RecordBatch].
     // TODO(yingwen): This is unused now. Use it after we finishing the flat format.
     pub fn empty_record_batch(&self) -> RecordBatch {
@@ -136,6 +143,7 @@ impl ProjectionMapper {
 
 /// Handles projection and converts a projected [Batch] to a projected [RecordBatch].
 #[allow(dead_code)]
+#[derive(Clone)]
 pub struct PrimaryKeyProjectionMapper {
     /// Metadata of the region.
     metadata: RegionMetadataRef,
diff --git a/src/mito2/src/read/range.rs b/src/mito2/src/read/range.rs
index d667be9cb8..e420a7d00c 100644
--- a/src/mito2/src/read/range.rs
+++ b/src/mito2/src/read/range.rs
@@ -15,6 +15,7 @@
 //! Structs for partition ranges.
 
 use common_time::Timestamp;
+use datatypes::arrow::datatypes::SchemaRef;
 use smallvec::{SmallVec, smallvec};
 use store_api::region_engine::PartitionRange;
 use store_api::storage::TimeSeriesDistribution;
@@ -478,6 +479,11 @@ impl MemRangeBuilder {
     pub(crate) fn stats(&self) -> &MemtableStats {
         &self.stats
     }
+
+    /// Returns the record batch schema for this memtable range if available.
+    pub(crate) fn record_batch_schema(&self) -> Option<SchemaRef> {
+        self.range.record_batch_schema()
+    }
 }
 
 #[cfg(test)]
diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs
index c447685822..53b1350bae 100644
--- a/src/mito2/src/read/scan_region.rs
+++ b/src/mito2/src/read/scan_region.rs
@@ -14,7 +14,7 @@
 
 //! Scans a region according to the scan request.
 
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 use std::fmt;
 use std::num::NonZeroU64;
 use std::sync::Arc;
@@ -27,11 +27,19 @@ use common_recordbatch::filter::SimpleFilterEvaluator;
 use common_telemetry::tracing::Instrument;
 use common_telemetry::{debug, error, tracing, warn};
 use common_time::range::TimestampRange;
+use datafusion::parquet::arrow::parquet_to_arrow_schema;
 use datafusion::physical_plan::expressions::DynamicFilterPhysicalExpr;
 use datafusion_common::Column;
 use datafusion_expr::Expr;
 use datafusion_expr::utils::expr_to_columns;
+use datatypes::arrow::datatypes::DataType as ArrowDataType;
+use datatypes::data_type::{ConcreteDataType, DataType};
+use datatypes::extension::json::is_json_extension_type;
+use datatypes::schema::Schema;
+use datatypes::schema::ext::ArrowSchemaExt;
+use datatypes::types::json_type;
 use futures::StreamExt;
+use parquet::file::metadata::{PageIndexPolicy, ParquetMetaData};
 use partition::expr::PartitionExpr;
 use smallvec::SmallVec;
 use snafu::{OptionExt as _, ResultExt};
@@ -45,9 +53,9 @@ use tokio::sync::{Semaphore, mpsc};
 use tokio_stream::wrappers::ReceiverStream;
 
 use crate::access_layer::AccessLayerRef;
-use crate::cache::CacheStrategy;
+use crate::cache::{CacheStrategy, CachedSstMeta};
 use crate::config::DEFAULT_MAX_CONCURRENT_SCAN_FILES;
-use crate::error::{InvalidPartitionExprSnafu, InvalidRequestSnafu, Result};
+use crate::error::{InvalidMetaSnafu, InvalidPartitionExprSnafu, InvalidRequestSnafu, Result};
 #[cfg(feature = "enterprise")]
 use crate::extension::{BoxedExtensionRange, BoxedExtensionRangeProvider};
 use crate::memtable::{MemtableRange, RangesOptions};
@@ -74,7 +82,8 @@ use crate::sst::index::inverted_index::applier::builder::InvertedIndexApplierBui
 #[cfg(feature = "vector_index")]
 use crate::sst::index::vector_index::applier::{VectorIndexApplier, VectorIndexApplierRef};
 use crate::sst::parquet::file_range::PreFilterMode;
-use crate::sst::parquet::reader::ReaderMetrics;
+use crate::sst::parquet::metadata::MetadataLoader;
+use crate::sst::parquet::reader::{MetadataCacheMetrics, ReaderMetrics};
 
 #[cfg(feature = "vector_index")]
 const VECTOR_INDEX_OVERFETCH_MULTIPLIER: usize = 2;
@@ -521,6 +530,7 @@ impl ScanRegion {
             .with_merge_mode(self.version.options.merge_mode())
             .with_series_row_selector(self.request.series_row_selector)
             .with_distribution(self.request.distribution)
+            .with_json2_column_types(self.request.json2_column_types.clone())
             .with_explain_flat_format(
                 self.version.options.sst_format == Some(crate::sst::FormatType::Flat),
             );
@@ -539,6 +549,8 @@ impl ScanRegion {
         } else {
             input
         };
+
+        let input = concretize_json2_types(input).await?;
         Ok(input)
     }
 
@@ -765,6 +777,109 @@ impl ScanRegion {
     }
 }
 
+/// Concretizes the types of JSON extension columns in the flat output schema.
+///
+/// For each such column, the query-driven hint is merged with the column's types found in
+/// the record batch schemas of the memtables and parquet files. Returns `input` as is when
+/// the mapper is not flat, there is nothing to merge from, or no column type changes.
+pub(crate) async fn concretize_json2_types(input: ScanInput) -> Result<ScanInput> {
+    let Some(output_schema) = input.mapper.as_flat().map(|x| x.output_schema()) else {
+        return Ok(input);
+    };
+    let output_arrow_schema = output_schema.arrow_schema();
+    if !output_arrow_schema.has_json_extension_field() {
+        return Ok(input);
+    }
+
+    let memtable_schemas = input
+        .memtables
+        .iter()
+        .filter_map(|mem| mem.record_batch_schema())
+        .collect::<Vec<_>>();
+    let parquet_schemas = input.collect_parquet_record_batch_schemas().await?;
+    if memtable_schemas.is_empty()
+        && parquet_schemas.is_empty()
+        // TODO(LFC): If we can concrete json2 type solely by query-driven hint, we can skip data-driven concretize.
+        && input.json2_column_types.is_empty()
+    {
+        return Ok(input);
+    }
+
+    let mut column_schemas = output_schema.column_schemas().to_vec();
+    let mut changed = false;
+    let output_fields = output_arrow_schema.fields();
+    for (column_schema, output_field) in column_schemas.iter_mut().zip(output_fields.iter()) {
+        if !is_json_extension_type(output_field) {
+            continue;
+        }
+
+        // Seed with the query-driven hint (if any), then fold in the data-driven candidates.
+        let mut merged = input
+            .json2_column_types
+            .get(&column_schema.name)
+            .map(ConcreteDataType::as_arrow_type);
+        for schema in memtable_schemas.iter().chain(parquet_schemas.iter()) {
+            if let Some((_, field)) = schema.column_with_name(&column_schema.name) {
+                merge_json_type_candidate(&mut merged, field.data_type());
+            }
+        }
+
+        if let Some(merged) = merged
+            && merged != *output_field.data_type()
+        {
+            column_schema.data_type = ConcreteDataType::from_arrow_type(&merged);
+            debug!("Concretized json2 column {}: {}", column_schema.name, column_schema.data_type);
+            changed = true;
+        }
+    }
+
+    if !changed {
+        return Ok(input);
+    }
+    let mut mapper = Arc::unwrap_or_clone(input.mapper);
+    mapper.with_flat_output_schema(Arc::new(Schema::new(column_schemas)));
+    Ok(ScanInput {
+        mapper: Arc::new(mapper),
+        ..input
+    })
+}
+
+/// Folds `candidate` into the running merged JSON type.
+///
+/// The first candidate is adopted as is; every later one is combined with the current
+/// value through [`json_type::merge_as_json_type`].
+fn merge_json_type_candidate(merged: &mut Option<ArrowDataType>, candidate: &ArrowDataType) {
+    *merged = Some(match merged.as_ref() {
+        Some(current) => json_type::merge_as_json_type(current, candidate).into_owned(),
+        None => candidate.clone(),
+    });
+}
+
+/// Returns the parquet metadata of `file`, consulting the SST meta cache first.
+///
+/// On a cache miss the metadata is loaded from the object store and put into the cache.
+async fn read_or_load_parquet_metadata(
+    file: &FileHandle,
+    access_layer: &AccessLayerRef,
+    cache_strategy: &CacheStrategy,
+) -> Result<Arc<ParquetMetaData>> {
+    let mut metrics = MetadataCacheMetrics::default();
+    let cached = cache_strategy
+        .get_sst_meta_data(file.file_id(), &mut metrics, PageIndexPolicy::default())
+        .await;
+    if let Some(meta) = cached {
+        return Ok(meta.parquet_metadata());
+    }
+    let path = file.file_path(access_layer.table_dir(), access_layer.path_type());
+    let store = access_layer.object_store().clone();
+    let loaded = MetadataLoader::new(store, &path, file.meta_ref().file_size)
+        .load(&mut metrics)
+        .await?;
+    let meta = Arc::new(CachedSstMeta::try_new(&path, loaded)?);
+    cache_strategy.put_sst_meta_data(file.file_id(), meta.clone());
+    Ok(meta.parquet_metadata())
+}
+
 /// Returns true if the time range of a SST `file` matches the `predicate`.
 fn file_in_range(file: &FileHandle, predicate: &TimestampRange) -> bool {
     if predicate == &TimestampRange::min_to_max() {
@@ -824,6 +939,8 @@ pub struct ScanInput {
     pub(crate) series_row_selector: Option<TimeSeriesRowSelector>,
     /// Hint for the required distribution of the scanner.
     pub(crate) distribution: Option<TimeSeriesDistribution>,
+    /// Query-driven target types for JSON2 columns.
+    json2_column_types: HashMap<String, ConcreteDataType>,
     /// Whether the region's configured SST format is flat.
     explain_flat_format: bool,
     /// Whether this scan is for compaction.
@@ -861,6 +978,7 @@ impl ScanInput {
             merge_mode: MergeMode::default(),
             series_row_selector: None,
             distribution: None,
+            json2_column_types: HashMap::new(),
             explain_flat_format: false,
             compaction: false,
             #[cfg(feature = "enterprise")]
@@ -897,6 +1015,15 @@ impl ScanInput {
         self
     }
 
+    #[must_use]
+    fn with_json2_column_types(
+        mut self,
+        json2_column_types: HashMap<String, ConcreteDataType>,
+    ) -> Self {
+        self.json2_column_types = json2_column_types;
+        self
+    }
+
     /// Sets cache for this query.
     #[must_use]
     pub(crate) fn with_cache(mut self, cache: CacheStrategy) -> Self {
@@ -1330,6 +1457,35 @@ impl ScanInput {
     pub fn region_metadata(&self) -> &RegionMetadataRef {
         self.mapper.metadata()
     }
+
+    /// Returns the arrow schemas of the files in `self.files` that have a JSON extension field.
+    pub(crate) async fn collect_parquet_record_batch_schemas(
+        &self,
+    ) -> Result<Vec<datatypes::arrow::datatypes::SchemaRef>> {
+        let mut schemas = Vec::with_capacity(self.files.len());
+        for file in &self.files {
+            let parquet_metadata =
+                read_or_load_parquet_metadata(file, &self.access_layer, &self.cache_strategy)
+                    .await?;
+            let file_metadata = parquet_metadata.file_metadata();
+            let schema_descr = file_metadata.schema_descr();
+            let kv_metadata = file_metadata.key_value_metadata();
+            let arrow_schema = match parquet_to_arrow_schema(schema_descr, kv_metadata) {
+                Ok(schema) => schema,
+                Err(e) => {
+                    let reason = format!(
+                        "Failed to convert parquet metadata to arrow schema, file: {}, error: {e}",
+                        file.file_id()
+                    );
+                    return Err(InvalidMetaSnafu { reason }.build());
+                }
+            };
+            if arrow_schema.has_json_extension_field() {
+                schemas.push(Arc::new(arrow_schema));
+            }
+        }
+        Ok(schemas)
+    }
 }
 
 #[cfg(feature = "enterprise")]
diff --git a/src/mito2/src/read/seq_scan.rs b/src/mito2/src/read/seq_scan.rs
index 15ab435425..60c523a732 100644
--- a/src/mito2/src/read/seq_scan.rs
+++ b/src/mito2/src/read/seq_scan.rs
@@ -18,6 +18,7 @@ use std::fmt;
 use std::sync::Arc;
 use std::time::Instant;
 
+use arrow_schema::SchemaRef as ArrowSchemaRef;
 use async_stream::try_stream;
 use common_error::ext::BoxedError;
 use common_recordbatch::util::ChainedRecordBatchStream;
@@ -127,7 +128,10 @@ impl SeqScan {
     ///
     /// # Panics
     /// Panics if the compaction flag is not set.
-    pub async fn build_flat_reader_for_compaction(&self) -> Result<BoxedRecordBatchStream> {
+    pub async fn build_flat_reader_for_compaction(
+        &self,
+        json_concretized_schema: Option<ArrowSchemaRef>,
+    ) -> Result<BoxedRecordBatchStream> {
         assert!(self.stream_ctx.input.compaction);
 
         let metrics_set = ExecutionPlanMetricsSet::new();
@@ -140,6 +144,7 @@ impl SeqScan {
             partition_ranges,
             &part_metrics,
             self.pruner.clone(),
+            json_concretized_schema,
         )
         .await?;
         Ok(reader)
@@ -152,6 +157,7 @@ impl SeqScan {
         partition_ranges: &[PartitionRange],
         part_metrics: &PartitionMetrics,
         pruner: Arc<Pruner>,
+        json_concretized_schema: Option<ArrowSchemaRef>,
     ) -> Result<BoxedRecordBatchStream> {
         pruner.add_partition_ranges(partition_ranges);
         let partition_pruner = Arc::new(PartitionPruner::new(pruner, partition_ranges));
@@ -182,6 +188,7 @@ impl SeqScan {
             None,
             None,
             compute_parallel_channel_size(DEFAULT_READ_BATCH_SIZE),
+            json_concretized_schema,
         )
         .await
     }
@@ -195,6 +202,7 @@ impl SeqScan {
         semaphore: Option<Arc<Semaphore>>,
         part_metrics: Option<&PartitionMetrics>,
         channel_size: usize,
+        json_concretized_schema: Option<ArrowSchemaRef>,
     ) -> Result<BoxedRecordBatchStream> {
         if let Some(semaphore) = semaphore.as_ref() {
             // Read sources in parallel.
@@ -208,7 +216,8 @@ impl SeqScan {
         }
 
         let mapper = stream_ctx.input.mapper.as_flat().unwrap();
-        let schema = mapper.input_arrow_schema(stream_ctx.input.compaction);
+        let schema =
+            mapper.input_arrow_schema(stream_ctx.input.compaction, json_concretized_schema);
 
         let metrics_reporter = part_metrics.map(|m| m.merge_metrics_reporter());
         let reader =
@@ -345,9 +354,14 @@ impl SeqScan {
                 let channel_size = compute_parallel_channel_size(
                     split_batch_size.unwrap_or(DEFAULT_READ_BATCH_SIZE),
                 );
-                let mut reader =
-                    Self::build_flat_reader_from_sources(&stream_ctx, sources, semaphore.clone(), Some(&part_metrics), channel_size)
-                        .await?;
+                let mut reader = Self::build_flat_reader_from_sources(
+                    &stream_ctx,
+                    sources,
+                    semaphore.clone(),
+                    Some(&part_metrics),
+                    channel_size,
+                    None,
+                ).await?;
 
                 let mut metrics = ScannerMetrics {
                     scan_cost: fetch_start.elapsed(),
diff --git a/src/mito2/src/read/series_scan.rs b/src/mito2/src/read/series_scan.rs
index bf7ed072ab..fa27a153a6 100644
--- a/src/mito2/src/read/series_scan.rs
+++ b/src/mito2/src/read/series_scan.rs
@@ -514,6 +514,7 @@ impl SeriesDistributor {
             self.semaphore.clone(),
             Some(&part_metrics),
             channel_size,
+            None,
         )
         .await?;
         let mut metrics = SeriesDistributorMetrics::default();
diff --git a/src/mito2/src/sst.rs b/src/mito2/src/sst.rs
index 94bc1feea8..b9a48b7939 100644
--- a/src/mito2/src/sst.rs
+++ b/src/mito2/src/sst.rs
@@ -22,6 +22,7 @@ use datatypes::arrow::datatypes::{
     DataType as ArrowDataType, Field, FieldRef, Fields, Schema, SchemaRef,
 };
 use datatypes::arrow::record_batch::RecordBatch;
+use datatypes::extension::json::is_json_extension_type;
 use datatypes::prelude::ConcreteDataType;
 use datatypes::timestamp::timestamp_array_to_primitive;
 use serde::{Deserialize, Serialize};
@@ -91,6 +92,7 @@ pub struct FlatSchemaOptions {
     /// when storing primary key columns.
     /// Only takes effect when `raw_pk_columns` is true.
     pub string_pk_use_dict: bool,
+    pub override_schema: Option<SchemaRef>,
 }
 
 impl Default for FlatSchemaOptions {
@@ -98,6 +100,7 @@ impl Default for FlatSchemaOptions {
         Self {
             raw_pk_columns: true,
             string_pk_use_dict: true,
+            override_schema: None,
         }
     }
 }
@@ -111,6 +114,7 @@ impl FlatSchemaOptions {
             Self {
                 raw_pk_columns: false,
                 string_pk_use_dict: false,
+                override_schema: None,
             }
         }
     }
@@ -131,7 +135,22 @@ pub fn to_flat_sst_arrow_schema(
 ) -> SchemaRef {
     let num_fields = flat_sst_arrow_schema_column_num(metadata, options);
     let mut fields = Vec::with_capacity(num_fields);
-    let schema = metadata.schema.arrow_schema();
+
+    let mut schema = metadata.schema.arrow_schema().clone();
+    if let Some(override_schema) = &options.override_schema {
+        let mut fields = Vec::with_capacity(schema.fields().len());
+        for field in schema.fields() {
+            if is_json_extension_type(field)
+                && let Some((_, override_field)) = override_schema.fields().find(field.name())
+            {
+                fields.push(override_field.clone());
+            } else {
+                fields.push(field.clone());
+            }
+        }
+        schema = Arc::new(Schema::new_with_metadata(fields, schema.metadata().clone()));
+    };
+
     if options.raw_pk_columns {
         for pk_id in &metadata.primary_key {
             let pk_index = metadata.column_index_by_id(*pk_id).unwrap();
diff --git a/src/mito2/src/sst/index.rs b/src/mito2/src/sst/index.rs
index 31a96eecea..7a2bc54045 100644
--- a/src/mito2/src/sst/index.rs
+++ b/src/mito2/src/sst/index.rs
@@ -1382,6 +1382,7 @@ mod tests {
             bloom_filter_index_config: Default::default(),
             #[cfg(feature = "vector_index")]
             vector_index_config: Default::default(),
+            schema: None,
         };
         let mut metrics = Metrics::new(WriteType::Flush);
         env.access_layer
diff --git a/src/mito2/src/sst/parquet.rs b/src/mito2/src/sst/parquet.rs
index 2ca83ca8cf..1656baf5d3 100644
--- a/src/mito2/src/sst/parquet.rs
+++ b/src/mito2/src/sst/parquet.rs
@@ -1244,7 +1244,7 @@ mod tests {
         .await;
 
         writer
-            .write_all_flat(flat_source, None, write_opts)
+            .write_all_flat(flat_source, None, None, write_opts)
             .await
             .unwrap()
             .remove(0)
@@ -1355,7 +1355,7 @@ mod tests {
         .await;
 
         let info = writer
-            .write_all_flat(flat_source, None, &write_opts)
+            .write_all_flat(flat_source, None, None, &write_opts)
             .await
             .unwrap()
             .remove(0);
diff --git a/src/mito2/src/sst/parquet/flat_format.rs b/src/mito2/src/sst/parquet/flat_format.rs
index d4d6c11a45..47924c3746 100644
--- a/src/mito2/src/sst/parquet/flat_format.rs
+++ b/src/mito2/src/sst/parquet/flat_format.rs
@@ -49,7 +49,7 @@ use store_api::storage::{ColumnId, SequenceNumber};
 
 use crate::error::{
     ComputeArrowSnafu, DecodeSnafu, InvalidParquetSnafu, InvalidRecordBatchSnafu,
-    NewRecordBatchSnafu, Result,
+    NewRecordBatchSnafu, RecordBatchSnafu, Result,
 };
 use crate::sst::parquet::format::{
     FIXED_POS_COLUMN_NUM, FormatProjection, INTERNAL_COLUMN_NUM, PrimaryKeyArray,
@@ -103,6 +103,11 @@ impl FlatWriteFormat {
         let sequence_array = Arc::new(UInt64Array::from(vec![override_sequence; batch.num_rows()]));
         columns[sequence_column_index(batch.num_columns())] = sequence_array;
 
+        let columns = common_recordbatch::recordbatch::maybe_align_json_array_with_schema(
+            &self.arrow_schema,
+            columns,
+        )
+        .context(RecordBatchSnafu)?;
         RecordBatch::try_new(self.arrow_schema.clone(), columns).context(NewRecordBatchSnafu)
     }
 }
diff --git a/src/mito2/src/sst/parquet/reader.rs b/src/mito2/src/sst/parquet/reader.rs
index 73ca7748e9..ac4836d459 100644
--- a/src/mito2/src/sst/parquet/reader.rs
+++ b/src/mito2/src/sst/parquet/reader.rs
@@ -442,8 +442,7 @@ impl ParquetReaderBuilder {
             .unwrap_or_else(|| region_meta.schema.clone());
 
         // Create ArrowReaderMetadata for async stream building.
-        let arrow_reader_options =
-            ArrowReaderOptions::new().with_schema(read_format.arrow_schema().clone());
+        let arrow_reader_options = ArrowReaderOptions::new();
         let arrow_metadata =
             ArrowReaderMetadata::try_new(parquet_meta.clone(), arrow_reader_options)
                 .context(ReadDataPartSnafu)?;
diff --git a/src/mito2/src/sst/parquet/writer.rs b/src/mito2/src/sst/parquet/writer.rs
index 4e75073e26..cbb288f8de 100644
--- a/src/mito2/src/sst/parquet/writer.rs
+++ b/src/mito2/src/sst/parquet/writer.rs
@@ -72,6 +72,7 @@ enum FlatBatchConverter {
 }
 
 impl FlatBatchConverter {
+    #[expect(unused)]
     fn arrow_schema(&self) -> &SchemaRef {
         match self {
             FlatBatchConverter::Flat(f) => f.arrow_schema(),
@@ -275,15 +276,16 @@ where
     pub async fn write_all_flat(
         &mut self,
         source: FlatSource,
+        override_schema: Option<SchemaRef>,
         override_sequence: Option<SequenceNumber>,
         opts: &WriteOptions,
     ) -> Result<SstInfoArray> {
+        let mut options = FlatSchemaOptions::from_encoding(self.metadata.primary_key_encoding);
+        options.override_schema = override_schema;
+
         let converter = FlatBatchConverter::Flat(
-            FlatWriteFormat::new(
-                self.metadata.clone(),
-                &FlatSchemaOptions::from_encoding(self.metadata.primary_key_encoding),
-            )
-            .with_override_sequence(override_sequence),
+            FlatWriteFormat::new(self.metadata.clone(), &options)
+                .with_override_sequence(override_sequence),
         );
         let res = self.write_all_flat_inner(source, &converter, opts).await;
         if res.is_err() {
@@ -406,7 +408,7 @@ where
         let arrow_batch = converter.convert_batch(&record_batch)?;
 
         let start = Instant::now();
-        self.maybe_init_writer(converter.arrow_schema(), opts)
+        self.maybe_init_writer(arrow_batch.schema_ref(), opts)
             .await?
             .write(&arrow_batch)
             .await
diff --git a/src/operator/src/req_convert/insert/stmt_to_region.rs b/src/operator/src/req_convert/insert/stmt_to_region.rs
index e2e0969035..83c126b9b4 100644
--- a/src/operator/src/req_convert/insert/stmt_to_region.rs
+++ b/src/operator/src/req_convert/insert/stmt_to_region.rs
@@ -301,12 +301,22 @@ impl<'a, 'b> JsonColumnTypeUpdater<'a, 'b> {
                 .or_insert_with(|| value_type.clone());
 
             if !merged_type.is_include(&value_type) {
-                merged_type.merge(&value_type).map_err(|e| {
+                if column_schema
+                    .data_type
+                    .as_json()
+                    .map(|x| x.is_native_type())
+                    .unwrap_or(false)
+                {
+                    merged_type.merge(&value_type)
+                } else {
+                    merged_type.merge_with_lifting(&value_type)
+                }
+                .map_err(|e| {
                     InvalidInsertRequestSnafu {
                         reason: format!(r#"cannot merge "{value_type}" into "{merged_type}": {e}"#),
                     }
                     .build()
-                })?;
+                })?
             }
         }
         Ok(())
@@ -323,7 +333,17 @@ impl<'a, 'b> JsonColumnTypeUpdater<'a, 'b> {
         for (column_name, merged_type) in self.merged_value_types.iter() {
             let Some(column_type) = insert_columns
                 .iter()
-                .find_map(|x| (&x.name == column_name).then(|| x.data_type.as_json()))
+                .find_map(|x| {
+                    (&x.name == column_name).then(|| {
+                        if let ConcreteDataType::Json(t) = &x.data_type
+                            && t.is_native_type()
+                        {
+                            Some(t)
+                        } else {
+                            None
+                        }
+                    })
+                })
                 .flatten()
             else {
                 continue;
diff --git a/src/pipeline/src/etl/transform/transformer/greptime.rs b/src/pipeline/src/etl/transform/transformer/greptime.rs
index 92b9dee4b9..b9f0767cbd 100644
--- a/src/pipeline/src/etl/transform/transformer/greptime.rs
+++ b/src/pipeline/src/etl/transform/transformer/greptime.rs
@@ -418,7 +418,7 @@ fn resolve_schema(
         match (column_type, value_type) {
             (column_type, value_type) if column_type == value_type => Ok(()),
             (ConcreteDataType::Json(column_type), ConcreteDataType::Json(value_type))
-                if column_type.is_include(value_type) =>
+                if column_type.is_json2() =>
             {
                 Ok(())
             }
@@ -689,17 +689,16 @@ fn resolve_value(
         }
 
         VrlValue::Array(_) | VrlValue::Object(_) => {
-            let is_json_native_type = schema_info
+            let is_json2_type = schema_info
                 .find_column_schema_in_table(&column_name)
                 .is_some_and(|x| {
-                    if let ConcreteDataType::Json(column_type) = &x.column_schema.data_type {
-                        column_type.is_native_type()
-                    } else {
-                        false
-                    }
+                    matches!(
+                        &x.column_schema.data_type,
+                        ConcreteDataType::Json(column_type) if column_type.is_json2()
+                    )
                 });
 
-            let value = if is_json_native_type {
+            let value = if is_json2_type {
                 let json_extension_type: Option<JsonExtensionType> =
                     if let Some(x) = schema_info.find_column_schema_in_table(&column_name) {
                         x.column_schema.extension_type()?
diff --git a/src/query/src/datafusion.rs b/src/query/src/datafusion.rs
index e2e577debf..7f6fb567fc 100644
--- a/src/query/src/datafusion.rs
+++ b/src/query/src/datafusion.rs
@@ -15,6 +15,7 @@
 //! Planner, QueryEngine implementations based on DataFusion.
 
 mod error;
+mod json2_expr_planner;
 mod planner;
 
 use std::any::Any;
diff --git a/src/query/src/datafusion/json2_expr_planner.rs b/src/query/src/datafusion/json2_expr_planner.rs
new file mode 100644
index 0000000000..558380e542
--- /dev/null
+++ b/src/query/src/datafusion/json2_expr_planner.rs
@@ -0,0 +1,157 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! An [`ExprPlanner`] that plans nested field accesses on JSON extension columns as
+//! [`Json2GetFunction`] calls.
+
+use std::sync::Arc;
+
+use arrow_schema::Field;
+use arrow_schema::extension::ExtensionType;
+use common_function::scalars::json::json2_get::{Json2GetFunction, datatype_expr};
+use common_function::scalars::udf::create_udf;
+use datafusion_common::arrow::datatypes::DataType;
+use datafusion_common::{Column, DataFusionError, Result, ScalarValue, TableReference};
+use datafusion_expr::expr::{BinaryExpr, ScalarFunction};
+use datafusion_expr::planner::{ExprPlanner, PlannerResult, RawBinaryExpr};
+use datafusion_expr::{Expr, ExprSchemable, Operator};
+use datatypes::extension::json::JsonExtensionType;
+use sqlparser::ast::BinaryOperator;
+
+/// Plans nested field accesses on JSON extension columns as [`Json2GetFunction`] calls and
+/// retargets the type argument of such calls inside binary expressions.
+#[derive(Debug)]
+pub(crate) struct Json2ExprPlanner;
+
+/// Builds a [`Json2GetFunction`] call with `base`, the literal `path` and a `Utf8View` type
+/// expression as its three arguments.
+fn json2_get(base: Expr, path: String) -> Result<Expr> {
+    let args = vec![
+        base,
+        Expr::Literal(ScalarValue::Utf8(Some(path)), None),
+        datatype_expr(&DataType::Utf8View)?,
+    ];
+    let function = create_udf(Arc::new(Json2GetFunction::default()));
+    Ok(Expr::ScalarFunction(ScalarFunction::new_udf(
+        Arc::new(function),
+        args,
+    )))
+}
+
+impl ExprPlanner for Json2ExprPlanner {
+    /// Retargets the type argument of [`Json2GetFunction`] operands to the type of the
+    /// opposite operand, for the operators supported by [`parse_sql_binary_op`].
+    ///
+    /// Binary expressions with an unsupported operator or without such an operand are
+    /// returned as [`PlannerResult::Original`].
+    fn plan_binary_op(
+        &self,
+        expr: RawBinaryExpr,
+        schema: &datafusion_common::DFSchema,
+    ) -> Result<PlannerResult<RawBinaryExpr>> {
+        let Some(operator) = parse_sql_binary_op(&expr.op) else {
+            return Ok(PlannerResult::Original(expr));
+        };
+
+        // This planner sees every binary expression. Bail out before resolving the operand
+        // types unless there is a call to rewrite: resolution would be wasted work, and its
+        // errors would fail the planning of expressions this planner does not handle.
+        if as_json2_get_call(&expr.left).is_none() && as_json2_get_call(&expr.right).is_none() {
+            return Ok(PlannerResult::Original(expr));
+        }
+
+        let left_type = expr.left.get_type(schema)?;
+        let right_type = expr.right.get_type(schema)?;
+        let left_rewritten = rewrite_expr_json2_get(&expr.left, right_type)?;
+        let right_rewritten = rewrite_expr_json2_get(&expr.right, left_type)?;
+
+        let rewritten = Expr::BinaryExpr(BinaryExpr::new(
+            Box::new(left_rewritten.unwrap_or(expr.left)),
+            operator,
+            Box::new(right_rewritten.unwrap_or(expr.right)),
+        ));
+        common_telemetry::debug!("json2 plan_binary_op: rewritten={rewritten:?}");
+        Ok(PlannerResult::Planned(rewritten))
+    }
+
+    /// Plans a compound identifier on a JSON extension field as a [`Json2GetFunction`] call
+    /// whose path is the dot-joined `nested_names`; other fields are returned as original.
+    fn plan_compound_identifier(
+        &self,
+        field: &Field,
+        qualifier: Option<&TableReference>,
+        nested_names: &[String],
+    ) -> Result<PlannerResult<Vec<Expr>>> {
+        if field.extension_type_name() != Some(JsonExtensionType::NAME) {
+            return Ok(PlannerResult::Original(Vec::new()));
+        }
+
+        let path = nested_names.join(".");
+        let column = Column::from((qualifier, field));
+        json2_get(Expr::Column(column), path).map(PlannerResult::Planned)
+    }
+}
+
+/// Returns the call if `expr` is a [`Json2GetFunction`] call, matched by function name.
+fn as_json2_get_call(expr: &Expr) -> Option<&ScalarFunction> {
+    match expr {
+        Expr::ScalarFunction(func) if func.func.name() == Json2GetFunction::NAME => Some(func),
+        _ => None,
+    }
+}
+
+/// Returns a copy of the [`Json2GetFunction`] call `expr` with its third argument replaced by
+/// the type expression of `data_type`, or `None` if `expr` is not such a call.
+///
+/// # Errors
+/// Returns an internal error if the call does not have exactly 3 arguments.
+fn rewrite_expr_json2_get(expr: &Expr, data_type: DataType) -> Result<Option<Expr>> {
+    let Some(func) = as_json2_get_call(expr) else {
+        return Ok(None);
+    };
+    let [base, path, _] = func.args.as_slice() else {
+        return Err(DataFusionError::Internal(format!(
+            "Function {} is expected to have 3 arguments!",
+            func.name()
+        )));
+    };
+    let expected_expr = datatype_expr(&data_type)?;
+    let rewritten = Expr::ScalarFunction(ScalarFunction {
+        func: func.func.clone(),
+        args: vec![base.clone(), path.clone(), expected_expr],
+    });
+    Ok(Some(rewritten))
+}
+
+/// Maps the SQL comparison, arithmetic and `AND`/`OR` operators to the DataFusion [`Operator`];
+/// returns `None` for any other operator.
+fn parse_sql_binary_op(op: &BinaryOperator) -> Option<Operator> {
+    let operator = match op {
+        BinaryOperator::Gt => Operator::Gt,
+        BinaryOperator::GtEq => Operator::GtEq,
+        BinaryOperator::Lt => Operator::Lt,
+        BinaryOperator::LtEq => Operator::LtEq,
+        BinaryOperator::Eq => Operator::Eq,
+        BinaryOperator::NotEq => Operator::NotEq,
+        BinaryOperator::Plus => Operator::Plus,
+        BinaryOperator::Minus => Operator::Minus,
+        BinaryOperator::Multiply => Operator::Multiply,
+        BinaryOperator::Divide => Operator::Divide,
+        BinaryOperator::Modulo => Operator::Modulo,
+        BinaryOperator::And => Operator::And,
+        BinaryOperator::Or => Operator::Or,
+        _ => return None,
+    };
+    Some(operator)
+}
diff --git a/src/query/src/datafusion/planner.rs b/src/query/src/datafusion/planner.rs
index d9c74b9d5a..7088111774 100644
--- a/src/query/src/datafusion/planner.rs
+++ b/src/query/src/datafusion/planner.rs
@@ -38,6 +38,7 @@ use datafusion_sql::parser::Statement as DfStatement;
 use session::context::QueryContextRef;
 use snafu::{Location, ResultExt};
 
+use crate::datafusion::json2_expr_planner::Json2ExprPlanner;
 use crate::error::{CatalogSnafu, Result};
 use crate::query_engine::{DefaultPlanDecoder, QueryEngineState};
 
@@ -87,6 +88,9 @@ impl DfContextProviderAdapter {
             .map(|format| (format.get_ext().to_lowercase(), format))
             .collect();
 
+        let mut expr_planners = SessionStateDefaults::default_expr_planners();
+        expr_planners.insert(0, Arc::new(Json2ExprPlanner));
+
         Ok(Self {
             engine_state,
             session_state,
@@ -94,7 +98,7 @@ impl DfContextProviderAdapter {
             table_provider,
             query_ctx,
             file_formats,
-            expr_planners: SessionStateDefaults::default_expr_planners(),
+            expr_planners,
         })
     }
 }
diff --git a/src/query/src/dummy_catalog.rs b/src/query/src/dummy_catalog.rs
index 7ce85afbbb..de3fa2fc15 100644
--- a/src/query/src/dummy_catalog.rs
+++ b/src/query/src/dummy_catalog.rs
@@ -15,6 +15,7 @@
 //! Dummy catalog for region server.
 
 use std::any::Any;
+use std::collections::HashMap;
 use std::fmt;
 use std::sync::{Arc, Mutex};
 
@@ -30,6 +31,7 @@ use datafusion::physical_plan::ExecutionPlan;
 use datafusion_common::DataFusionError;
 use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType};
 use datatypes::arrow::datatypes::SchemaRef;
+use datatypes::data_type::ConcreteDataType;
 use futures::stream::BoxStream;
 use session::context::{QueryContext, QueryContextRef};
 use snafu::ResultExt;
@@ -266,6 +268,10 @@ impl DummyTableProvider {
         self.scan_request.lock().unwrap().vector_search.clone()
     }
 
+    pub fn with_json2_type_hint(&self, json2_column_types: &HashMap<String, ConcreteDataType>) {
+        self.scan_request.lock().unwrap().json2_column_types = json2_column_types.clone();
+    }
+
     pub fn with_sequence(&self, sequence: u64) {
         self.scan_request.lock().unwrap().memtable_max_sequence = Some(sequence);
     }
diff --git a/src/query/src/optimizer.rs b/src/query/src/optimizer.rs
index aaac1e3124..6deeee1a4a 100644
--- a/src/query/src/optimizer.rs
+++ b/src/query/src/optimizer.rs
@@ -15,6 +15,7 @@
 pub mod constant_term;
 pub mod count_nest_aggr;
 pub mod count_wildcard;
+pub mod json2_scan_hint;
 pub mod parallelize_scan;
 pub mod pass_distribution;
 pub mod remove_duplicate;
diff --git a/src/query/src/optimizer/json2_scan_hint.rs b/src/query/src/optimizer/json2_scan_hint.rs
new file mode 100644
index 0000000000..363f5c168b
--- /dev/null
+++ b/src/query/src/optimizer/json2_scan_hint.rs
@@ -0,0 +1,225 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+
+use common_function::scalars::json::json2_get::Json2GetFunction;
+use datafusion::datasource::DefaultTableSource;
+use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion};
+use datafusion_common::{Result, ScalarValue, TableReference, internal_err};
+use datafusion_expr::expr::ScalarFunction;
+use datafusion_expr::{Expr, LogicalPlan};
+use datafusion_optimizer::{OptimizerConfig, OptimizerRule};
+use datatypes::data_type::ConcreteDataType;
+use datatypes::json::requirement::JsonPathTarget;
+use datatypes::types::JsonFormat;
+
+use crate::dummy_catalog::DummyTableProvider;
+
+/// Optimizer rule that pushes the types requested by `json2_get` calls down to region scans.
+#[derive(Debug)]
+pub struct Json2ScanHintRule;
+
+impl OptimizerRule for Json2ScanHintRule {
+    fn name(&self) -> &str {
+        "Json2ScanHintRule"
+    }
+
+    /// Gathers every typed JSON2 path referenced in `plan` and records the merged per-column
+    /// types on each table scan served by a `DummyTableProvider`.
+    ///
+    /// Returns the plan untouched (`Transformed::no`) when no usable `json2_get` call is found.
+    /// Scans over any other source are skipped. The plan tree itself is never rewritten; the
+    /// hints are stored on the provider through `with_json2_type_hint`.
+    fn rewrite(
+        &self,
+        plan: LogicalPlan,
+        _config: &dyn OptimizerConfig,
+    ) -> Result<Transformed<LogicalPlan>> {
+        let requirements = Json2TypeRequirements::collect(&plan)?;
+        if requirements.is_empty() {
+            return Ok(Transformed::no(plan));
+        }
+
+        plan.transform_down(&mut |node| {
+            let LogicalPlan::TableScan(scan) = &node else {
+                return Ok(Transformed::no(node));
+            };
+            // Both downcasts must succeed; otherwise this scan cannot receive hints.
+            let provider = scan
+                .source
+                .as_any()
+                .downcast_ref::<DefaultTableSource>()
+                .and_then(|s| s.table_provider.as_any().downcast_ref::<DummyTableProvider>());
+            let Some(provider) = provider else {
+                return Ok(Transformed::no(node));
+            };
+
+            let hints = requirements.merge(&scan.table_name, &provider.region_metadata().schema);
+            provider.with_json2_type_hint(&hints);
+            Ok(Transformed::yes(node))
+        })
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] // `Eq` + `Hash`: used as a `HashMap` key
+struct Json2ColumnKey {
+    relation: Option<TableReference>, // qualifier of the referenced column, if any
+    name: String, // name of the referenced column
+}
+
+#[derive(Debug, Default)] // `Default` yields the empty state that `collect` starts from
+struct Json2TypeRequirements {
+    path_targets: HashMap<Json2ColumnKey, JsonPathTarget>, // required typed paths per column
+}
+
+impl Json2TypeRequirements {
+    /// Walks every expression of every node in `plan` and records, per referenced column,
+    /// the path and expected type of each `json2_get` call found.
+    fn collect(plan: &LogicalPlan) -> Result<Self> {
+        let mut collector = Self::default();
+        plan.apply(|node| {
+            for expr in node.expressions() {
+                let _ = expr.apply(|expr| {
+                    if let Some((column, path, data_type)) = extract_json2_get(expr)? {
+                        let target = collector.path_targets.entry(column).or_default();
+                        target.require_typed_path(&path, data_type);
+                    }
+                    Ok(TreeNodeRecursion::Continue)
+                })?;
+            }
+            Ok(TreeNodeRecursion::Continue)
+        })?;
+        Ok(collector)
+    }
+
+    /// Returns `true` when no `json2_get` requirement was collected.
+    fn is_empty(&self) -> bool {
+        self.path_targets.is_empty()
+    }
+
+    /// Builds the hint map (column name -> requested type) for the JSON2 columns of `schema`.
+    ///
+    /// A requirement applies to a column when the names match and its qualifier is either
+    /// absent or resolves to `table_name`. Qualifiers are compared with `resolved_eq`, so a
+    /// bare `t` and a fully qualified `catalog.schema.t` count as the same table.
+    fn merge(
+        &self,
+        table_name: &TableReference,
+        schema: &datatypes::schema::SchemaRef,
+    ) -> HashMap<String, ConcreteDataType> {
+        let mut types = HashMap::new();
+
+        for column_schema in schema.column_schemas() {
+            match &column_schema.data_type {
+                ConcreteDataType::Json(t) if matches!(t.format, JsonFormat::Json2) => {}
+                _ => continue,
+            }
+
+            let targets: Vec<_> = self
+                .path_targets
+                .iter()
+                .filter(|(key, _)| key.name == column_schema.name)
+                .filter(|(key, _)| key.relation.as_ref().is_none_or(|r| r.resolved_eq(table_name)))
+                .map(|(_, target)| target.clone())
+                .collect();
+            if targets.is_empty() {
+                continue;
+            }
+
+            // Flatten each matching key's type back into paths and fold them into one target.
+            let mut merged = JsonPathTarget::default();
+            for data_type in targets.into_iter().filter_map(|target| target.build_type()) {
+                merge_path_target_from_type(&mut merged, &data_type, "");
+            }
+            if let Some(data_type) = merged.build_type() {
+                let _ = types.insert(column_schema.name.clone(), data_type);
+            }
+        }
+
+        types
+    }
+}
+
+/// Decodes a `json2_get(column, path, type)` call into its column key, path and expected type.
+/// Returns `Ok(None)` unless `expr` is such a call with a plain column and a non-null string
+/// literal path; the type falls back to string when the third argument is not a literal.
+/// Errors if a `json2_get` call does not have exactly three arguments.
+fn extract_json2_get(expr: &Expr) -> Result<Option<(Json2ColumnKey, String, ConcreteDataType)>> {
+    let (func, args) = match expr {
+        Expr::ScalarFunction(ScalarFunction { func, args }) => (func, args),
+        _ => return Ok(None),
+    };
+    if func.name() != Json2GetFunction::NAME {
+        return Ok(None);
+    }
+    let [column_arg, path_arg, type_arg] = args.as_slice() else {
+        return internal_err!("function {} must have 3 arguments", Json2GetFunction::NAME);
+    };
+
+    let Expr::Column(column) = column_arg else {
+        return Ok(None);
+    };
+    let Expr::Literal(path_value, _) = path_arg else {
+        return Ok(None);
+    };
+    let path = match path_value {
+        ScalarValue::Utf8(Some(p)) | ScalarValue::LargeUtf8(Some(p)) => p.clone(),
+        ScalarValue::Utf8View(Some(p)) => p.clone(),
+        _ => return Ok(None),
+    };
+    let data_type =
+        extract_expected_type(type_arg).unwrap_or_else(ConcreteDataType::string_datatype);
+
+    let key = Json2ColumnKey {
+        relation: column.relation.clone(),
+        name: column.name.clone(),
+    };
+    Ok(Some((key, path, data_type)))
+}
+
+/// Returns the `ConcreteDataType` matching the Arrow type of a literal expression.
+/// Any expression that is not a literal yields `None`.
+fn extract_expected_type(expr: &Expr) -> Option<ConcreteDataType> {
+    let Expr::Literal(value, _) = expr else {
+        return None;
+    };
+    let arrow_type = value.data_type();
+    Some(ConcreteDataType::from_arrow_type(&arrow_type))
+}
+
+/// Flattens `data_type` into dotted leaf paths and registers each leaf on `target`.
+/// Struct fields recurse with `prefix` extended by the field name; any other type is
+/// registered under `prefix` itself, unless `prefix` is empty.
+fn merge_path_target_from_type(
+    target: &mut JsonPathTarget,
+    data_type: &ConcreteDataType,
+    prefix: &str,
+) {
+    let ConcreteDataType::Struct(struct_type) = data_type else {
+        // Leaf type: with an empty prefix there is no path to register it under.
+        if !prefix.is_empty() {
+            target.require_typed_path(prefix, data_type.clone());
+        }
+        return;
+    };
+    let fields = struct_type.fields();
+    for field in fields.iter() {
+        let child = match prefix {
+            "" => field.name().to_string(),
+            _ => format!("{prefix}.{}", field.name()),
+        };
+        merge_path_target_from_type(target, field.data_type(), &child);
+    }
+}
diff --git a/src/query/src/query_engine/state.rs b/src/query/src/query_engine/state.rs
index f696c8b53e..d4bda7b903 100644
--- a/src/query/src/query_engine/state.rs
+++ b/src/query/src/query_engine/state.rs
@@ -62,6 +62,7 @@ use crate::optimizer::ExtensionAnalyzerRule;
 use crate::optimizer::constant_term::MatchesConstantTermOptimizer;
 use crate::optimizer::count_nest_aggr::CountNestAggrRule;
 use crate::optimizer::count_wildcard::CountWildcardToTimeIndexRule;
+use crate::optimizer::json2_scan_hint::Json2ScanHintRule;
 use crate::optimizer::parallelize_scan::ParallelizeScan;
 use crate::optimizer::pass_distribution::PassDistribution;
 use crate::optimizer::remove_duplicate::RemoveDuplicate;
@@ -173,6 +174,7 @@ impl QueryEngineState {
         analyzer.rules.push(Arc::new(FixStateUdafOrderingAnalyzer));
 
         let mut optimizer = Optimizer::new();
+        optimizer.rules.push(Arc::new(Json2ScanHintRule));
         optimizer.rules.push(Arc::new(ScanHintRule));
 
         // add physical optimizer
diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs
index 211fc5598e..642479ca64 100644
--- a/src/sql/src/statements.rs
+++ b/src/sql/src/statements.rs
@@ -153,7 +153,16 @@ pub fn column_to_schema(
 
     column_schema.set_inverted_index(column.extensions.inverted_index_options.is_some());
 
-    if matches!(column.data_type(), SqlDataType::JSON) {
+    let is_json2_column = if let SqlDataType::Custom(object_name, _) = column.data_type() {
+        object_name
+            .0
+            .first()
+            .map(|x| x.to_string_unquoted().eq_ignore_ascii_case("JSON2"))
+            .unwrap_or_default()
+    } else {
+        false
+    };
+    if is_json2_column || matches!(column.data_type(), SqlDataType::JSON) {
         let settings = column
             .extensions
             .build_json_structure_settings()?
@@ -290,22 +299,25 @@ pub fn sql_data_type_to_concrete_data_type(
             };
             Ok(ConcreteDataType::Json(JsonType::new(format)))
         }
-        // Vector type
-        SqlDataType::Custom(name, d)
-            if name.0.as_slice().len() == 1
-                && name.0.as_slice()[0]
-                    .to_string_unquoted()
-                    .to_ascii_uppercase()
-                    == VECTOR_TYPE_NAME
-                && d.len() == 1 =>
-        {
-            let dim = d[0].parse().map_err(|e| {
-                error::ParseSqlValueSnafu {
-                    msg: format!("Failed to parse vector dimension: {}", e),
+        // Vector type and JSON2 type
+        SqlDataType::Custom(name, d) if name.0.len() == 1 => {
+            let name = name.0[0].to_string_unquoted().to_ascii_uppercase();
+            match name.as_str() {
+                VECTOR_TYPE_NAME if d.len() == 1 => {
+                    let dim = d[0].parse().map_err(|e| {
+                        error::ParseSqlValueSnafu {
+                            msg: format!(r#"Failed to parse vector dimension "{}": {}"#, d[0], e),
+                        }
+                        .build()
+                    })?;
+                    Ok(ConcreteDataType::vector_datatype(dim))
                 }
-                .build()
-            })?;
-            Ok(ConcreteDataType::vector_datatype(dim))
+                "JSON2" => Ok(ConcreteDataType::Json(JsonType::new(JsonFormat::Json2))),
+                _ => error::SqlTypeNotSupportedSnafu {
+                    t: data_type.clone(),
+                }
+                .fail(),
+            }
         }
         _ => error::SqlTypeNotSupportedSnafu {
             t: data_type.clone(),
diff --git a/src/sql/src/statements/create.rs b/src/sql/src/statements/create.rs
index 80eb52c406..f54fee1844 100644
--- a/src/sql/src/statements/create.rs
+++ b/src/sql/src/statements/create.rs
@@ -377,32 +377,35 @@ impl ColumnExtensions {
             None
         };
 
-        options
+        let format = options
             .get(JSON_OPT_FORMAT)
-            .map(|format| match format {
-                JSON_FORMAT_FULL_STRUCTURED => Ok(JsonStructureSettings::Structured(fields)),
-                JSON_FORMAT_PARTIAL => {
-                    let fields = fields.map(|fields| {
-                        let mut fields = Arc::unwrap_or_clone(fields.fields());
-                        fields.push(datatypes::types::StructField::new(
-                            JsonStructureSettings::RAW_FIELD.to_string(),
-                            ConcreteDataType::string_datatype(),
-                            true,
-                        ));
-                        StructType::new(Arc::new(fields))
-                    });
-                    Ok(JsonStructureSettings::PartialUnstructuredByKey {
-                        fields,
-                        unstructured_keys,
-                    })
+            .unwrap_or(JSON_FORMAT_FULL_STRUCTURED);
+        let settings = match format {
+            JSON_FORMAT_FULL_STRUCTURED => JsonStructureSettings::Structured(fields),
+            JSON_FORMAT_PARTIAL => {
+                let fields = fields.map(|fields| {
+                    let mut fields = Arc::unwrap_or_clone(fields.fields());
+                    fields.push(datatypes::types::StructField::new(
+                        JsonStructureSettings::RAW_FIELD.to_string(),
+                        ConcreteDataType::string_datatype(),
+                        true,
+                    ));
+                    StructType::new(Arc::new(fields))
+                });
+                JsonStructureSettings::PartialUnstructuredByKey {
+                    fields,
+                    unstructured_keys,
                 }
-                JSON_FORMAT_RAW => Ok(JsonStructureSettings::UnstructuredRaw),
-                _ => InvalidSqlSnafu {
+            }
+            JSON_FORMAT_RAW => JsonStructureSettings::UnstructuredRaw,
+            _ => {
+                return InvalidSqlSnafu {
                     msg: format!("unknown JSON datatype 'format': {format}"),
                 }
-                .fail(),
-            })
-            .transpose()
+                .fail();
+            }
+        };
+        Ok(Some(settings))
     }
 
     pub fn set_json_structure_settings(&mut self, settings: JsonStructureSettings) {
diff --git a/src/store-api/src/storage/requests.rs b/src/store-api/src/storage/requests.rs
index db3fb0388a..8382e66110 100644
--- a/src/store-api/src/storage/requests.rs
+++ b/src/store-api/src/storage/requests.rs
@@ -12,12 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::collections::HashMap;
 use std::fmt::{Display, Formatter};
 
 use common_error::ext::BoxedError;
 use common_recordbatch::OrderOption;
 use datafusion_expr::expr::Expr;
 // Re-export vector types from datatypes to avoid duplication
+use datatypes::data_type::ConcreteDataType;
 pub use datatypes::schema::{VectorDistanceMetric, VectorIndexEngineType};
 use strum::Display;
 
@@ -128,6 +130,8 @@ pub struct ScanRequest {
     /// Optional hint for KNN vector search. When set, the scan should use
     /// vector index to find the k nearest neighbors.
     pub vector_search: Option<VectorSearchRequest>,
+    /// Optional target types for query-driven JSON2 concretization.
+    pub json2_column_types: HashMap<String, ConcreteDataType>,
 }
 
 impl Display for ScanRequest {
@@ -218,6 +222,14 @@ impl Display for ScanRequest {
                 vector_search.metric
             )?;
         }
+        if !self.json2_column_types.is_empty() {
+            write!(
+                f,
+                "{}json2_column_types: {:?}",
+                delimiter.as_str(),
+                self.json2_column_types
+            )?;
+        }
         write!(f, " }}")
     }
 }
diff --git a/tests-integration/Cargo.toml b/tests-integration/Cargo.toml
index 10b7097f4f..6440294eff 100644
--- a/tests-integration/Cargo.toml
+++ b/tests-integration/Cargo.toml
@@ -7,7 +7,7 @@ autotests = false
 
 [[test]]
 name = "main"
-path = "tests/main.rs"
+path = "tests/it/main.rs"
 
 [features]
 dashboard = ["servers/dashboard"]
diff --git a/tests-integration/tests/grpc.rs b/tests-integration/tests/it/grpc.rs
similarity index 100%
rename from tests-integration/tests/grpc.rs
rename to tests-integration/tests/it/grpc.rs
diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/it/http.rs
similarity index 100%
rename from tests-integration/tests/http.rs
rename to tests-integration/tests/it/http.rs
diff --git a/tests-integration/tests/jsonbench.rs b/tests-integration/tests/it/jsonbench.rs
similarity index 73%
rename from tests-integration/tests/jsonbench.rs
rename to tests-integration/tests/it/jsonbench.rs
index 60f699c4ce..bc5a931760 100644
--- a/tests-integration/tests/jsonbench.rs
+++ b/tests-integration/tests/it/jsonbench.rs
@@ -149,16 +149,10 @@ async fn query_data(frontend: &Arc<Instance>) -> io::Result<()> {
 +----------+"#;
     execute_sql_and_expect(frontend, sql, expected).await;
 
-    let sql = "SELECT * FROM bluesky ORDER BY time_us";
-    let expected = fs::read_to_string(find_workspace_path(
-        "tests-integration/resources/jsonbench-select-all.txt",
-    ))?;
-    execute_sql_and_expect(frontend, sql, &expected).await;
-
     // query 1:
     let sql = "
 SELECT
-    json_get_string(data, '$.commit.collection') AS event, count() AS count
+    data.commit.collection AS event, count() AS count
 FROM bluesky
 GROUP BY event
 ORDER BY count DESC, event ASC";
@@ -176,13 +170,12 @@ ORDER BY count DESC, event ASC";
     // query 2:
     let sql = "
 SELECT
-    json_get_string(data, '$.commit.collection') AS event,
+    data.commit.collection AS event,
     count() AS count,
-    count(DISTINCT json_get_string(data, '$.did')) AS users
+    count(DISTINCT data.did) AS users
 FROM bluesky
 WHERE
-    (json_get_string(data, '$.kind') = 'commit') AND
-    (json_get_string(data, '$.commit.operation') = 'create')
+    data.kind = 'commit' AND data.commit.operation = 'create'
 GROUP BY event
 ORDER BY count DESC, event ASC";
     let expected = r#"
@@ -199,15 +192,14 @@ ORDER BY count DESC, event ASC";
     // query 3:
     let sql = "
 SELECT
-    json_get_string(data, '$.commit.collection') AS event,
-    date_part('hour', to_timestamp_micros(json_get_int(data, '$.time_us'))) as hour_of_day,
+    data.commit.collection AS event,
+    date_part('hour', to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) as hour_of_day,
     count() AS count
 FROM bluesky
 WHERE
-    (json_get_string(data, '$.kind') = 'commit') AND
-    (json_get_string(data, '$.commit.operation') = 'create') AND
-    json_get_string(data, '$.commit.collection') IN
-        ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like')
+    data.kind = 'commit' AND
+    data.commit.operation = 'create' AND
+    data.commit.collection in ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like')
 GROUP BY event, hour_of_day
 ORDER BY hour_of_day, event";
     let expected = r#"
@@ -223,13 +215,13 @@ ORDER BY hour_of_day, event";
     // query 4:
     let sql = "
 SELECT
-    json_get_string(data, '$.did') as user_id,
-    min(to_timestamp_micros(json_get_int(data, '$.time_us'))) AS first_post_ts
+    data.did::String as user_id,
+    min(to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) AS first_post_ts
 FROM bluesky
 WHERE
-    (json_get_string(data, '$.kind') = 'commit') AND
-    (json_get_string(data, '$.commit.operation') = 'create') AND
-    (json_get_string(data, '$.commit.collection') = 'app.bsky.feed.post')
+    data.kind = 'commit' AND
+    data.commit.operation = 'create' AND
+    data.commit.collection = 'app.bsky.feed.post'
 GROUP BY user_id
 ORDER BY first_post_ts ASC, user_id DESC
 LIMIT 3";
@@ -246,17 +238,17 @@ LIMIT 3";
     // query 5:
     let sql = "
 SELECT
-    json_get_string(data, '$.did') as user_id,
+    data.did::String as user_id,
     date_part(
         'epoch',
-        max(to_timestamp_micros(json_get_int(data, '$.time_us'))) -
-        min(to_timestamp_micros(json_get_int(data, '$.time_us')))
+        max(to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) -
+          min(to_timestamp_micros(arrow_cast(data.time_us, 'Int64')))
     ) AS activity_span
 FROM bluesky
 WHERE
-    (json_get_string(data, '$.kind') = 'commit') AND
-    (json_get_string(data, '$.commit.operation') = 'create') AND
-    (json_get_string(data, '$.commit.collection') = 'app.bsky.feed.post')
+    data.kind = 'commit' AND
+    data.commit.operation = 'create' AND
+    data.commit.collection = 'app.bsky.feed.post'
 GROUP BY user_id
 ORDER BY activity_span DESC, user_id DESC
 LIMIT 3";
@@ -300,30 +292,21 @@ async fn insert_data_by_sql(frontend: &Arc<Instance>) -> io::Result<()> {
 async fn desc_table(frontend: &Arc<Instance>) {
     let sql = "DESC TABLE bluesky";
     let expected = r#"
-+---------+------------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
-| Column  | Type                                                                                                                                           | Key | Null | Default | Semantic Type |
-+---------+------------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
-| data    | Json<{"_raw":"<String>","commit.collection":"<String>","commit.operation":"<String>","did":"<String>","kind":"<String>","time_us":"<Number>"}> |     | YES  |         | FIELD         |
-| time_us | TimestampMicrosecond                                                                                                                           | PRI | NO   |         | TIMESTAMP     |
-+---------+------------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+"#;
++---------+----------------------+-----+------+---------+---------------+
+| Column  | Type                 | Key | Null | Default | Semantic Type |
++---------+----------------------+-----+------+---------+---------------+
+| data    | JSON2                |     | YES  |         | FIELD         |
+| time_us | TimestampMicrosecond | PRI | NO   |         | TIMESTAMP     |
++---------+----------------------+-----+------+---------+---------------+"#;
     execute_sql_and_expect(frontend, sql, expected).await;
 }
 
 async fn create_table(frontend: &Arc<Instance>) {
     let sql = r#"
 CREATE TABLE bluesky (
-  "data" JSON (
-    format = "partial",
-    fields = Struct<
-      kind String,
-      "commit.operation" String,
-      "commit.collection" String,
-      did String,
-      time_us Bigint
-    >,
-  ),
+  "data" JSON2,
   time_us TimestampMicrosecond TIME INDEX,
-)
+) WITH ('append_mode' = 'true', 'sst_format' = 'flat')
 "#;
     execute_sql_and_expect(frontend, sql, "Affected Rows: 0").await;
 }
diff --git a/tests-integration/tests/main.rs b/tests-integration/tests/it/main.rs
similarity index 100%
rename from tests-integration/tests/main.rs
rename to tests-integration/tests/it/main.rs
diff --git a/tests-integration/tests/region_migration.rs b/tests-integration/tests/it/region_migration.rs
similarity index 100%
rename from tests-integration/tests/region_migration.rs
rename to tests-integration/tests/it/region_migration.rs
diff --git a/tests-integration/tests/repartition.rs b/tests-integration/tests/it/repartition.rs
similarity index 100%
rename from tests-integration/tests/repartition.rs
rename to tests-integration/tests/it/repartition.rs
diff --git a/tests-integration/tests/repartition_expr_version.rs b/tests-integration/tests/it/repartition_expr_version.rs
similarity index 100%
rename from tests-integration/tests/repartition_expr_version.rs
rename to tests-integration/tests/it/repartition_expr_version.rs
diff --git a/tests-integration/tests/sql.rs b/tests-integration/tests/it/sql.rs
similarity index 100%
rename from tests-integration/tests/sql.rs
rename to tests-integration/tests/it/sql.rs
diff --git a/tests/cases/standalone/common/types/json/json-structured.result b/tests/cases/standalone/common/types/json/json-structured.result
deleted file mode 100644
index be04e2652d..0000000000
--- a/tests/cases/standalone/common/types/json/json-structured.result
+++ /dev/null
@@ -1,82 +0,0 @@
-CREATE TABLE t (ts TIMESTAMP TIME INDEX, j JSON(format = "structured") DEFAULT '{"foo": "bar"}');
-
-Error: 1001(Unsupported), Unsupported default constraint for column: 'j', reason: json column cannot have a default value
-
-CREATE TABLE t (ts TIMESTAMP TIME INDEX, j JSON(format = "structured"));
-
-Affected Rows: 0
-
-DESC TABLE t;
-
-+--------+----------------------+-----+------+---------+---------------+
-| Column | Type                 | Key | Null | Default | Semantic Type |
-+--------+----------------------+-----+------+---------+---------------+
-| ts     | TimestampMillisecond | PRI | NO   |         | TIMESTAMP     |
-| j      | Json<"<Null>">       |     | YES  |         | FIELD         |
-+--------+----------------------+-----+------+---------+---------------+
-
-INSERT INTO t VALUES
-(1762128001000, '{"int": 1}'),
-(1762128002000, '{"int": 2, "list": [0.1, 0.2, 0.3]}'),
-(1762128003000, '{"int": 3, "list": [0.4, 0.5, 0.6], "nested": {"a": {"x": "hello"}, "b": {"y": -1}}}');
-
-Affected Rows: 3
-
-DESC TABLE t;
-
-+--------+---------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
-| Column | Type                                                                                              | Key | Null | Default | Semantic Type |
-+--------+---------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
-| ts     | TimestampMillisecond                                                                              | PRI | NO   |         | TIMESTAMP     |
-| j      | Json<{"int":"<Number>","list":["<Number>"],"nested":{"a":{"x":"<String>"},"b":{"y":"<Number>"}}}> |     | YES  |         | FIELD         |
-+--------+---------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
-
-INSERT INTO t VALUES
-(1762128004000, '{"int": 4, "bool": true, "nested": {"a": {"y": 1}}}'),
-(1762128005000, '{"int": 5, "bool": false, "nested": {"b": {"x": "world"}}}');
-
-Affected Rows: 2
-
-DESC TABLE t;
-
-+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
-| Column | Type                                                                                                                                            | Key | Null | Default | Semantic Type |
-+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
-| ts     | TimestampMillisecond                                                                                                                            | PRI | NO   |         | TIMESTAMP     |
-| j      | Json<{"bool":"<Bool>","int":"<Number>","list":["<Number>"],"nested":{"a":{"x":"<String>","y":"<Number>"},"b":{"x":"<String>","y":"<Number>"}}}> |     | YES  |         | FIELD         |
-+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
-
-INSERT INTO t VALUES (1762128006000, '{"int": 6, "list": [-6.0], "bool": true, "nested": {"a": {"x": "ax", "y": 66}, "b": {"y": -66, "x": "bx"}}}');
-
-Affected Rows: 1
-
-DESC TABLE t;
-
-+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
-| Column | Type                                                                                                                                            | Key | Null | Default | Semantic Type |
-+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
-| ts     | TimestampMillisecond                                                                                                                            | PRI | NO   |         | TIMESTAMP     |
-| j      | Json<{"bool":"<Bool>","int":"<Number>","list":["<Number>"],"nested":{"a":{"x":"<String>","y":"<Number>"},"b":{"x":"<String>","y":"<Number>"}}}> |     | YES  |         | FIELD         |
-+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
-
-INSERT INTO t VALUES (1762128011000, '{}');
-
-Error: 1004(InvalidArguments), Invalid InsertRequest, reason: empty json object is not supported, consider adding a dummy field
-
-SELECT ts, j FROM t order by ts;
-
-+---------------------+----------------------------------------------------------------------------------------+
-| ts                  | j                                                                                      |
-+---------------------+----------------------------------------------------------------------------------------+
-| 2025-11-03T00:00:01 | {bool: , int: 1, list: , nested: }                                                     |
-| 2025-11-03T00:00:02 | {bool: , int: 2, list: [0.1, 0.2, 0.3], nested: }                                      |
-| 2025-11-03T00:00:03 | {bool: , int: 3, list: [0.4, 0.5, 0.6], nested: {a: {x: hello, y: }, b: {x: , y: -1}}} |
-| 2025-11-03T00:00:04 | {bool: true, int: 4, list: , nested: {a: {x: , y: 1}, b: }}                            |
-| 2025-11-03T00:00:05 | {bool: false, int: 5, list: , nested: {a: , b: {x: world, y: }}}                       |
-| 2025-11-03T00:00:06 | {bool: true, int: 6, list: [-6.0], nested: {a: {x: ax, y: 66}, b: {x: bx, y: -66}}}    |
-+---------------------+----------------------------------------------------------------------------------------+
-
-DROP table t;
-
-Affected Rows: 0
-
diff --git a/tests/cases/standalone/common/types/json/json-structured.sql b/tests/cases/standalone/common/types/json/json-structured.sql
deleted file mode 100644
index 8bb10b4b0e..0000000000
--- a/tests/cases/standalone/common/types/json/json-structured.sql
+++ /dev/null
@@ -1,28 +0,0 @@
-CREATE TABLE t (ts TIMESTAMP TIME INDEX, j JSON(format = "structured") DEFAULT '{"foo": "bar"}');
-
-CREATE TABLE t (ts TIMESTAMP TIME INDEX, j JSON(format = "structured"));
-
-DESC TABLE t;
-
-INSERT INTO t VALUES
-(1762128001000, '{"int": 1}'),
-(1762128002000, '{"int": 2, "list": [0.1, 0.2, 0.3]}'),
-(1762128003000, '{"int": 3, "list": [0.4, 0.5, 0.6], "nested": {"a": {"x": "hello"}, "b": {"y": -1}}}');
-
-DESC TABLE t;
-
-INSERT INTO t VALUES
-(1762128004000, '{"int": 4, "bool": true, "nested": {"a": {"y": 1}}}'),
-(1762128005000, '{"int": 5, "bool": false, "nested": {"b": {"x": "world"}}}');
-
-DESC TABLE t;
-
-INSERT INTO t VALUES (1762128006000, '{"int": 6, "list": [-6.0], "bool": true, "nested": {"a": {"x": "ax", "y": 66}, "b": {"y": -66, "x": "bx"}}}');
-
-DESC TABLE t;
-
-INSERT INTO t VALUES (1762128011000, '{}');
-
-SELECT ts, j FROM t order by ts;
-
-DROP table t;
diff --git a/tests/cases/standalone/common/types/json/json2.result b/tests/cases/standalone/common/types/json/json2.result
new file mode 100644
index 0000000000..bd2d7ba3c2
--- /dev/null
+++ b/tests/cases/standalone/common/types/json/json2.result
@@ -0,0 +1,182 @@
+create table json2_table (
+    ts timestamp time index,
+    j  json2
+) with (
+    'append_mode' = 'true',
+    'sst_format' = 'flat',
+);
+
+Affected Rows: 0
+
+insert into json2_table (ts, j)
+values (1, '{"a": {"b": 1}, "c": "s1", "d": [{"e": {"f": 0.1}}]}'),
+       (2, '{"a": {"b": -2}, "c": "s2", "d": [{"e": {"f": 0.2}}]}');
+
+Affected Rows: 2
+
+admin flush_table('json2_table');
+
++----------------------------------+
+| ADMIN flush_table('json2_table') |
++----------------------------------+
+| 0                                |
++----------------------------------+
+
+insert into json2_table (ts, j)
+values (3, '{"a": {"b": 3}, "c": "s3"}');
+
+Affected Rows: 1
+
+insert into json2_table
+values (4, '{"a": {"b": -4}, "d": [{"e": {"g": -0.4}}]}'),
+       (5, '{"a": {}, "c": "s5"}'),
+       (6, '{"c": "s6"}');
+
+Affected Rows: 3
+
+admin flush_table('json2_table');
+
++----------------------------------+
+| ADMIN flush_table('json2_table') |
++----------------------------------+
+| 0                                |
++----------------------------------+
+
+admin compact_table('json2_table', 'swcs', '86400');
+
++-----------------------------------------------------+
+| ADMIN compact_table('json2_table', 'swcs', '86400') |
++-----------------------------------------------------+
+| 0                                                   |
++-----------------------------------------------------+
+
+insert into json2_table
+values (7, '{"a": {"b": "s7"}, "c": [1], "d": [{"e": {"g": -0.7}}]}'),
+       (8, '{"a": {"b": 8}, "c": "s8"}');
+
+Affected Rows: 2
+
+admin flush_table('json2_table');
+
++----------------------------------+
+| ADMIN flush_table('json2_table') |
++----------------------------------+
+| 0                                |
++----------------------------------+
+
+insert into json2_table
+values (9, '{"a": {"x": true}, "c": "s9", "d": [{"e": {"g": -0.9}}]}'),
+       (10, '{"a": {"b": 10}, "y": false}');
+
+Affected Rows: 2
+
+select j.a.b from json2_table order by ts;
+
++-----------------------------------------------------+
+| json2_get(json2_table.j,Utf8("a.b"),Utf8View(NULL)) |
++-----------------------------------------------------+
+| 1                                                   |
+| -2                                                  |
+| 3                                                   |
+| -4                                                  |
+|                                                     |
+|                                                     |
+| s7                                                  |
+| 8                                                   |
+|                                                     |
+| 10                                                  |
++-----------------------------------------------------+
+
+select j.a, j.a.x from json2_table order by ts;
+
++---------------------------------------------------+-----------------------------------------------------+
+| json2_get(json2_table.j,Utf8("a"),Utf8View(NULL)) | json2_get(json2_table.j,Utf8("a.x"),Utf8View(NULL)) |
++---------------------------------------------------+-----------------------------------------------------+
+| {b: 1, x: }                                       |                                                     |
+| {b: -2, x: }                                      |                                                     |
+| {b: 3, x: }                                       |                                                     |
+| {b: -4, x: }                                      |                                                     |
+| {b: , x: }                                        |                                                     |
+|                                                   |                                                     |
+| {b: s7, x: }                                      |                                                     |
+| {b: 8, x: }                                       |                                                     |
+| {b: , x: true}                                    | true                                                |
+| {b: 10, x: }                                      |                                                     |
++---------------------------------------------------+-----------------------------------------------------+
+
+select j.c, j.y from json2_table order by ts;
+
++---------------------------------------------------+---------------------------------------------------+
+| json2_get(json2_table.j,Utf8("c"),Utf8View(NULL)) | json2_get(json2_table.j,Utf8("y"),Utf8View(NULL)) |
++---------------------------------------------------+---------------------------------------------------+
+| s1                                                |                                                   |
+| s2                                                |                                                   |
+| s3                                                |                                                   |
+|                                                   |                                                   |
+| s5                                                |                                                   |
+| s6                                                |                                                   |
+| [1]                                               |                                                   |
+| s8                                                |                                                   |
+| s9                                                |                                                   |
+|                                                   | false                                             |
++---------------------------------------------------+---------------------------------------------------+
+
+select j from json2_table order by ts;
+
+Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected Struct() but found Struct("a": Struct("b": Utf8, "x": Boolean), "c": Utf8, "d": List(Struct("e": Struct("f": Float64, "g": Float64))), "y": Boolean) at column index 0
+
+select * from json2_table order by ts;
+
+Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected Struct() but found Struct("a": Struct("b": Utf8, "x": Boolean), "c": Utf8, "d": List(Struct("e": Struct("f": Float64, "g": Float64))), "y": Boolean) at column index 1
+
+select j.a.b + 1 from json2_table order by ts;
+
++-------------------------------------------------------------+
+| json2_get(json2_table.j,Utf8("a.b"),Int64(NULL)) + Int64(1) |
++-------------------------------------------------------------+
+| 2                                                           |
+| -1                                                          |
+| 4                                                           |
+| -3                                                          |
+|                                                             |
+|                                                             |
+|                                                             |
+| 9                                                           |
+|                                                             |
+| 11                                                          |
++-------------------------------------------------------------+
+
+select abs(j.a.b) from json2_table order by ts;
+
+Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Function 'abs' expects NativeType::Numeric but received NativeType::String No function matches the given name and argument types 'abs(Utf8View)'. You might need to add explicit type casts.
+	Candidate functions:
+	abs(Numeric(1))
+
+-- "j.c" is of type "String", "abs" is expected to be all "null"s.
+select abs(j.c) from json2_table order by ts;
+
+Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Function 'abs' expects NativeType::Numeric but received NativeType::String No function matches the given name and argument types 'abs(Utf8View)'. You might need to add explicit type casts.
+	Candidate functions:
+	abs(Numeric(1))
+
+select j.d from json2_table order by ts;
+
++---------------------------------------------------+
+| json2_get(json2_table.j,Utf8("d"),Utf8View(NULL)) |
++---------------------------------------------------+
+| [{e: {f: 0.1, g: }}]                              |
+| [{e: {f: 0.2, g: }}]                              |
+|                                                   |
+| [{e: {f: , g: -0.4}}]                             |
+|                                                   |
+|                                                   |
+| [{e: {g: -0.7}}]                                  |
+|                                                   |
+| [{e: {g: -0.9}}]                                  |
+|                                                   |
++---------------------------------------------------+
+
+drop table json2_table;
+
+Affected Rows: 0
+
diff --git a/tests/cases/standalone/common/types/json/json2.sql b/tests/cases/standalone/common/types/json/json2.sql
new file mode 100644
index 0000000000..c7c32fe94b
--- /dev/null
+++ b/tests/cases/standalone/common/types/json/json2.sql
@@ -0,0 +1,56 @@
+create table json2_table (
+    ts timestamp time index,
+    j  json2
+) with (
+    'append_mode' = 'true',
+    'sst_format' = 'flat',
+);
+
+insert into json2_table (ts, j)
+values (1, '{"a": {"b": 1}, "c": "s1", "d": [{"e": {"f": 0.1}}]}'),
+       (2, '{"a": {"b": -2}, "c": "s2", "d": [{"e": {"f": 0.2}}]}');
+
+admin flush_table('json2_table');
+
+insert into json2_table (ts, j)
+values (3, '{"a": {"b": 3}, "c": "s3"}');
+
+insert into json2_table
+values (4, '{"a": {"b": -4}, "d": [{"e": {"g": -0.4}}]}'),
+       (5, '{"a": {}, "c": "s5"}'),
+       (6, '{"c": "s6"}');
+
+admin flush_table('json2_table');
+
+admin compact_table('json2_table', 'swcs', '86400');
+
+insert into json2_table
+values (7, '{"a": {"b": "s7"}, "c": [1], "d": [{"e": {"g": -0.7}}]}'),
+       (8, '{"a": {"b": 8}, "c": "s8"}');
+
+admin flush_table('json2_table');
+
+insert into json2_table
+values (9, '{"a": {"x": true}, "c": "s9", "d": [{"e": {"g": -0.9}}]}'),
+       (10, '{"a": {"b": 10}, "y": false}');
+
+select j.a.b from json2_table order by ts;
+
+select j.a, j.a.x from json2_table order by ts;
+
+select j.c, j.y from json2_table order by ts;
+
+select j from json2_table order by ts;
+
+select * from json2_table order by ts;
+
+select j.a.b + 1 from json2_table order by ts;
+
+select abs(j.a.b) from json2_table order by ts;
+
+-- "j.c" is of type "String", "abs" is expected to be all "null"s.
+select abs(j.c) from json2_table order by ts;
+
+select j.d from json2_table order by ts;
+
+drop table json2_table;
diff --git a/tests/cases/standalone/common/types/json/jsonbench.result b/tests/cases/standalone/common/types/json/jsonbench.result
new file mode 100644
index 0000000000..5ad96a504b
--- /dev/null
+++ b/tests/cases/standalone/common/types/json/jsonbench.result
@@ -0,0 +1,176 @@
+CREATE TABLE bluesky (
+    `data`  JSON2,
+    time_us TimestampMicrosecond TIME INDEX
+) WITH ('append_mode' = 'true', 'sst_format' = 'flat');
+
+Affected Rows: 0
+
+INSERT INTO bluesky (time_us, data)
+VALUES (1732206349000167,
+        '{"did":"did:plc:yj3sjq3blzpynh27cumnp5ks","time_us":1732206349000167,"kind":"commit","commit":{"rev":"3lbhtytnn2k2f","operation":"create","collection":"app.bsky.feed.post","rkey":"3lbhtyteurk2y","record":{"$type":"app.bsky.feed.post","createdAt":"2024-11-21T16:09:27.095Z","langs":["en"],"reply":{"parent":{"cid":"bafyreibfglofvqou2yiqvwzk4rcgkhhxrbunyemshdjledgwymimqkg24e","uri":"at://did:plc:6tr6tuzlx2db3rduzr2d6r24/app.bsky.feed.post/3lbhqo2rtys2z"},"root":{"cid":"bafyreibfglofvqou2yiqvwzk4rcgkhhxrbunyemshdjledgwymimqkg24e","uri":"at://did:plc:6tr6tuzlx2db3rduzr2d6r24/app.bsky.feed.post/3lbhqo2rtys2z"}},"text":"aaaaah.  LIght shines in a corner of WTF...."},"cid":"bafyreidblutgvj75o4q4akzyyejedjj6l3it6hgqwee6jpwv2wqph5fsgm"}}');
+
+Affected Rows: 1
+
+INSERT INTO bluesky (time_us, data)
+VALUES (1732206349000644,
+        '{"did":"did:plc:3i4xf2v4wcnyktgv6satke64","time_us":1732206349000644,"kind":"commit","commit":{"rev":"3lbhuvzds6d2a","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhuvzdked2a","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:25:46.221Z","subject":{"cid":"bafyreidjvrcmckkm765mct5fph36x7kupkfo35rjklbf2k76xkzwyiauge","uri":"at://did:plc:azrv4rcbws6kmcga4fsbphg2/app.bsky.feed.post/3lbgjdpbiec2l"}},"cid":"bafyreia5l5vrkh5oj4cjyhcqby2dprhyvcyofo2q5562tijlae2pzih23m"}}');
+
+Affected Rows: 1
+
+ADMIN flush_table('bluesky');
+
++------------------------------+
+| ADMIN flush_table('bluesky') |
++------------------------------+
+| 0                            |
++------------------------------+
+
+INSERT INTO bluesky (time_us, data)
+VALUES (1732206349001108,
+        '{"did":"did:plc:gccfnqqizz4urhchsaie6jft","time_us":1732206349001108,"kind":"commit","commit":{"rev":"3lbhuvze3gi2u","operation":"create","collection":"app.bsky.graph.follow","rkey":"3lbhuvzdtmi2u","record":{"$type":"app.bsky.graph.follow","createdAt":"2024-11-21T16:27:40.923Z","subject":"did:plc:r7cdh4sgzqbfdc6wcdxxti7c"},"cid":"bafyreiew2p6cgirfaj45qoenm4fgumib7xoloclrap3jgkz5es7g7kby3i"}}');
+
+Affected Rows: 1
+
+INSERT INTO bluesky (time_us, data)
+VALUES (1732206349001372,
+        '{"did":"did:plc:msxqf3twq7abtdw7dbfskphk","time_us":1732206349001372,"kind":"commit","commit":{"rev":"3lbhueija5p22","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhueiizcx22","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:15:58.232Z","subject":{"cid":"bafyreiavpshyqzrlo5m7fqodjhs6jevweqnif4phasiwimv4a7mnsqi2fe","uri":"at://did:plc:fusulxqc52zbrc75fi6xrcof/app.bsky.feed.post/3lbhskq5zn22f"}},"cid":"bafyreidjix4dauj2afjlbzmhj3a7gwftcevvmmy6edww6vrjdbst26rkby"}}');
+
+Affected Rows: 1
+
+ADMIN flush_table('bluesky');
+
++------------------------------+
+| ADMIN flush_table('bluesky') |
++------------------------------+
+| 0                            |
++------------------------------+
+
+INSERT INTO bluesky (time_us, data)
+VALUES (1732206349001905,
+        '{"did":"did:plc:l5o3qjrmfztir54cpwlv2eme","time_us":1732206349001905,"kind":"commit","commit":{"rev":"3lbhtytohxc2o","operation":"create","collection":"app.bsky.feed.post","rkey":"3lbhtytjqzk2q","record":{"$type":"app.bsky.feed.post","createdAt":"2024-11-21T16:09:27.254Z","langs":["en"],"reply":{"parent":{"cid":"bafyreih35fe2jj3gchmgk4amold4l6sfxd2sby5wrg3jrws5fkdypxrbg4","uri":"at://did:plc:6wx2gg5yqgvmlu35r6y3bk6d/app.bsky.feed.post/3lbhtj2eb4s2o"},"root":{"cid":"bafyreifipyt3vctd4ptuoicvio7rbr5xvjv4afwuggnd2prnmn55mu6luu","uri":"at://did:plc:474ldquxwzrlcvjhhbbk2wte/app.bsky.feed.post/3lbhdzrynik27"}},"text":"okay i take mine back because I hadn’t heard this one yet^^"},"cid":"bafyreigzdsdne3z2xxcakgisieyj7y47hj6eg7lj6v4q25ah5q2qotu5ku"}}');
+
+Affected Rows: 1
+
+ADMIN compact_table('bluesky', 'swcs', '86400');
+
++-------------------------------------------------+
+| ADMIN compact_table('bluesky', 'swcs', '86400') |
++-------------------------------------------------+
+| 0                                               |
++-------------------------------------------------+
+
+SELECT count(*) FROM bluesky;
+
++----------+
+| count(*) |
++----------+
+| 5        |
++----------+
+
+-- Query 1:
+SELECT data.commit.collection AS event,
+       count() AS count
+FROM bluesky
+GROUP BY event
+ORDER BY count DESC, event ASC;
+
++-----------------------+-------+
+| event                 | count |
++-----------------------+-------+
+| app.bsky.feed.like    | 2     |
+| app.bsky.feed.post    | 2     |
+| app.bsky.graph.follow | 1     |
++-----------------------+-------+
+
+-- Query 2:
+SELECT data.commit.collection AS event,
+       count() AS count,
+       count(DISTINCT data.did) AS users
+FROM bluesky
+WHERE data.kind = 'commit' AND data.commit.operation = 'create'
+GROUP BY event
+ORDER BY count DESC, event ASC;
+
++-----------------------+-------+-------+
+| event                 | count | users |
++-----------------------+-------+-------+
+| app.bsky.feed.like    | 2     | 2     |
+| app.bsky.feed.post    | 2     | 2     |
+| app.bsky.graph.follow | 1     | 1     |
++-----------------------+-------+-------+
+
+-- Query 3:
+SELECT data.commit.collection AS event,
+       date_part('hour', to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) as hour_of_day,
+       count() AS count
+FROM bluesky
+WHERE data.kind = 'commit'
+  AND data.commit.operation = 'create'
+  AND data.commit.collection in ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like')
+GROUP BY event, hour_of_day
+ORDER BY hour_of_day, event;
+
++--------------------+-------------+-------+
+| event              | hour_of_day | count |
++--------------------+-------------+-------+
+| app.bsky.feed.like | 16          | 2     |
+| app.bsky.feed.post | 16          | 2     |
++--------------------+-------------+-------+
+
+-- Query 4:
+SELECT data.did::String as user_id,
+       min(to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) AS first_post_ts
+FROM bluesky
+WHERE data.kind = 'commit'
+  AND data.commit.operation = 'create'
+  AND data.commit.collection = 'app.bsky.feed.post'
+GROUP BY user_id
+ORDER BY first_post_ts ASC, user_id DESC
+LIMIT 3;
+
++----------------------------------+----------------------------+
+| user_id                          | first_post_ts              |
++----------------------------------+----------------------------+
+| did:plc:yj3sjq3blzpynh27cumnp5ks | 2024-11-21T16:25:49.000167 |
+| did:plc:l5o3qjrmfztir54cpwlv2eme | 2024-11-21T16:25:49.001905 |
++----------------------------------+----------------------------+
+
+-- Query 5:
+SELECT data.did::String as user_id,
+       date_part(
+           'epoch',
+           max(to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) -
+             min(to_timestamp_micros(arrow_cast(data.time_us, 'Int64')))
+       ) AS activity_span
+FROM bluesky
+WHERE data.kind = 'commit'
+  AND data.commit.operation = 'create'
+  AND data.commit.collection = 'app.bsky.feed.post'
+GROUP BY user_id
+ORDER BY activity_span DESC, user_id DESC
+LIMIT 3;
+
++----------------------------------+---------------+
+| user_id                          | activity_span |
++----------------------------------+---------------+
+| did:plc:yj3sjq3blzpynh27cumnp5ks | 0.0           |
+| did:plc:l5o3qjrmfztir54cpwlv2eme | 0.0           |
++----------------------------------+---------------+
+
+-- SQLNESS REPLACE (peers.*) REDACTED
+EXPLAIN
+SELECT date_part('hour', to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) as hour_of_day
+FROM bluesky;
+
++---------------+--------------------------------------------------------------------------------------------------------------------------------+
+| plan_type     | plan                                                                                                                           |
++---------------+--------------------------------------------------------------------------------------------------------------------------------+
+| logical_plan  | MergeScan [is_placeholder=false, remote_input=[                                                                                |
+|               | Projection: date_part(Utf8("hour"), to_timestamp_micros(json2_get(bluesky.data, Utf8("time_us"), Int64(NULL)))) AS hour_of_day |
+|               |   TableScan: bluesky                                                                                                           |
+|               | ]]                                                                                                                             |
+| physical_plan | CooperativeExec                                                                                                                |
+|               |   MergeScanExec: REDACTED
+|               |                                                                                                                                |
++---------------+--------------------------------------------------------------------------------------------------------------------------------+
+
diff --git a/tests/cases/standalone/common/types/json/jsonbench.sql b/tests/cases/standalone/common/types/json/jsonbench.sql
new file mode 100644
index 0000000000..c3f74ccec4
--- /dev/null
+++ b/tests/cases/standalone/common/types/json/jsonbench.sql
@@ -0,0 +1,90 @@
+CREATE TABLE bluesky (
+    `data`  JSON2,
+    time_us TimestampMicrosecond TIME INDEX
+) WITH ('append_mode' = 'true', 'sst_format' = 'flat');
+
+INSERT INTO bluesky (time_us, data)
+VALUES (1732206349000167,
+        '{"did":"did:plc:yj3sjq3blzpynh27cumnp5ks","time_us":1732206349000167,"kind":"commit","commit":{"rev":"3lbhtytnn2k2f","operation":"create","collection":"app.bsky.feed.post","rkey":"3lbhtyteurk2y","record":{"$type":"app.bsky.feed.post","createdAt":"2024-11-21T16:09:27.095Z","langs":["en"],"reply":{"parent":{"cid":"bafyreibfglofvqou2yiqvwzk4rcgkhhxrbunyemshdjledgwymimqkg24e","uri":"at://did:plc:6tr6tuzlx2db3rduzr2d6r24/app.bsky.feed.post/3lbhqo2rtys2z"},"root":{"cid":"bafyreibfglofvqou2yiqvwzk4rcgkhhxrbunyemshdjledgwymimqkg24e","uri":"at://did:plc:6tr6tuzlx2db3rduzr2d6r24/app.bsky.feed.post/3lbhqo2rtys2z"}},"text":"aaaaah.  LIght shines in a corner of WTF...."},"cid":"bafyreidblutgvj75o4q4akzyyejedjj6l3it6hgqwee6jpwv2wqph5fsgm"}}');
+
+INSERT INTO bluesky (time_us, data)
+VALUES (1732206349000644,
+        '{"did":"did:plc:3i4xf2v4wcnyktgv6satke64","time_us":1732206349000644,"kind":"commit","commit":{"rev":"3lbhuvzds6d2a","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhuvzdked2a","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:25:46.221Z","subject":{"cid":"bafyreidjvrcmckkm765mct5fph36x7kupkfo35rjklbf2k76xkzwyiauge","uri":"at://did:plc:azrv4rcbws6kmcga4fsbphg2/app.bsky.feed.post/3lbgjdpbiec2l"}},"cid":"bafyreia5l5vrkh5oj4cjyhcqby2dprhyvcyofo2q5562tijlae2pzih23m"}}');
+
+ADMIN flush_table('bluesky');
+
+INSERT INTO bluesky (time_us, data)
+VALUES (1732206349001108,
+        '{"did":"did:plc:gccfnqqizz4urhchsaie6jft","time_us":1732206349001108,"kind":"commit","commit":{"rev":"3lbhuvze3gi2u","operation":"create","collection":"app.bsky.graph.follow","rkey":"3lbhuvzdtmi2u","record":{"$type":"app.bsky.graph.follow","createdAt":"2024-11-21T16:27:40.923Z","subject":"did:plc:r7cdh4sgzqbfdc6wcdxxti7c"},"cid":"bafyreiew2p6cgirfaj45qoenm4fgumib7xoloclrap3jgkz5es7g7kby3i"}}');
+
+INSERT INTO bluesky (time_us, data)
+VALUES (1732206349001372,
+        '{"did":"did:plc:msxqf3twq7abtdw7dbfskphk","time_us":1732206349001372,"kind":"commit","commit":{"rev":"3lbhueija5p22","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhueiizcx22","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:15:58.232Z","subject":{"cid":"bafyreiavpshyqzrlo5m7fqodjhs6jevweqnif4phasiwimv4a7mnsqi2fe","uri":"at://did:plc:fusulxqc52zbrc75fi6xrcof/app.bsky.feed.post/3lbhskq5zn22f"}},"cid":"bafyreidjix4dauj2afjlbzmhj3a7gwftcevvmmy6edww6vrjdbst26rkby"}}');
+
+ADMIN flush_table('bluesky');
+
+INSERT INTO bluesky (time_us, data)
+VALUES (1732206349001905,
+        '{"did":"did:plc:l5o3qjrmfztir54cpwlv2eme","time_us":1732206349001905,"kind":"commit","commit":{"rev":"3lbhtytohxc2o","operation":"create","collection":"app.bsky.feed.post","rkey":"3lbhtytjqzk2q","record":{"$type":"app.bsky.feed.post","createdAt":"2024-11-21T16:09:27.254Z","langs":["en"],"reply":{"parent":{"cid":"bafyreih35fe2jj3gchmgk4amold4l6sfxd2sby5wrg3jrws5fkdypxrbg4","uri":"at://did:plc:6wx2gg5yqgvmlu35r6y3bk6d/app.bsky.feed.post/3lbhtj2eb4s2o"},"root":{"cid":"bafyreifipyt3vctd4ptuoicvio7rbr5xvjv4afwuggnd2prnmn55mu6luu","uri":"at://did:plc:474ldquxwzrlcvjhhbbk2wte/app.bsky.feed.post/3lbhdzrynik27"}},"text":"okay i take mine back because I hadn’t heard this one yet^^"},"cid":"bafyreigzdsdne3z2xxcakgisieyj7y47hj6eg7lj6v4q25ah5q2qotu5ku"}}');
+
+ADMIN compact_table('bluesky', 'swcs', '86400');
+
+SELECT count(*) FROM bluesky;
+
+-- Query 1:
+SELECT data.commit.collection AS event,
+       count() AS count
+FROM bluesky
+GROUP BY event
+ORDER BY count DESC, event ASC;
+
+-- Query 2:
+SELECT data.commit.collection AS event,
+       count() AS count,
+       count(DISTINCT data.did) AS users
+FROM bluesky
+WHERE data.kind = 'commit' AND data.commit.operation = 'create'
+GROUP BY event
+ORDER BY count DESC, event ASC;
+
+-- Query 3:
+SELECT data.commit.collection AS event,
+       date_part('hour', to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) as hour_of_day,
+       count() AS count
+FROM bluesky
+WHERE data.kind = 'commit'
+  AND data.commit.operation = 'create'
+  AND data.commit.collection in ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like')
+GROUP BY event, hour_of_day
+ORDER BY hour_of_day, event;
+
+-- Query 4:
+SELECT data.did::String as user_id,
+       min(to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) AS first_post_ts
+FROM bluesky
+WHERE data.kind = 'commit'
+  AND data.commit.operation = 'create'
+  AND data.commit.collection = 'app.bsky.feed.post'
+GROUP BY user_id
+ORDER BY first_post_ts ASC, user_id DESC
+LIMIT 3;
+
+-- Query 5:
+SELECT data.did::String as user_id,
+       date_part(
+           'epoch',
+           max(to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) -
+             min(to_timestamp_micros(arrow_cast(data.time_us, 'Int64')))
+       ) AS activity_span
+FROM bluesky
+WHERE data.kind = 'commit'
+  AND data.commit.operation = 'create'
+  AND data.commit.collection = 'app.bsky.feed.post'
+GROUP BY user_id
+ORDER BY activity_span DESC, user_id DESC
+LIMIT 3;
+
+-- SQLNESS REPLACE (peers.*) REDACTED
+EXPLAIN
+SELECT date_part('hour', to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) as hour_of_day
+FROM bluesky;