Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
luofucong
2026-05-27 17:27:21 +08:00
parent bf7e3551fe
commit 3ac7235c68
6 changed files with 135 additions and 70 deletions

View File

@@ -26,12 +26,12 @@ use std::sync::Arc;
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value as Json};
use snafu::{OptionExt, ResultExt, ensure};
use snafu::{OptionExt, ResultExt};
use crate::error::{self, InvalidJsonSnafu, Result, SerializeSnafu};
use crate::json::value::{JsonValue, JsonVariant};
use crate::types::json_type::{JsonNativeType, JsonNumberType, JsonObjectType};
use crate::types::{StructField, StructType};
use crate::types::{JsonType, StructField, StructType};
use crate::value::{ListValue, StructValue, Value};
/// The configuration of JSON encoding
@@ -305,33 +305,45 @@ fn encode_json_array_with_context<'a>(
) -> Result<JsonValue> {
let json_array_len = json_array.len();
let mut items = Vec::with_capacity(json_array_len);
let mut element_type = item_type.cloned();
for (index, value) in json_array.into_iter().enumerate() {
let array_context = context.with_key(&index.to_string());
let item_value =
encode_json_value_with_context(value, element_type.as_ref(), &array_context)?;
let item_type = item_value.json_type().native_type().clone();
items.push(item_value.into_variant());
// Determine the common type for the list
if let Some(current_type) = &element_type {
// It's valid for json array to have different types of items, for example,
// ["a string", 1]. However, the `JsonValue` will be converted to Arrow list array,
// which requires all items have exactly same type. So we forbid the different types
// case here. Besides, it's not common for items in a json array to differ. So I think
// we are good here.
ensure!(
item_type == *current_type,
error::InvalidJsonSnafu {
value: "all items in json array must have the same type"
}
);
} else {
element_type = Some(item_type);
}
let item_value = encode_json_value_with_context(value, None, &array_context)?;
items.push(item_value);
}
// In specification, it's valid for a JSON array to have different types of items, for example,
// ["a string", 1]. However, in implementation, the `JsonValue` will be converted to Arrow list
// array, which requires all items have exactly the same type. So we merge out the maybe
// different item types to a unified type, and align all the item values to it.
let provided_item_type = item_type.map(|x| JsonType::new_json2(x.clone()));
let merged_item_type = if let Some((first, rests)) = items.split_first() {
let mut merged = first.json_type().clone();
for rest in rests.iter().map(|x| x.json_type()) {
merged.merge(rest)?;
}
Some(merged)
} else {
None
};
let unified_item_type = match (provided_item_type, merged_item_type) {
(Some(mut x), Some(y)) => {
x.merge(&y)?;
Some(x)
}
(Some(x), None) | (None, Some(x)) => Some(x),
(None, None) => None,
};
if let Some(unified_item_type) = unified_item_type {
for item in &mut items {
item.try_align(&unified_item_type)?;
}
}
let items = items
.into_iter()
.map(|x| x.into_variant())
.collect::<Vec<_>>();
Ok(JsonValue::new(JsonVariant::Array(items)))
}

View File

@@ -65,6 +65,14 @@ impl JsonNumber {
JsonNumber::Float(n) => n.0,
}
}
fn native_type(&self) -> JsonNativeType {
match self {
JsonNumber::PosInt(_) => JsonNativeType::u64(),
JsonNumber::NegInt(_) => JsonNativeType::i64(),
JsonNumber::Float(_) => JsonNativeType::f64(),
}
}
}
impl From<u64> for JsonNumber {
@@ -147,26 +155,14 @@ impl JsonVariant {
match self {
JsonVariant::Null => JsonNativeType::Null,
JsonVariant::Bool(_) => JsonNativeType::Bool,
JsonVariant::Number(n) => match n {
JsonNumber::PosInt(_) => JsonNativeType::u64(),
JsonNumber::NegInt(_) => JsonNativeType::i64(),
JsonNumber::Float(_) => JsonNativeType::f64(),
},
JsonVariant::Number(n) => n.native_type(),
JsonVariant::String(_) => JsonNativeType::String,
JsonVariant::Array(array) => {
let item_type = if let Some(first) = array.first() {
first.native_type()
} else {
JsonNativeType::Null
};
JsonNativeType::Array(Box::new(item_type))
json_array_native_type(array.iter().map(JsonVariant::native_type))
}
JsonVariant::Object(object) => {
json_object_native_type(object.iter().map(|(k, v)| (k, v.native_type())))
}
JsonVariant::Object(object) => JsonNativeType::Object(
object
.iter()
.map(|(k, v)| (k.clone(), v.native_type()))
.collect(),
),
JsonVariant::Variant(_) => JsonNativeType::Variant,
}
}
@@ -469,6 +465,7 @@ impl JsonValue {
.collect::<Result<_>>()?,
),
(JsonVariant::Object(kvs), _) if kvs.is_empty() => JsonVariant::Null,
(JsonVariant::Object(mut kvs), JsonNativeType::Object(expected)) => {
ensure!(
expected.keys().len() >= kvs.keys().len()
@@ -517,7 +514,7 @@ impl JsonValue {
let x = std::mem::take(&mut self.json_variant);
self.json_variant = helper(x, expected.native_type())?;
self.json_type = OnceLock::from(expected.clone());
self.json_type = OnceLock::new();
Ok(())
}
}
@@ -623,35 +620,51 @@ pub enum JsonVariantRef<'a> {
}
impl JsonVariantRef<'_> {
fn json_type(&self) -> JsonType {
fn native_type(v: &JsonVariantRef<'_>) -> JsonNativeType {
match v {
JsonVariantRef::Null => JsonNativeType::Null,
JsonVariantRef::Bool(_) => JsonNativeType::Bool,
JsonVariantRef::Number(n) => match n {
JsonNumber::PosInt(_) => JsonNativeType::u64(),
JsonNumber::NegInt(_) => JsonNativeType::i64(),
JsonNumber::Float(_) => JsonNativeType::f64(),
},
JsonVariantRef::String(_) => JsonNativeType::String,
JsonVariantRef::Array(array) => {
let item_type = if let Some(first) = array.first() {
native_type(first)
} else {
JsonNativeType::Null
};
JsonNativeType::Array(Box::new(item_type))
}
JsonVariantRef::Object(object) => JsonNativeType::Object(
object
.iter()
.map(|(k, v)| (k.to_string(), native_type(v)))
.collect(),
),
JsonVariantRef::Variant(_) => JsonNativeType::Variant,
fn native_type(&self) -> JsonNativeType {
match self {
JsonVariantRef::Null => JsonNativeType::Null,
JsonVariantRef::Bool(_) => JsonNativeType::Bool,
JsonVariantRef::Number(n) => n.native_type(),
JsonVariantRef::String(_) => JsonNativeType::String,
JsonVariantRef::Array(array) => {
json_array_native_type(array.iter().map(JsonVariantRef::native_type))
}
JsonVariantRef::Object(object) => {
json_object_native_type(object.iter().map(|(k, v)| (*k, v.native_type())))
}
JsonVariantRef::Variant(_) => JsonNativeType::Variant,
}
JsonType::new_json2(native_type(self))
}
fn json_type(&self) -> JsonType {
JsonType::new_json2(self.native_type())
}
}
fn json_array_native_type<I>(items: I) -> JsonNativeType
where
I: IntoIterator<Item = JsonNativeType>,
{
let item_type = items
.into_iter()
.reduce(|mut acc, y| {
acc.merge(&y);
acc
})
.unwrap_or(JsonNativeType::Null);
JsonNativeType::Array(Box::new(item_type))
}
fn json_object_native_type<I, K>(fields: I) -> JsonNativeType
where
I: IntoIterator<Item = (K, JsonNativeType)>,
K: Into<String>,
{
let mut fields = fields.into_iter().peekable();
if fields.peek().is_none() {
JsonNativeType::Null
} else {
JsonNativeType::Object(fields.map(|(k, v)| (k.into(), v)).collect())
}
}

View File

@@ -115,6 +115,14 @@ impl JsonNativeType {
(JsonNativeType::Null, that) => that.clone(),
(this, JsonNativeType::Null) => this,
(this, that) if this == *that => this,
(JsonNativeType::Number(x), JsonNativeType::Number(y)) => {
JsonNativeType::Number(match (x, y) {
(x, y) if x == *y => x,
(JsonNumberType::F64, _) | (_, JsonNumberType::F64) => JsonNumberType::F64,
_ => JsonNumberType::I64,
})
}
_ => JsonNativeType::Variant,
};
}

View File

@@ -89,7 +89,9 @@ impl MutableVector for JsonVectorBuilder {
.fail();
};
let json_type = value.json_type();
self.merged_type.merge(json_type)?;
if !self.merged_type.is_include(json_type) {
self.merged_type.merge(json_type)?;
}
let value = JsonValue::new(JsonVariant::from(value.variant().clone()));
self.values.push(value);

View File

@@ -151,6 +151,14 @@ select j.c, j.y from json2_table order by ts;
| | false |
+-----------------------------------+-----------------------------------+
select j from json2_table order by ts;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected Struct() but found Struct("a": Struct("b": Binary, "x": Boolean), "c": Binary, "d": List(Struct("e": Struct("f": Float64, "g": Float64))), "y": Boolean) at column index 0
select * from json2_table order by ts;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected Struct() but found Struct("a": Struct("b": Binary, "x": Boolean), "c": Binary, "d": List(Struct("e": Struct("f": Float64, "g": Float64))), "y": Boolean) at column index 1
select j.a.b + 1 from json2_table order by ts;
+------------------------------------------------------------+
@@ -168,6 +176,19 @@ select j.a.b + 1 from json2_table order by ts;
| 11 |
+------------------------------------------------------------+
select abs(j.a.b) from json2_table order by ts;
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Function 'abs' expects NativeType::Numeric but received NativeType::String No function matches the given name and argument types 'abs(Utf8View)'. You might need to add explicit type casts.
Candidate functions:
abs(Numeric(1))
-- "j.c" is of type "String", "abs" is expected to be all "null"s.
select abs(j.c) from json2_table order by ts;
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Function 'abs' expects NativeType::Numeric but received NativeType::String No function matches the given name and argument types 'abs(Utf8View)'. You might need to add explicit type casts.
Candidate functions:
abs(Numeric(1))
select j.d from json2_table order by ts;
+-----------------------------------+

View File

@@ -46,8 +46,17 @@ select j.a, j.a.x from json2_table order by ts;
select j.c, j.y from json2_table order by ts;
select j from json2_table order by ts;
select * from json2_table order by ts;
select j.a.b + 1 from json2_table order by ts;
select abs(j.a.b) from json2_table order by ts;
-- "j.c" is of type "String", "abs" is expected to be all "null"s.
select abs(j.c) from json2_table order by ts;
select j.d from json2_table order by ts;
drop table json2_table;