diff --git a/src/datatypes/src/json.rs b/src/datatypes/src/json.rs index db657abbcb..114ebe1e5c 100644 --- a/src/datatypes/src/json.rs +++ b/src/datatypes/src/json.rs @@ -26,12 +26,12 @@ use std::sync::Arc; use serde::{Deserialize, Serialize}; use serde_json::{Map, Value as Json}; -use snafu::{OptionExt, ResultExt, ensure}; +use snafu::{OptionExt, ResultExt}; use crate::error::{self, InvalidJsonSnafu, Result, SerializeSnafu}; use crate::json::value::{JsonValue, JsonVariant}; use crate::types::json_type::{JsonNativeType, JsonNumberType, JsonObjectType}; -use crate::types::{StructField, StructType}; +use crate::types::{JsonType, StructField, StructType}; use crate::value::{ListValue, StructValue, Value}; /// The configuration of JSON encoding @@ -305,33 +305,45 @@ fn encode_json_array_with_context<'a>( ) -> Result { let json_array_len = json_array.len(); let mut items = Vec::with_capacity(json_array_len); - let mut element_type = item_type.cloned(); for (index, value) in json_array.into_iter().enumerate() { let array_context = context.with_key(&index.to_string()); - let item_value = - encode_json_value_with_context(value, element_type.as_ref(), &array_context)?; - let item_type = item_value.json_type().native_type().clone(); - items.push(item_value.into_variant()); - - // Determine the common type for the list - if let Some(current_type) = &element_type { - // It's valid for json array to have different types of items, for example, - // ["a string", 1]. However, the `JsonValue` will be converted to Arrow list array, - // which requires all items have exactly same type. So we forbid the different types - // case here. Besides, it's not common for items in a json array to differ. So I think - // we are good here. - ensure!( - item_type == *current_type, - error::InvalidJsonSnafu { - value: "all items in json array must have the same type" - } - ); - } else { - element_type = Some(item_type); - } + let item_value = encode_json_value_with_context(value, None, &array_context)?; + items.push(item_value); } + // In specification, it's valid for a JSON array to have different types of items, for example, + // ["a string", 1]. However, in implementation, the `JsonValue` will be converted to Arrow list + // array, which requires all items have exactly the same type. So we merge out the maybe + // different item types to a unified type, and align all the item values to it. + + let provided_item_type = item_type.map(|x| JsonType::new_json2(x.clone())); + let merged_item_type = if let Some((first, rests)) = items.split_first() { + let mut merged = first.json_type().clone(); + for rest in rests.iter().map(|x| x.json_type()) { + merged.merge(rest)?; + } + Some(merged) + } else { + None + }; + let unified_item_type = match (provided_item_type, merged_item_type) { + (Some(mut x), Some(y)) => { + x.merge(&y)?; + Some(x) + } + (Some(x), None) | (None, Some(x)) => Some(x), + (None, None) => None, + }; + if let Some(unified_item_type) = unified_item_type { + for item in &mut items { + item.try_align(&unified_item_type)?; + } + } + let items = items + .into_iter() + .map(|x| x.into_variant()) + .collect::>(); Ok(JsonValue::new(JsonVariant::Array(items))) } diff --git a/src/datatypes/src/json/value.rs b/src/datatypes/src/json/value.rs index f3b652a549..f3eab2a04c 100644 --- a/src/datatypes/src/json/value.rs +++ b/src/datatypes/src/json/value.rs @@ -65,6 +65,14 @@ impl JsonNumber { JsonNumber::Float(n) => n.0, } } + + fn native_type(&self) -> JsonNativeType { + match self { + JsonNumber::PosInt(_) => JsonNativeType::u64(), + JsonNumber::NegInt(_) => JsonNativeType::i64(), + JsonNumber::Float(_) => JsonNativeType::f64(), + } + } } impl From for JsonNumber { @@ -147,26 +155,14 @@ impl JsonVariant { match self { JsonVariant::Null => JsonNativeType::Null, JsonVariant::Bool(_) => JsonNativeType::Bool, - JsonVariant::Number(n) => match n { - JsonNumber::PosInt(_) => JsonNativeType::u64(), - JsonNumber::NegInt(_) => JsonNativeType::i64(), - JsonNumber::Float(_) => JsonNativeType::f64(), - }, + JsonVariant::Number(n) => n.native_type(), JsonVariant::String(_) => JsonNativeType::String, JsonVariant::Array(array) => { - let item_type = if let Some(first) = array.first() { - first.native_type() - } else { - JsonNativeType::Null - }; - JsonNativeType::Array(Box::new(item_type)) + json_array_native_type(array.iter().map(JsonVariant::native_type)) + } + JsonVariant::Object(object) => { + json_object_native_type(object.iter().map(|(k, v)| (k, v.native_type()))) } - JsonVariant::Object(object) => JsonNativeType::Object( - object - .iter() - .map(|(k, v)| (k.clone(), v.native_type())) - .collect(), - ), JsonVariant::Variant(_) => JsonNativeType::Variant, } } @@ -469,6 +465,7 @@ impl JsonValue { .collect::>()?, ), + (JsonVariant::Object(kvs), _) if kvs.is_empty() => JsonVariant::Null, (JsonVariant::Object(mut kvs), JsonNativeType::Object(expected)) => { ensure!( expected.keys().len() >= kvs.keys().len() @@ -517,7 +514,7 @@ impl JsonValue { let x = std::mem::take(&mut self.json_variant); self.json_variant = helper(x, expected.native_type())?; - self.json_type = OnceLock::from(expected.clone()); + self.json_type = OnceLock::new(); Ok(()) } } @@ -623,35 +620,51 @@ pub enum JsonVariantRef<'a> { } impl JsonVariantRef<'_> { - fn json_type(&self) -> JsonType { - fn native_type(v: &JsonVariantRef<'_>) -> JsonNativeType { - match v { - JsonVariantRef::Null => JsonNativeType::Null, - JsonVariantRef::Bool(_) => JsonNativeType::Bool, - JsonVariantRef::Number(n) => match n { - JsonNumber::PosInt(_) => JsonNativeType::u64(), - JsonNumber::NegInt(_) => JsonNativeType::i64(), - JsonNumber::Float(_) => JsonNativeType::f64(), - }, - JsonVariantRef::String(_) => JsonNativeType::String, - JsonVariantRef::Array(array) => { - let item_type = if let Some(first) = array.first() { - native_type(first) - } else { - JsonNativeType::Null - }; - JsonNativeType::Array(Box::new(item_type)) - } - JsonVariantRef::Object(object) => JsonNativeType::Object( - object - .iter() - .map(|(k, v)| (k.to_string(), native_type(v))) - .collect(), - ), - JsonVariantRef::Variant(_) => JsonNativeType::Variant, + fn native_type(&self) -> JsonNativeType { + match self { + JsonVariantRef::Null => JsonNativeType::Null, + JsonVariantRef::Bool(_) => JsonNativeType::Bool, + JsonVariantRef::Number(n) => n.native_type(), + JsonVariantRef::String(_) => JsonNativeType::String, + JsonVariantRef::Array(array) => { + json_array_native_type(array.iter().map(JsonVariantRef::native_type)) } + JsonVariantRef::Object(object) => { + json_object_native_type(object.iter().map(|(k, v)| (*k, v.native_type()))) + } + JsonVariantRef::Variant(_) => JsonNativeType::Variant, } - JsonType::new_json2(native_type(self)) + } + + fn json_type(&self) -> JsonType { + JsonType::new_json2(self.native_type()) + } +} + +fn json_array_native_type(items: I) -> JsonNativeType +where + I: IntoIterator, +{ + let item_type = items + .into_iter() + .reduce(|mut acc, y| { + acc.merge(&y); + acc + }) + .unwrap_or(JsonNativeType::Null); + JsonNativeType::Array(Box::new(item_type)) +} + +fn json_object_native_type(fields: I) -> JsonNativeType +where + I: IntoIterator, + K: Into, +{ + let mut fields = fields.into_iter().peekable(); + if fields.peek().is_none() { + JsonNativeType::Null + } else { + JsonNativeType::Object(fields.map(|(k, v)| (k.into(), v)).collect()) } } diff --git a/src/datatypes/src/types/json_type.rs b/src/datatypes/src/types/json_type.rs index e8d06543ed..167a44b784 100644 --- a/src/datatypes/src/types/json_type.rs +++ b/src/datatypes/src/types/json_type.rs @@ -115,6 +115,14 @@ impl JsonNativeType { (JsonNativeType::Null, that) => that.clone(), (this, JsonNativeType::Null) => this, (this, that) if this == *that => this, + + (JsonNativeType::Number(x), JsonNativeType::Number(y)) => { + JsonNativeType::Number(match (x, y) { + (x, y) if x == *y => x, + (JsonNumberType::F64, _) | (_, JsonNumberType::F64) => JsonNumberType::F64, + _ => JsonNumberType::I64, + }) + } _ => JsonNativeType::Variant, }; } diff --git a/src/datatypes/src/vectors/json/builder.rs b/src/datatypes/src/vectors/json/builder.rs index be79a921c7..7ca1ff2f6a 100644 --- a/src/datatypes/src/vectors/json/builder.rs +++ b/src/datatypes/src/vectors/json/builder.rs @@ -89,7 +89,9 @@ impl MutableVector for JsonVectorBuilder { .fail(); }; let json_type = value.json_type(); - self.merged_type.merge(json_type)?; + if !self.merged_type.is_include(json_type) { + self.merged_type.merge(json_type)?; + } let value = JsonValue::new(JsonVariant::from(value.variant().clone())); self.values.push(value); diff --git a/tests/cases/standalone/common/types/json/json2.result b/tests/cases/standalone/common/types/json/json2.result index 71e119307c..90817b2cd9 100644 --- a/tests/cases/standalone/common/types/json/json2.result +++ b/tests/cases/standalone/common/types/json/json2.result @@ -151,6 +151,14 @@ select j.c, j.y from json2_table order by ts; | | false | +-----------------------------------+-----------------------------------+ +select j from json2_table order by ts; + +Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected Struct() but found Struct("a": Struct("b": Binary, "x": Boolean), "c": Binary, "d": List(Struct("e": Struct("f": Float64, "g": Float64))), "y": Boolean) at column index 0 + +select * from json2_table order by ts; + +Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected Struct() but found Struct("a": Struct("b": Binary, "x": Boolean), "c": Binary, "d": List(Struct("e": Struct("f": Float64, "g": Float64))), "y": Boolean) at column index 1 + select j.a.b + 1 from json2_table order by ts; +------------------------------------------------------------+ @@ -168,6 +176,19 @@ select j.a.b + 1 from json2_table order by ts; | 11 | +------------------------------------------------------------+ +select abs(j.a.b) from json2_table order by ts; + +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Function 'abs' expects NativeType::Numeric but received NativeType::String No function matches the given name and argument types 'abs(Utf8View)'. You might need to add explicit type casts. + Candidate functions: + abs(Numeric(1)) + +-- "j.c" is of type "String", "abs" is expected to be all "null"s. +select abs(j.c) from json2_table order by ts; + +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Function 'abs' expects NativeType::Numeric but received NativeType::String No function matches the given name and argument types 'abs(Utf8View)'. You might need to add explicit type casts. + Candidate functions: + abs(Numeric(1)) + select j.d from json2_table order by ts; +-----------------------------------+ diff --git a/tests/cases/standalone/common/types/json/json2.sql b/tests/cases/standalone/common/types/json/json2.sql index 8dd6789bce..cb8df2f8b9 100644 --- a/tests/cases/standalone/common/types/json/json2.sql +++ b/tests/cases/standalone/common/types/json/json2.sql @@ -46,8 +46,17 @@ select j.a, j.a.x from json2_table order by ts; select j.c, j.y from json2_table order by ts; +select j from json2_table order by ts; + +select * from json2_table order by ts; + select j.a.b + 1 from json2_table order by ts; +select abs(j.a.b) from json2_table order by ts; + +-- "j.c" is of type "String", "abs" is expected to be all "null"s. +select abs(j.c) from json2_table order by ts; + select j.d from json2_table order by ts; drop table json2_table;