diff --git a/Cargo.lock b/Cargo.lock index a4dce12b27..305bbf4686 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5348,7 +5348,7 @@ dependencies = [ [[package]] name = "greptime-proto" version = "0.1.0" -source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=14b9dc40bdc8288742b0cefc7bb024303b7429ef#14b9dc40bdc8288742b0cefc7bb024303b7429ef" +source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=fd699b21991e358201ee81ead4d319545c5df2ad#fd699b21991e358201ee81ead4d319545c5df2ad" dependencies = [ "prost 0.13.5", "prost-types 0.13.5", diff --git a/Cargo.toml b/Cargo.toml index eedca63570..5961df7d8b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -148,7 +148,7 @@ etcd-client = { git = "https://github.com/GreptimeTeam/etcd-client", rev = "f62d fst = "0.4.7" futures = "0.3" futures-util = "0.3" -greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "14b9dc40bdc8288742b0cefc7bb024303b7429ef" } +greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "fd699b21991e358201ee81ead4d319545c5df2ad" } hex = "0.4" http = "1" humantime = "2.1" diff --git a/src/api/src/helper.rs b/src/api/src/helper.rs index da5fdcfeda..468ffb9cc2 100644 --- a/src/api/src/helper.rs +++ b/src/api/src/helper.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashSet; +use std::collections::{BTreeMap, HashSet}; use std::sync::Arc; use common_decimal::Decimal128; @@ -20,6 +20,7 @@ use common_decimal::decimal128::{DECIMAL128_DEFAULT_SCALE, DECIMAL128_MAX_PRECIS use common_time::time::Time; use common_time::timestamp::TimeUnit; use common_time::{Date, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp}; +use datatypes::json::value::{JsonNumber, JsonValue, JsonValueRef, JsonVariant}; use datatypes::prelude::{ConcreteDataType, ValueRef}; use datatypes::types::{ IntervalType, JsonFormat, StructField, StructType, TimeType, TimestampType, @@ -34,9 +35,9 @@ use greptime_proto::v1::greptime_request::Request; use greptime_proto::v1::query_request::Query; use greptime_proto::v1::value::ValueData; use greptime_proto::v1::{ - self, ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, JsonNativeTypeExtension, - JsonTypeExtension, ListTypeExtension, QueryRequest, Row, SemanticType, StructTypeExtension, - VectorTypeExtension, + self, ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, JsonList, + JsonNativeTypeExtension, JsonObject, JsonTypeExtension, ListTypeExtension, QueryRequest, Row, + SemanticType, StructTypeExtension, VectorTypeExtension, json_value, }; use paste::paste; use snafu::prelude::*; @@ -801,21 +802,8 @@ pub fn pb_value_to_value_ref<'a>( } ValueData::JsonValue(inner_value) => { - let json_datatype_ext = datatype_ext - .as_ref() - .and_then(|ext| { - if let Some(TypeExt::JsonNativeType(l)) = &ext.type_ext { - Some(l) - } else { - None - } - }) - .expect("json value must contain datatype ext"); - - ValueRef::Json(Box::new(pb_value_to_value_ref( - inner_value, - json_datatype_ext.datatype_extension.as_deref(), - ))) + let value = decode_json_value(inner_value); + ValueRef::Json(Box::new(value)) } } } @@ -938,12 +926,72 @@ pub fn to_proto_value(value: Value) -> v1::Value { })), }, Value::Json(v) => v1::Value { - value_data: Some(ValueData::JsonValue(Box::new(to_proto_value(*v)))), + value_data: Some(ValueData::JsonValue(encode_json_value(*v))), }, Value::Duration(_) => v1::Value { value_data: None }, } } +fn encode_json_value(value: JsonValue) -> v1::JsonValue { + fn helper(json: JsonVariant) -> v1::JsonValue { + let value = match json { + JsonVariant::Null => None, + JsonVariant::Bool(x) => Some(json_value::Value::Boolean(x)), + JsonVariant::Number(x) => Some(match x { + JsonNumber::PosInt(i) => json_value::Value::Uint(i), + JsonNumber::NegInt(i) => json_value::Value::Int(i), + JsonNumber::Float(f) => json_value::Value::Float(f.0), + }), + JsonVariant::String(x) => Some(json_value::Value::Str(x)), + JsonVariant::Array(x) => Some(json_value::Value::Array(JsonList { + items: x.into_iter().map(helper).collect::>(), + })), + JsonVariant::Object(x) => { + let entries = x + .into_iter() + .map(|(key, v)| v1::json_object::Entry { + key, + value: Some(helper(v)), + }) + .collect::>(); + Some(json_value::Value::Object(JsonObject { entries })) + } + }; + v1::JsonValue { value } + } + helper(value.into_variant()) +} + +fn decode_json_value(value: &v1::JsonValue) -> JsonValueRef<'_> { + let Some(value) = &value.value else { + return JsonValueRef::null(); + }; + match value { + json_value::Value::Boolean(x) => (*x).into(), + json_value::Value::Int(x) => (*x).into(), + json_value::Value::Uint(x) => (*x).into(), + json_value::Value::Float(x) => (*x).into(), + json_value::Value::Str(x) => (x.as_str()).into(), + json_value::Value::Array(array) => array + .items + .iter() + .map(|x| decode_json_value(x).into_variant()) + .collect::>() + .into(), + json_value::Value::Object(x) => x + .entries + .iter() + .filter_map(|entry| { + entry + .value + .as_ref() + .map(|v| (entry.key.as_str(), decode_json_value(v).into_variant())) + }) + .collect::>() + .into(), + } +} + fn convert_list_to_pb_values(list_value: ListValue) -> Vec { list_value .take_items() @@ -1065,9 +1113,7 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue { .collect(); Some(ValueData::StructValue(v1::StructValue { items })) } - Value::Json(inner_value) => Some(ValueData::JsonValue(Box::new(value_to_grpc_value( - *inner_value, - )))), + Value::Json(v) => Some(ValueData::JsonValue(encode_json_value(*v))), Value::Duration(_) => unreachable!(), }, } @@ -1778,4 +1824,199 @@ mod tests { _ => panic!("Unexpected value type"), } } + + #[test] + fn test_encode_decode_json_value() { + let json = JsonValue::null(); + let proto = encode_json_value(json.clone()); + assert!(proto.value.is_none()); + let value = decode_json_value(&proto); + assert_eq!(json.as_ref(), value); + + let json: JsonValue = true.into(); + let proto = encode_json_value(json.clone()); + assert_eq!(proto.value, Some(json_value::Value::Boolean(true))); + let value = decode_json_value(&proto); + assert_eq!(json.as_ref(), value); + + let json: JsonValue = (-1i64).into(); + let proto = encode_json_value(json.clone()); + assert_eq!(proto.value, Some(json_value::Value::Int(-1))); + let value = decode_json_value(&proto); + assert_eq!(json.as_ref(), value); + + let json: JsonValue = 1u64.into(); + let proto = encode_json_value(json.clone()); + assert_eq!(proto.value, Some(json_value::Value::Uint(1))); + let value = decode_json_value(&proto); + assert_eq!(json.as_ref(), value); + + let json: JsonValue = 1.0f64.into(); + let proto = encode_json_value(json.clone()); + assert_eq!(proto.value, Some(json_value::Value::Float(1.0))); + let value = decode_json_value(&proto); + assert_eq!(json.as_ref(), value); + + let json: JsonValue = "s".into(); + let proto = encode_json_value(json.clone()); + assert_eq!(proto.value, Some(json_value::Value::Str("s".to_string()))); + let value = decode_json_value(&proto); + assert_eq!(json.as_ref(), value); + + let json: JsonValue = [1i64, 2, 3].into(); + let proto = encode_json_value(json.clone()); + assert_eq!( + proto.value, + Some(json_value::Value::Array(JsonList { + items: vec![ + v1::JsonValue { + value: Some(json_value::Value::Int(1)) + }, + v1::JsonValue { + value: Some(json_value::Value::Int(2)) + }, + v1::JsonValue { + value: Some(json_value::Value::Int(3)) + } + ] + })) + ); + let value = decode_json_value(&proto); + assert_eq!(json.as_ref(), value); + + let json: JsonValue = [(); 0].into(); + let proto = encode_json_value(json.clone()); + assert_eq!( + proto.value, + Some(json_value::Value::Array(JsonList { items: vec![] })) + ); + let value = decode_json_value(&proto); + assert_eq!(json.as_ref(), value); + + let json: JsonValue = [("k3", 3i64), ("k2", 2i64), ("k1", 1i64)].into(); + let proto = encode_json_value(json.clone()); + assert_eq!( + proto.value, + Some(json_value::Value::Object(JsonObject { + entries: vec![ + v1::json_object::Entry { + key: "k1".to_string(), + value: Some(v1::JsonValue { + value: Some(json_value::Value::Int(1)) + }), + }, + v1::json_object::Entry { + key: "k2".to_string(), + value: Some(v1::JsonValue { + value: Some(json_value::Value::Int(2)) + }), + }, + v1::json_object::Entry { + key: "k3".to_string(), + value: Some(v1::JsonValue { + value: Some(json_value::Value::Int(3)) + }), + }, + ] + })) + ); + let value = decode_json_value(&proto); + assert_eq!(json.as_ref(), value); + + let json: JsonValue = [("null", ()); 0].into(); + let proto = encode_json_value(json.clone()); + assert_eq!( + proto.value, + Some(json_value::Value::Object(JsonObject { entries: vec![] })) + ); + let value = decode_json_value(&proto); + assert_eq!(json.as_ref(), value); + + let json: JsonValue = [ + ("null", JsonVariant::from(())), + ("bool", false.into()), + ("list", ["hello", "world"].into()), + ( + "object", + [ + ("positive_i", JsonVariant::from(42u64)), + ("negative_i", (-42i64).into()), + ("nested", [("what", "blah")].into()), + ] + .into(), + ), + ] + .into(); + let proto = encode_json_value(json.clone()); + assert_eq!( + proto.value, + Some(json_value::Value::Object(JsonObject { + entries: vec![ + v1::json_object::Entry { + key: "bool".to_string(), + value: Some(v1::JsonValue { + value: Some(json_value::Value::Boolean(false)) + }), + }, + v1::json_object::Entry { + key: "list".to_string(), + value: Some(v1::JsonValue { + value: Some(json_value::Value::Array(JsonList { + items: vec![ + v1::JsonValue { + value: Some(json_value::Value::Str("hello".to_string())) + }, + v1::JsonValue { + value: Some(json_value::Value::Str("world".to_string())) + }, + ] + })) + }), + }, + v1::json_object::Entry { + key: "null".to_string(), + value: Some(v1::JsonValue { value: None }), + }, + v1::json_object::Entry { + key: "object".to_string(), + value: Some(v1::JsonValue { + value: Some(json_value::Value::Object(JsonObject { + entries: vec![ + v1::json_object::Entry { + key: "negative_i".to_string(), + value: Some(v1::JsonValue { + value: Some(json_value::Value::Int(-42)) + }), + }, + v1::json_object::Entry { + key: "nested".to_string(), + value: Some(v1::JsonValue { + value: Some(json_value::Value::Object(JsonObject { + entries: vec![v1::json_object::Entry { + key: "what".to_string(), + value: Some(v1::JsonValue { + value: Some(json_value::Value::Str( + "blah".to_string() + )) + }), + },] + })) + }), + }, + v1::json_object::Entry { + key: "positive_i".to_string(), + value: Some(v1::JsonValue { + value: Some(json_value::Value::Uint(42)) + }), + }, + ] + })) + }), + }, + ] + })) + ); + let value = decode_json_value(&proto); + assert_eq!(json.as_ref(), value); + } } diff --git a/src/common/sql/src/convert.rs b/src/common/sql/src/convert.rs index 0ff2e44061..487e84abb8 100644 --- a/src/common/sql/src/convert.rs +++ b/src/common/sql/src/convert.rs @@ -211,8 +211,7 @@ pub fn sql_value_to_value( | Value::Duration(_) | Value::IntervalYearMonth(_) | Value::IntervalDayTime(_) - | Value::IntervalMonthDayNano(_) - | Value::Json(_) => match unary_op { + | Value::IntervalMonthDayNano(_) => match unary_op { UnaryOperator::Plus => {} UnaryOperator::Minus => { value = value @@ -222,7 +221,11 @@ pub fn sql_value_to_value( _ => return InvalidUnaryOpSnafu { unary_op, value }.fail(), }, - Value::String(_) | Value::Binary(_) | Value::List(_) | Value::Struct(_) => { + Value::String(_) + | Value::Binary(_) + | Value::List(_) + | Value::Struct(_) + | Value::Json(_) => { return InvalidUnaryOpSnafu { unary_op, value }.fail(); } } diff --git a/src/datatypes/src/error.rs b/src/datatypes/src/error.rs index fe3ee99b63..65aca699ec 100644 --- a/src/datatypes/src/error.rs +++ b/src/datatypes/src/error.rs @@ -189,7 +189,7 @@ pub enum Error { location: Location, }, - #[snafu(display("Invalid JSON text: {}", value))] + #[snafu(display("Invalid JSON: {}", value))] InvalidJson { value: String, #[snafu(implicit)] diff --git a/src/datatypes/src/json.rs b/src/datatypes/src/json.rs index 64952bb39a..6b19df9b1c 100644 --- a/src/datatypes/src/json.rs +++ b/src/datatypes/src/json.rs @@ -19,18 +19,19 @@ //! The struct will carry all the fields of the Json object. We will not flatten any json object in this implementation. //! -use std::collections::HashSet; +pub mod value; + +use std::collections::{BTreeMap, HashSet}; use std::sync::Arc; -use common_base::bytes::StringBytes; -use ordered_float::OrderedFloat; use serde::{Deserialize, Serialize}; use serde_json::{Map, Value as Json}; use snafu::{ResultExt, ensure}; use crate::data_type::{ConcreteDataType, DataType}; use crate::error::{self, Error}; -use crate::types::{ListType, StructField, StructType}; +use crate::json::value::{JsonValue, JsonVariant}; +use crate::types::{StructField, StructType}; use crate::value::{ListValue, StructValue, Value}; /// The configuration of JSON encoding @@ -148,24 +149,16 @@ pub fn encode_json_with_context<'a>( json: Json, data_type: Option<&ConcreteDataType>, context: &JsonContext<'a>, -) -> Result { +) -> Result { // Check if the entire encoding should be unstructured if matches!(context.settings, JsonStructureSettings::UnstructuredRaw) { let json_string = json.to_string(); - let struct_value = StructValue::try_new( - vec![Value::String(json_string.into())], - StructType::new(Arc::new(vec![StructField::new( - JsonStructureSettings::RAW_FIELD.to_string(), - ConcreteDataType::string_datatype(), - true, - )])), - )?; - return Ok(Value::Struct(struct_value)); + return Ok([(JsonStructureSettings::RAW_FIELD, json_string)].into()); } // Check if current key should be treated as unstructured if context.is_unstructured_key() { - return Ok(Value::String(json.to_string().into())); + return Ok(json.to_string().into()); } match json { @@ -178,8 +171,7 @@ pub fn encode_json_with_context<'a>( ); let data_type = data_type.and_then(|x| x.as_struct()); - let struct_value = encode_json_object_with_context(json_object, data_type, context)?; - Ok(Value::Struct(struct_value)) + encode_json_object_with_context(json_object, data_type, context) } Json::Array(json_array) => { let item_type = if let Some(ConcreteDataType::List(list_type)) = data_type { @@ -187,14 +179,13 @@ pub fn encode_json_with_context<'a>( } else { None }; - let list_value = encode_json_array_with_context(json_array, item_type, context)?; - Ok(Value::List(list_value)) + encode_json_array_with_context(json_array, item_type, context) } _ => { // For non-collection types, verify type compatibility if let Some(expected_type) = data_type { - let (value, actual_type) = - encode_json_value_with_context(json, Some(expected_type), context)?; + let value = encode_json_value_with_context(json, Some(expected_type), context)?; + let actual_type = value.json_type().native_type(); if &actual_type == expected_type { Ok(value) } else { @@ -208,8 +199,7 @@ pub fn encode_json_with_context<'a>( .build()) } } else { - let (value, _) = encode_json_value_with_context(json, None, context)?; - Ok(value) + encode_json_value_with_context(json, None, context) } } } @@ -219,10 +209,8 @@ fn encode_json_object_with_context<'a>( mut json_object: Map, fields: Option<&StructType>, context: &JsonContext<'a>, -) -> Result { - let total_json_keys = json_object.len(); - let mut items = Vec::with_capacity(total_json_keys); - let mut struct_fields = Vec::with_capacity(total_json_keys); +) -> Result { + let mut object = BTreeMap::new(); // First, process fields from the provided schema in their original order if let Some(fields) = fields { for field in fields.fields().iter() { @@ -230,18 +218,12 @@ fn encode_json_object_with_context<'a>( if let Some(value) = json_object.remove(field_name) { let field_context = context.with_key(field_name); - let (value, data_type) = + let value = encode_json_value_with_context(value, Some(field.data_type()), &field_context)?; - items.push(value); - struct_fields.push(StructField::new( - field_name.to_string(), - data_type, - true, // JSON fields are always nullable - )); + object.insert(field_name.to_string(), value.into_variant()); } else { // Field exists in schema but not in JSON - add null value - items.push(Value::Null); - struct_fields.push(field.clone()); + object.insert(field_name.to_string(), ().into()); } } } @@ -250,57 +232,49 @@ fn encode_json_object_with_context<'a>( for (key, value) in json_object { let field_context = context.with_key(&key); - let (value, data_type) = encode_json_value_with_context(value, None, &field_context)?; - items.push(value); + let value = encode_json_value_with_context(value, None, &field_context)?; - struct_fields.push(StructField::new( - key.clone(), - data_type, - true, // JSON fields are always nullable - )); + object.insert(key, value.into_variant()); } - let struct_type = StructType::new(Arc::new(struct_fields)); - StructValue::try_new(items, struct_type) + Ok(JsonValue::new(JsonVariant::Object(object))) } fn encode_json_array_with_context<'a>( json_array: Vec, item_type: Option<&ConcreteDataType>, context: &JsonContext<'a>, -) -> Result { +) -> Result { let json_array_len = json_array.len(); let mut items = Vec::with_capacity(json_array_len); - let mut element_type = None; + let mut element_type = item_type.cloned(); for (index, value) in json_array.into_iter().enumerate() { let array_context = context.with_key(&index.to_string()); - let (item_value, item_type) = - encode_json_value_with_context(value, item_type, &array_context)?; - items.push(item_value); + let item_value = + encode_json_value_with_context(value, element_type.as_ref(), &array_context)?; + let item_type = item_value.json_type().native_type(); + items.push(item_value.into_variant()); // Determine the common type for the list if let Some(current_type) = &element_type { - // For now, we'll use the first non-null type we encounter - // In a more sophisticated implementation, we might want to find a common supertype - if *current_type == ConcreteDataType::null_datatype() - && item_type != ConcreteDataType::null_datatype() - { - element_type = Some(item_type); - } + // It's valid for json array to have different types of items, for example, + // ["a string", 1]. However, the `JsonValue` will be converted to Arrow list array, + // which requires all items have exactly same type. So we forbid the different types + // case here. Besides, it's not common for items in a json array to differ. So I think + // we are good here. + ensure!( + item_type == *current_type, + error::InvalidJsonSnafu { + value: "all items in json array must have the same type" + } + ); } else { element_type = Some(item_type); } } - // Use provided item_type if available, otherwise determine from elements - let element_type = if let Some(item_type) = item_type { - item_type.clone() - } else { - element_type.unwrap_or_else(ConcreteDataType::string_datatype) - }; - - Ok(ListValue::new(items, Arc::new(element_type))) + Ok(JsonValue::new(JsonVariant::Array(items))) } /// Helper function to encode a JSON value to a Value and determine its ConcreteDataType with context @@ -308,81 +282,61 @@ fn encode_json_value_with_context<'a>( json: Json, expected_type: Option<&ConcreteDataType>, context: &JsonContext<'a>, -) -> Result<(Value, ConcreteDataType), Error> { +) -> Result { // Check if current key should be treated as unstructured if context.is_unstructured_key() { - return Ok(( - Value::String(json.to_string().into()), - ConcreteDataType::string_datatype(), - )); + return Ok(json.to_string().into()); } match json { - Json::Null => Ok((Value::Null, ConcreteDataType::null_datatype())), - Json::Bool(b) => Ok((Value::Boolean(b), ConcreteDataType::boolean_datatype())), + Json::Null => Ok(JsonValue::null()), + Json::Bool(b) => Ok(b.into()), Json::Number(n) => { if let Some(i) = n.as_i64() { // Use int64 for all integer numbers when possible if let Some(expected) = expected_type && let Ok(value) = try_convert_to_expected_type(i, expected) { - return Ok((value, expected.clone())); + return Ok(value); } - Ok((Value::Int64(i), ConcreteDataType::int64_datatype())) + Ok(i.into()) } else if let Some(u) = n.as_u64() { // Use int64 for unsigned integers that fit, otherwise use u64 if let Some(expected) = expected_type && let Ok(value) = try_convert_to_expected_type(u, expected) { - return Ok((value, expected.clone())); + return Ok(value); } if u <= i64::MAX as u64 { - Ok((Value::Int64(u as i64), ConcreteDataType::int64_datatype())) + Ok((u as i64).into()) } else { - Ok((Value::UInt64(u), ConcreteDataType::uint64_datatype())) + Ok(u.into()) } } else if let Some(f) = n.as_f64() { // Try to use the expected type if provided if let Some(expected) = expected_type && let Ok(value) = try_convert_to_expected_type(f, expected) { - return Ok((value, expected.clone())); + return Ok(value); } // Default to f64 for floating point numbers - Ok(( - Value::Float64(OrderedFloat(f)), - ConcreteDataType::float64_datatype(), - )) + Ok(f.into()) } else { // Fallback to string representation - Ok(( - Value::String(StringBytes::from(n.to_string())), - ConcreteDataType::string_datatype(), - )) + Ok(n.to_string().into()) } } Json::String(s) => { if let Some(expected) = expected_type && let Ok(value) = try_convert_to_expected_type(s.as_str(), expected) { - return Ok((value, expected.clone())); + return Ok(value); } - Ok(( - Value::String(StringBytes::from(s.clone())), - ConcreteDataType::string_datatype(), - )) - } - Json::Array(arr) => { - let list_value = encode_json_array_with_context(arr, expected_type, context)?; - let datatype = ConcreteDataType::List(ListType::new(list_value.datatype())); - Ok((Value::List(list_value), datatype)) - } - Json::Object(obj) => { - let struct_value = encode_json_object_with_context(obj, None, context)?; - let data_type = ConcreteDataType::Struct(struct_value.struct_type().clone()); - Ok((Value::Struct(struct_value), data_type)) + Ok(s.into()) } + Json::Array(arr) => encode_json_array_with_context(arr, expected_type, context), + Json::Object(obj) => encode_json_object_with_context(obj, None, context), } } @@ -402,7 +356,6 @@ pub fn decode_value_with_context<'a>( } match value { - Value::Json(inner) => decode_value_with_context(*inner, context), Value::Struct(struct_value) => decode_struct_with_context(struct_value, context), Value::List(list_value) => decode_list_with_context(list_value, context), _ => decode_primitive_value(value), @@ -569,11 +522,13 @@ fn decode_struct_with_settings<'a>( key_path: field_context.key_path.clone(), settings: &JsonStructureSettings::Structured(None), }; - let (decoded_value, data_type) = encode_json_value_with_context( + let decoded_value = encode_json_value_with_context( json_value, None, // Don't force a specific type, let it be inferred from JSON &structured_context, - )?; + )? + .into_value(); + let data_type = decoded_value.data_type(); items.push(decoded_value); struct_fields.push(StructField::new( @@ -651,8 +606,9 @@ fn decode_unstructured_raw_struct(struct_value: StructValue) -> Result Result( value: T, expected_type: &ConcreteDataType, -) -> Result +) -> Result where - T: Into, + T: Into, { let value = value.into(); - expected_type.try_cast(value.clone()).ok_or_else(|| { + let cast_error = || { error::CastTypeSnafu { - msg: format!( - "Cannot cast from {} to {}", - value.data_type().name(), - expected_type.name() - ), + msg: format!("Cannot cast value {value} to {expected_type}"), } - .build() - }) + .fail() + }; + let actual_type = value.json_type().native_type(); + match (&actual_type, expected_type) { + (x, y) if x == y => Ok(value), + (ConcreteDataType::UInt64(_), ConcreteDataType::Int64(_)) => { + if let Some(i) = value.as_i64() { + Ok(i.into()) + } else { + cast_error() + } + } + (ConcreteDataType::UInt64(_), ConcreteDataType::Float64(_)) => { + if let Some(f) = value.as_f64() { + Ok(f.into()) + } else { + cast_error() + } + } + (ConcreteDataType::Int64(_), ConcreteDataType::UInt64(_)) => { + if let Some(i) = value.as_u64() { + Ok(i.into()) + } else { + cast_error() + } + } + (ConcreteDataType::Int64(_), ConcreteDataType::Float64(_)) => { + if let Some(f) = value.as_f64() { + Ok(f.into()) + } else { + cast_error() + } + } + (_, ConcreteDataType::String(_)) => Ok(value.to_string().into()), + _ => cast_error(), + } } #[cfg(test)] @@ -898,11 +884,11 @@ mod tests { let json = Json::from(42); let settings = JsonStructureSettings::Structured(None); let result = settings - .encode_with_type(json.clone(), Some(&ConcreteDataType::int8_datatype())) + .encode_with_type(json.clone(), Some(&ConcreteDataType::uint64_datatype())) .unwrap() .into_json_inner() .unwrap(); - assert_eq!(result, Value::Int8(42)); + assert_eq!(result, Value::UInt64(42)); // Test with expected string type let result = settings @@ -917,23 +903,11 @@ mod tests { fn test_encode_json_array_mixed_types() { let json = json!([1, "hello", true, 3.15]); let settings = JsonStructureSettings::Structured(None); - let result = settings - .encode_with_type(json, None) - .unwrap() - .into_json_inner() - .unwrap(); - - if let Value::List(list_value) = result { - assert_eq!(list_value.items().len(), 4); - // The first non-null type should determine the list type - // In this case, it should be string since we can't find a common numeric type - assert_eq!( - list_value.datatype(), - Arc::new(ConcreteDataType::int64_datatype()) - ); - } else { - panic!("Expected List value"); - } + let result = settings.encode_with_type(json, None); + assert_eq!( + result.unwrap_err().to_string(), + "Invalid JSON: all items in json array must have the same type" + ); } #[test] @@ -951,7 +925,7 @@ mod tests { // Empty arrays default to string type assert_eq!( list_value.datatype(), - Arc::new(ConcreteDataType::string_datatype()) + Arc::new(ConcreteDataType::null_datatype()) ); } else { panic!("Expected List value"); @@ -1008,15 +982,15 @@ mod tests { if let Value::Struct(struct_value) = result { assert_eq!(struct_value.items().len(), 2); let struct_fields = struct_value.struct_type().fields(); - assert_eq!(struct_fields[0].name(), "name"); + assert_eq!(struct_fields[0].name(), "age"); assert_eq!( struct_fields[0].data_type(), - &ConcreteDataType::string_datatype() + &ConcreteDataType::int64_datatype() ); - assert_eq!(struct_fields[1].name(), "age"); + assert_eq!(struct_fields[1].name(), "name"); assert_eq!( struct_fields[1].data_type(), - &ConcreteDataType::int64_datatype() + &ConcreteDataType::string_datatype() ); } else { panic!("Expected Struct value"); @@ -1051,7 +1025,7 @@ mod tests { ]; let struct_type = StructType::new(Arc::new(fields)); - let result = encode_json_object_with_context( + let Value::Struct(result) = encode_json_object_with_context( json.as_object().unwrap().clone(), Some(&struct_type), &JsonContext { @@ -1059,7 +1033,10 @@ mod tests { settings: &JsonStructureSettings::Structured(None), }, ) - .unwrap(); + .map(|x| x.into_value()) + .unwrap() else { + unreachable!() + }; // Verify field order is preserved from schema let struct_fields = result.struct_type().fields(); @@ -1093,7 +1070,7 @@ mod tests { ]; let struct_type = StructType::new(Arc::new(fields)); - let result = encode_json_object_with_context( + let Value::Struct(result) = encode_json_object_with_context( json.as_object().unwrap().clone(), Some(&struct_type), &JsonContext { @@ -1101,19 +1078,22 @@ mod tests { settings: &JsonStructureSettings::Structured(None), }, ) - .unwrap(); + .map(|x| x.into_value()) + .unwrap() else { + unreachable!() + }; - // Verify schema fields come first in order + // verify fields are sorted in json value let struct_fields = result.struct_type().fields(); - assert_eq!(struct_fields[0].name(), "name"); + assert_eq!(struct_fields[0].name(), "active"); assert_eq!(struct_fields[1].name(), "age"); - assert_eq!(struct_fields[2].name(), "active"); + assert_eq!(struct_fields[2].name(), "name"); // Verify values are correct let items = result.items(); - assert_eq!(items[0], Value::String("Alice".into())); + assert_eq!(items[0], Value::Boolean(true)); assert_eq!(items[1], Value::Int64(25)); - assert_eq!(items[2], Value::Boolean(true)); + assert_eq!(items[2], Value::String("Alice".into())); } #[test] @@ -1134,7 +1114,7 @@ mod tests { ]; let struct_type = StructType::new(Arc::new(fields)); - let result = encode_json_object_with_context( + let Value::Struct(result) = encode_json_object_with_context( json.as_object().unwrap().clone(), Some(&struct_type), &JsonContext { @@ -1142,17 +1122,20 @@ mod tests { settings: &JsonStructureSettings::Structured(None), }, ) - .unwrap(); + .map(|x| x.into_value()) + .unwrap() else { + unreachable!() + }; // Verify both schema fields are present let struct_fields = result.struct_type().fields(); - assert_eq!(struct_fields[0].name(), "name"); - assert_eq!(struct_fields[1].name(), "age"); + assert_eq!(struct_fields[0].name(), "age"); + assert_eq!(struct_fields[1].name(), "name"); // Verify values - name has value, age is null let items = result.items(); - assert_eq!(items[0], Value::String("Bob".into())); - assert_eq!(items[1], Value::Null); + assert_eq!(items[0], Value::Null); + assert_eq!(items[1], Value::String("Bob".into())); } #[test] @@ -1175,7 +1158,7 @@ mod tests { #[test] fn test_encode_json_array_with_item_type() { let json = json!([1, 2, 3]); - let item_type = Arc::new(ConcreteDataType::int8_datatype()); + let item_type = Arc::new(ConcreteDataType::uint64_datatype()); let list_type = ListType::new(item_type.clone()); let concrete_type = ConcreteDataType::List(list_type); let settings = JsonStructureSettings::Structured(None); @@ -1187,9 +1170,9 @@ mod tests { if let Value::List(list_value) = result { assert_eq!(list_value.items().len(), 3); - assert_eq!(list_value.items()[0], Value::Int8(1)); - assert_eq!(list_value.items()[1], Value::Int8(2)); - assert_eq!(list_value.items()[2], Value::Int8(3)); + assert_eq!(list_value.items()[0], Value::UInt64(1)); + assert_eq!(list_value.items()[1], Value::UInt64(2)); + assert_eq!(list_value.items()[2], Value::UInt64(3)); assert_eq!(list_value.datatype(), item_type); } else { panic!("Expected List value"); @@ -1199,7 +1182,7 @@ mod tests { #[test] fn test_encode_json_array_empty_with_item_type() { let json = json!([]); - let item_type = Arc::new(ConcreteDataType::string_datatype()); + let item_type = Arc::new(ConcreteDataType::null_datatype()); let list_type = ListType::new(item_type.clone()); let concrete_type = ConcreteDataType::List(list_type); let settings = JsonStructureSettings::Structured(None); @@ -1219,6 +1202,7 @@ mod tests { #[cfg(test)] mod decode_tests { + use ordered_float::OrderedFloat; use serde_json::json; use super::*; @@ -2153,20 +2137,11 @@ mod tests { )])), ); - let decoded_struct = settings.decode_struct(array_struct).unwrap(); - let fields = decoded_struct.struct_type().fields(); - let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - assert!(decoded_fields.contains(&"value")); - - if let Value::List(list_value) = &decoded_struct.items()[0] { - assert_eq!(list_value.items().len(), 4); - assert_eq!(list_value.items()[0], Value::Int64(1)); - assert_eq!(list_value.items()[1], Value::String("hello".into())); - assert_eq!(list_value.items()[2], Value::Boolean(true)); - assert_eq!(list_value.items()[3], Value::Float64(OrderedFloat(3.15))); - } else { - panic!("Expected array to be decoded as ListValue"); - } + let decoded_struct = settings.decode_struct(array_struct); + assert_eq!( + decoded_struct.unwrap_err().to_string(), + "Invalid JSON: all items in json array must have the same type" + ); } #[test] diff --git a/src/datatypes/src/json/value.rs b/src/datatypes/src/json/value.rs new file mode 100644 index 0000000000..4571fd1944 --- /dev/null +++ b/src/datatypes/src/json/value.rs @@ -0,0 +1,689 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::BTreeMap; +use std::fmt::{Display, Formatter}; +use std::hash::{Hash, Hasher}; +use std::sync::{Arc, OnceLock}; + +use num_traits::ToPrimitive; +use ordered_float::OrderedFloat; +use serde::{Deserialize, Serialize}; +use serde_json::Number; + +use crate::data_type::ConcreteDataType; +use crate::types::{JsonType, StructField, StructType}; +use crate::value::{ListValue, ListValueRef, StructValue, StructValueRef, Value, ValueRef}; + +/// Number in json, can be a positive integer, a negative integer, or a floating number. +/// Each of which is represented as `u64`, `i64` and `f64`. +/// +/// This follows how `serde_json` designs number. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub enum JsonNumber { + PosInt(u64), + NegInt(i64), + Float(OrderedFloat), +} + +impl JsonNumber { + fn as_u64(&self) -> Option { + match self { + JsonNumber::PosInt(n) => Some(*n), + JsonNumber::NegInt(n) => (*n >= 0).then_some(*n as u64), + _ => None, + } + } + + fn as_i64(&self) -> Option { + match self { + JsonNumber::PosInt(n) => (*n <= i64::MAX as u64).then_some(*n as i64), + JsonNumber::NegInt(n) => Some(*n), + _ => None, + } + } + + fn as_f64(&self) -> f64 { + match self { + JsonNumber::PosInt(n) => *n as f64, + JsonNumber::NegInt(n) => *n as f64, + JsonNumber::Float(n) => n.0, + } + } +} + +impl From for JsonNumber { + fn from(i: u64) -> Self { + Self::PosInt(i) + } +} + +impl From for JsonNumber { + fn from(n: i64) -> Self { + Self::NegInt(n) + } +} + +impl From for JsonNumber { + fn from(i: f64) -> Self { + Self::Float(i.into()) + } +} + +impl Display for JsonNumber { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::PosInt(x) => write!(f, "{x}"), + Self::NegInt(x) => write!(f, "{x}"), + Self::Float(x) => write!(f, "{x}"), + } + } +} + +/// Variants of json. +/// +/// This follows how [serde_json::Value] designs except that we only choose to use [BTreeMap] to +/// preserve the fields order by their names in the json object. (By default `serde_json` uses +/// [BTreeMap], too. But it additionally supports "IndexMap" which preserves the order by insertion +/// times of fields.) +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub enum JsonVariant { + Null, + Bool(bool), + Number(JsonNumber), + String(String), + Array(Vec), + Object(BTreeMap), +} + +impl JsonVariant { + fn json_type(&self) -> JsonType { + match self { + JsonVariant::Null => JsonType::new_native(ConcreteDataType::null_datatype()), + JsonVariant::Bool(_) => JsonType::new_native(ConcreteDataType::boolean_datatype()), + JsonVariant::Number(n) => JsonType::new_native(match n { + JsonNumber::PosInt(_) => ConcreteDataType::uint64_datatype(), + JsonNumber::NegInt(_) => ConcreteDataType::int64_datatype(), + JsonNumber::Float(_) => ConcreteDataType::float64_datatype(), + }), + JsonVariant::String(_) => JsonType::new_native(ConcreteDataType::string_datatype()), + JsonVariant::Array(array) => { + let item_type = if let Some(first) = array.first() { + first.json_type().native_type() + } else { + ConcreteDataType::null_datatype() + }; + JsonType::new_native(ConcreteDataType::list_datatype(Arc::new(item_type))) + } + JsonVariant::Object(object) => { + let mut fields = Vec::with_capacity(object.len()); + for (k, v) in object.iter() { + fields.push(StructField::new( + k.clone(), + v.json_type().native_type(), + true, + )) + } + JsonType::new_native(ConcreteDataType::struct_datatype(StructType::new( + Arc::new(fields), + ))) + } + } + } + + fn as_ref(&self) -> JsonVariantRef<'_> { + match self { + JsonVariant::Null => JsonVariantRef::Null, + JsonVariant::Bool(x) => (*x).into(), + JsonVariant::Number(x) => match x { + JsonNumber::PosInt(i) => (*i).into(), + JsonNumber::NegInt(i) => (*i).into(), + JsonNumber::Float(f) => (f.0).into(), + }, + JsonVariant::String(x) => x.as_str().into(), + JsonVariant::Array(array) => { + JsonVariantRef::Array(array.iter().map(|x| x.as_ref()).collect()) + } + JsonVariant::Object(object) => JsonVariantRef::Object( + object + .iter() + .map(|(k, v)| (k.as_str(), v.as_ref())) + .collect(), + ), + } + } +} + +impl From<()> for JsonVariant { + fn from(_: ()) -> Self { + Self::Null + } +} + +impl From for JsonVariant { + fn from(v: bool) -> Self { + Self::Bool(v) + } +} + +impl> From for JsonVariant { + fn from(v: T) -> Self { + Self::Number(v.into()) + } +} + +impl From<&str> for JsonVariant { + fn from(v: &str) -> Self { + Self::String(v.to_string()) + } +} + +impl From for JsonVariant { + fn from(v: String) -> Self { + Self::String(v) + } +} + +impl> From<[T; N]> for JsonVariant { + fn from(vs: [T; N]) -> Self { + Self::Array(vs.into_iter().map(|x| x.into()).collect()) + } +} + +impl, V: Into, const N: usize> From<[(K, V); N]> for JsonVariant { + fn from(vs: [(K, V); N]) -> Self { + Self::Object(vs.into_iter().map(|(k, v)| (k.into(), v.into())).collect()) + } +} + +impl Display for JsonVariant { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::Null => write!(f, "null"), + Self::Bool(x) => write!(f, "{x}"), + Self::Number(x) => write!(f, "{x}"), + Self::String(x) => write!(f, "{x}"), + Self::Array(array) => write!( + f, + "[{}]", + array + .iter() + .map(|x| x.to_string()) + .collect::>() + .join(", ") + ), + Self::Object(object) => { + write!( + f, + "{{ {} }}", + object + .iter() + .map(|(k, v)| format!("{k}: {v}")) + .collect::>() + .join(", ") + ) + } + } + } +} + +/// Represents any valid JSON value. +#[derive(Debug, Eq, Serialize, Deserialize)] +pub struct JsonValue { + #[serde(skip)] + json_type: OnceLock, + json_variant: JsonVariant, +} + +impl JsonValue { + pub fn null() -> Self { + ().into() + } + + pub(crate) fn new(json_variant: JsonVariant) -> Self { + Self { + json_type: OnceLock::new(), + json_variant, + } + } + + pub(crate) fn data_type(&self) -> ConcreteDataType { + ConcreteDataType::Json(self.json_type().clone()) + } + + pub(crate) fn json_type(&self) -> &JsonType { + self.json_type.get_or_init(|| self.json_variant.json_type()) + } + + pub(crate) fn is_null(&self) -> bool { + matches!(self.json_variant, JsonVariant::Null) + } + + pub(crate) fn as_i64(&self) -> Option { + match self.json_variant { + JsonVariant::Number(n) => n.as_i64(), + _ => None, + } + } + + pub(crate) fn as_u64(&self) -> Option { + match self.json_variant { + JsonVariant::Number(n) => n.as_u64(), + _ => None, + } + } + + pub(crate) fn as_f64(&self) -> Option { + match self.json_variant { + JsonVariant::Number(n) => Some(n.as_f64()), + _ => None, + } + } + + pub(crate) fn as_f64_lossy(&self) -> Option { + match self.json_variant { + JsonVariant::Number(n) => Some(match n { + JsonNumber::PosInt(i) => i as f64, + JsonNumber::NegInt(i) => i as f64, + JsonNumber::Float(f) => f.0, + }), + _ => None, + } + } + + pub(crate) fn as_bool(&self) -> Option { + match self.json_variant { + JsonVariant::Bool(b) => Some(b), + _ => None, + } + } + + pub fn as_ref(&self) -> JsonValueRef<'_> { + JsonValueRef { + json_type: OnceLock::new(), + json_variant: self.json_variant.as_ref(), + } + } + + pub fn into_variant(self) -> JsonVariant { + self.json_variant + } + + pub(crate) fn into_value(self) -> Value { + fn helper(v: JsonVariant) -> Value { + match v { + JsonVariant::Null => Value::Null, + JsonVariant::Bool(x) => Value::Boolean(x), + JsonVariant::Number(x) => match x { + JsonNumber::PosInt(i) => Value::UInt64(i), + JsonNumber::NegInt(i) => Value::Int64(i), + JsonNumber::Float(f) => Value::Float64(f), + }, + JsonVariant::String(x) => Value::String(x.into()), + JsonVariant::Array(array) => { + let item_type = if let Some(first) = array.first() { + first.json_type().native_type() + } else { + ConcreteDataType::null_datatype() + }; + Value::List(ListValue::new( + array.into_iter().map(helper).collect(), + Arc::new(item_type), + )) + } + JsonVariant::Object(object) => { + let mut fields = Vec::with_capacity(object.len()); + let mut items = Vec::with_capacity(object.len()); + for (k, v) in object { + fields.push(StructField::new(k, v.json_type().native_type(), true)); + items.push(helper(v)); + } + Value::Struct(StructValue::new(items, StructType::new(Arc::new(fields)))) + } + } + } + helper(self.json_variant) + } +} + +impl> From for JsonValue { + fn from(v: T) -> Self { + Self { + json_type: OnceLock::new(), + json_variant: v.into(), + } + } +} + +impl From for serde_json::Value { + fn from(v: JsonValue) -> Self { + fn helper(v: JsonVariant) -> serde_json::Value { + match v { + JsonVariant::Null => serde_json::Value::Null, + JsonVariant::Bool(x) => serde_json::Value::Bool(x), + JsonVariant::Number(x) => match x { + JsonNumber::PosInt(i) => serde_json::Value::Number(i.into()), + JsonNumber::NegInt(i) => serde_json::Value::Number(i.into()), + JsonNumber::Float(f) => { + if let Some(x) = Number::from_f64(f.0) { + serde_json::Value::Number(x) + } else { + serde_json::Value::String("NaN".into()) + } + } + }, + JsonVariant::String(x) => serde_json::Value::String(x), + JsonVariant::Array(array) => { + serde_json::Value::Array(array.into_iter().map(helper).collect()) + } + JsonVariant::Object(object) => serde_json::Value::Object( + object.into_iter().map(|(k, v)| (k, helper(v))).collect(), + ), + } + } + helper(v.json_variant) + } +} + +impl Clone for JsonValue { + fn clone(&self) -> Self { + let Self { + json_type: _, + json_variant, + } = self; + Self { + json_type: OnceLock::new(), + json_variant: json_variant.clone(), + } + } +} + +impl PartialEq for JsonValue { + fn eq(&self, other: &JsonValue) -> bool { + let Self { + json_type: _, + json_variant, + } = self; + json_variant.eq(&other.json_variant) + } +} + +impl Hash for JsonValue { + fn hash(&self, state: &mut H) { + let Self { + json_type: _, + json_variant, + } = self; + json_variant.hash(state); + } +} + +impl Display for JsonValue { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.json_variant) + } +} + +/// References of variants of json. +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub enum JsonVariantRef<'a> { + Null, + Bool(bool), + Number(JsonNumber), + String(&'a str), + Array(Vec>), + Object(BTreeMap<&'a str, JsonVariantRef<'a>>), +} + +impl JsonVariantRef<'_> { + fn json_type(&self) -> JsonType { + match self { + JsonVariantRef::Null => JsonType::new_native(ConcreteDataType::null_datatype()), + JsonVariantRef::Bool(_) => JsonType::new_native(ConcreteDataType::boolean_datatype()), + JsonVariantRef::Number(n) => JsonType::new_native(match n { + JsonNumber::PosInt(_) => ConcreteDataType::uint64_datatype(), + JsonNumber::NegInt(_) => ConcreteDataType::int64_datatype(), + JsonNumber::Float(_) => ConcreteDataType::float64_datatype(), + }), + JsonVariantRef::String(_) => JsonType::new_native(ConcreteDataType::string_datatype()), + JsonVariantRef::Array(array) => { + let item_type = if let Some(first) = array.first() { + first.json_type().native_type() + } else { + ConcreteDataType::null_datatype() + }; + JsonType::new_native(ConcreteDataType::list_datatype(Arc::new(item_type))) + } + JsonVariantRef::Object(object) => { + let mut fields = Vec::with_capacity(object.len()); + for (k, v) in object.iter() { + fields.push(StructField::new( + k.to_string(), + v.json_type().native_type(), + true, + )) + } + JsonType::new_native(ConcreteDataType::struct_datatype(StructType::new( + Arc::new(fields), + ))) + } + } + } +} + +impl From<()> for JsonVariantRef<'_> { + fn from(_: ()) -> Self { + Self::Null + } +} + +impl From for JsonVariantRef<'_> { + fn from(v: bool) -> Self { + Self::Bool(v) + } +} + +impl> From for JsonVariantRef<'_> { + fn from(v: T) -> Self { + Self::Number(v.into()) + } +} + +impl<'a> From<&'a str> for JsonVariantRef<'a> { + fn from(v: &'a str) -> Self { + Self::String(v) + } +} + +impl<'a, const N: usize, T: Into>> From<[T; N]> for JsonVariantRef<'a> { + fn from(vs: [T; N]) -> Self { + Self::Array(vs.into_iter().map(|x| x.into()).collect()) + } +} + +impl<'a, V: Into>, const N: usize> From<[(&'a str, V); N]> + for JsonVariantRef<'a> +{ + fn from(vs: [(&'a str, V); N]) -> Self { + Self::Object(vs.into_iter().map(|(k, v)| (k, v.into())).collect()) + } +} + +impl<'a> From>> for JsonVariantRef<'a> { + fn from(v: Vec>) -> Self { + Self::Array(v) + } +} + +impl<'a> From>> for JsonVariantRef<'a> { + fn from(v: BTreeMap<&'a str, JsonVariantRef<'a>>) -> Self { + Self::Object(v) + } +} + +impl From> for JsonVariant { + fn from(v: JsonVariantRef) -> Self { + match v { + JsonVariantRef::Null => Self::Null, + JsonVariantRef::Bool(x) => Self::Bool(x), + JsonVariantRef::Number(x) => Self::Number(x), + JsonVariantRef::String(x) => Self::String(x.to_string()), + JsonVariantRef::Array(array) => { + Self::Array(array.into_iter().map(Into::into).collect()) + } + JsonVariantRef::Object(object) => Self::Object( + object + .into_iter() + .map(|(k, v)| (k.to_string(), v.into())) + .collect(), + ), + } + } +} + +/// Reference to representation of any valid JSON value. +#[derive(Debug, Serialize)] +pub struct JsonValueRef<'a> { + #[serde(skip)] + json_type: OnceLock, + json_variant: JsonVariantRef<'a>, +} + +impl<'a> JsonValueRef<'a> { + pub fn null() -> Self { + ().into() + } + + pub(crate) fn data_type(&self) -> ConcreteDataType { + ConcreteDataType::Json(self.json_type().clone()) + } + + pub(crate) fn json_type(&self) -> &JsonType { + self.json_type.get_or_init(|| self.json_variant.json_type()) + } + + pub fn into_variant(self) -> JsonVariantRef<'a> { + self.json_variant + } + + pub(crate) fn is_null(&self) -> bool { + matches!(self.json_variant, JsonVariantRef::Null) + } + + pub fn is_object(&self) -> bool { + matches!(self.json_variant, JsonVariantRef::Object(_)) + } + + pub(crate) fn as_f32(&self) -> Option { + match self.json_variant { + JsonVariantRef::Number(JsonNumber::Float(f)) => f.to_f32(), + _ => None, + } + } + + pub(crate) fn as_f64(&self) -> Option { + match self.json_variant { + JsonVariantRef::Number(JsonNumber::Float(f)) => Some(f.0), + _ => None, + } + } + + pub fn as_value_ref(&self) -> ValueRef<'_> { + fn helper<'a>(v: &'a JsonVariantRef) -> ValueRef<'a> { + match v { + JsonVariantRef::Null => ValueRef::Null, + JsonVariantRef::Bool(x) => ValueRef::Boolean(*x), + JsonVariantRef::Number(x) => match x { + JsonNumber::PosInt(i) => ValueRef::UInt64(*i), + JsonNumber::NegInt(i) => ValueRef::Int64(*i), + JsonNumber::Float(f) => ValueRef::Float64(*f), + }, + JsonVariantRef::String(x) => ValueRef::String(x), + JsonVariantRef::Array(array) => { + let val = array.iter().map(helper).collect::>(); + let item_datatype = if let Some(first) = val.first() { + first.data_type() + } else { + ConcreteDataType::null_datatype() + }; + ValueRef::List(ListValueRef::RefList { + val, + item_datatype: Arc::new(item_datatype), + }) + } + JsonVariantRef::Object(object) => { + let mut fields = Vec::with_capacity(object.len()); + let mut val = Vec::with_capacity(object.len()); + for (k, v) in object.iter() { + let v = helper(v); + fields.push(StructField::new(k.to_string(), v.data_type(), true)); + val.push(v); + } + ValueRef::Struct(StructValueRef::RefList { + val, + fields: StructType::new(Arc::new(fields)), + }) + } + } + } + helper(&self.json_variant) + } + + pub(crate) fn data_size(&self) -> usize { + size_of_val(self) + } +} + +impl<'a, T: Into>> From for JsonValueRef<'a> { + fn from(v: T) -> Self { + Self { + json_type: OnceLock::new(), + json_variant: v.into(), + } + } +} + +impl From> for JsonValue { + fn from(v: JsonValueRef<'_>) -> Self { + Self { + json_type: OnceLock::new(), + json_variant: v.json_variant.into(), + } + } +} + +impl PartialEq for JsonValueRef<'_> { + fn eq(&self, other: &Self) -> bool { + let Self { + json_type: _, + json_variant, + } = self; + json_variant == &other.json_variant + } +} + +impl Eq for JsonValueRef<'_> {} + +impl Clone for JsonValueRef<'_> { + fn clone(&self) -> Self { + let Self { + json_type: _, + json_variant, + } = self; + Self { + json_type: OnceLock::new(), + json_variant: json_variant.clone(), + } + } +} diff --git a/src/datatypes/src/types.rs b/src/datatypes/src/types.rs index 1c7df86249..61366079fb 100644 --- a/src/datatypes/src/types.rs +++ b/src/datatypes/src/types.rs @@ -20,7 +20,7 @@ mod decimal_type; mod dictionary_type; mod duration_type; mod interval_type; -mod json_type; +pub(crate) mod json_type; mod list_type; mod null_type; mod primitive_type; diff --git a/src/datatypes/src/types/json_type.rs b/src/datatypes/src/types/json_type.rs index 660ddfe2c4..aa88aa9c12 100644 --- a/src/datatypes/src/types/json_type.rs +++ b/src/datatypes/src/types/json_type.rs @@ -57,6 +57,19 @@ impl JsonType { Self { format } } + pub(crate) fn new_native(native: ConcreteDataType) -> Self { + Self { + format: JsonFormat::Native(Box::new(native)), + } + } + + pub(crate) fn native_type(&self) -> ConcreteDataType { + match &self.format { + JsonFormat::Jsonb => ConcreteDataType::binary_datatype(), + JsonFormat::Native(x) => x.as_ref().clone(), + } + } + pub(crate) fn empty() -> Self { Self { format: JsonFormat::Native(Box::new(ConcreteDataType::null_datatype())), @@ -65,26 +78,21 @@ impl JsonType { /// Make json type a struct type, by: /// - if the json is an object, its entries are mapped to struct fields, obviously; - /// - if not, the json is one of bool, number, string or array, make it a special field called - /// [JSON_PLAIN_FIELD_NAME] with metadata [JSON_PLAIN_FIELD_METADATA_KEY] = `"true"` in a - /// struct with only that field. + /// - if not, the json is one of bool, number, string or array, make it a special field + /// (see [plain_json_struct_type]). pub(crate) fn as_struct_type(&self) -> StructType { match &self.format { JsonFormat::Jsonb => StructType::default(), JsonFormat::Native(inner) => match inner.as_ref() { ConcreteDataType::Struct(t) => t.clone(), - x => { - let mut field = - StructField::new(JSON_PLAIN_FIELD_NAME.to_string(), x.clone(), true); - field.insert_metadata(JSON_PLAIN_FIELD_METADATA_KEY, true); - StructType::new(Arc::new(vec![field])) - } + x => plain_json_struct_type(x.clone()), }, } } /// Check if this json type is the special "plain" one. /// See [JsonType::as_struct_type]. + #[expect(unused)] pub(crate) fn is_plain_json(&self) -> bool { let JsonFormat::Native(box ConcreteDataType::Struct(t)) = &self.format else { return true; @@ -124,6 +132,14 @@ impl JsonType { } } +/// A special struct type for denoting "plain"(not object) json value. It has only one field, with +/// fixed name [JSON_PLAIN_FIELD_NAME] and with metadata [JSON_PLAIN_FIELD_METADATA_KEY] = `"true"`. +pub(crate) fn plain_json_struct_type(item_type: ConcreteDataType) -> StructType { + let mut field = StructField::new(JSON_PLAIN_FIELD_NAME.to_string(), item_type, true); + field.insert_metadata(JSON_PLAIN_FIELD_METADATA_KEY, true); + StructType::new(Arc::new(vec![field])) +} + fn is_mergeable(this: &ConcreteDataType, that: &ConcreteDataType) -> bool { fn is_mergeable_struct(this: &StructType, that: &StructType) -> bool { let this_fields = this.fields(); diff --git a/src/datatypes/src/types/string_type.rs b/src/datatypes/src/types/string_type.rs index 61677ead4a..fff1d87f00 100644 --- a/src/datatypes/src/types/string_type.rs +++ b/src/datatypes/src/types/string_type.rs @@ -177,7 +177,7 @@ impl DataType for StringType { Value::Duration(v) => Some(Value::String(StringBytes::from(v.to_string()))), Value::Decimal128(v) => Some(Value::String(StringBytes::from(v.to_string()))), - Value::Json(v) => self.try_cast(*v), + Value::Json(v) => serde_json::to_string(v.as_ref()).ok().map(|s| s.into()), // StringBytes is only support for utf-8, Value::Binary and collections are not allowed. Value::Binary(_) | Value::List(_) | Value::Struct(_) => None, diff --git a/src/datatypes/src/value.rs b/src/datatypes/src/value.rs index 90ed848b7d..1c7dc35de6 100644 --- a/src/datatypes/src/value.rs +++ b/src/datatypes/src/value.rs @@ -36,6 +36,7 @@ use crate::error::{ self, ConvertArrowArrayToScalarsSnafu, ConvertScalarToArrowArraySnafu, Error, InconsistentStructFieldsAndItemsSnafu, Result, TryFromValueSnafu, }; +use crate::json::value::{JsonValue, JsonValueRef}; use crate::prelude::*; use crate::type_id::LogicalTypeId; use crate::types::{IntervalType, ListType, StructType}; @@ -86,7 +87,7 @@ pub enum Value { Struct(StructValue), // Json Logical types: - Json(Box), + Json(Box), } impl Display for Value { @@ -197,7 +198,7 @@ macro_rules! define_data_type_func { $struct::Struct(struct_value) => { ConcreteDataType::struct_datatype(struct_value.struct_type().clone()) } - $struct::Json(v) => ConcreteDataType::json_native_datatype(v.data_type()), + $struct::Json(v) => v.data_type(), } } }; @@ -220,7 +221,6 @@ impl Value { match self { Value::Null => Ok(None), Value::List(v) => Ok(Some(v)), - Value::Json(inner) => inner.as_list(), other => error::CastTypeSnafu { msg: format!("Failed to cast {other:?} to list value"), } @@ -232,7 +232,6 @@ impl Value { match self { Value::Null => Ok(None), Value::Struct(v) => Ok(Some(v)), - Value::Json(inner) => inner.as_struct(), other => error::CastTypeSnafu { msg: format!("Failed to cast {other:?} to struct value"), } @@ -267,7 +266,7 @@ impl Value { Value::Duration(v) => ValueRef::Duration(*v), Value::Decimal128(v) => ValueRef::Decimal128(*v), Value::Struct(v) => ValueRef::Struct(StructValueRef::Ref(v)), - Value::Json(v) => ValueRef::Json(Box::new(v.as_value_ref())), + Value::Json(v) => ValueRef::Json(Box::new((**v).as_ref())), } } @@ -391,7 +390,7 @@ impl Value { /// Extract the inner JSON value from a JSON type. pub fn into_json_inner(self) -> Option { match self { - Value::Json(v) => Some(*v), + Value::Json(v) => Some((*v).into_value()), _ => None, } } @@ -501,7 +500,12 @@ impl Value { let struct_type = output_type.as_struct().unwrap(); struct_value.try_to_scalar_value(struct_type)? } - Value::Json(v) => v.try_to_scalar_value(output_type)?, + Value::Json(_) => { + return error::ToScalarValueSnafu { + reason: "unsupported for json value", + } + .fail(); + } }; Ok(scalar_value) @@ -554,13 +558,12 @@ impl Value { Value::IntervalDayTime(x) => Some(Value::IntervalDayTime(x.negative())), Value::IntervalMonthDayNano(x) => Some(Value::IntervalMonthDayNano(x.negative())), - Value::Json(v) => v.try_negative().map(|neg| Value::Json(Box::new(neg))), - Value::Binary(_) | Value::String(_) | Value::Boolean(_) | Value::List(_) - | Value::Struct(_) => None, + | Value::Struct(_) + | Value::Json(_) => None, } } } @@ -929,7 +932,7 @@ impl TryFrom for serde_json::Value { .collect::>>()?; serde_json::Value::Object(map) } - Value::Json(v) => serde_json::Value::try_from(*v)?, + Value::Json(v) => (*v).into(), }; Ok(json_value) @@ -1263,7 +1266,7 @@ impl From> for Value { ValueRef::List(v) => v.to_value(), ValueRef::Decimal128(v) => Value::Decimal128(v), ValueRef::Struct(v) => v.to_value(), - ValueRef::Json(v) => Value::Json(Box::new(Value::from(*v))), + ValueRef::Json(v) => Value::Json(Box::new(JsonValue::from(*v))), } } } @@ -1307,7 +1310,7 @@ pub enum ValueRef<'a> { List(ListValueRef<'a>), Struct(StructValueRef<'a>), - Json(Box>), + Json(Box>), } macro_rules! impl_as_for_value_ref { @@ -1315,18 +1318,6 @@ macro_rules! impl_as_for_value_ref { match $value { ValueRef::Null => Ok(None), ValueRef::$Variant(v) => Ok(Some(v.clone())), - ValueRef::Json(v) => match v.as_ref() { - ValueRef::Null => Ok(None), - ValueRef::$Variant(v) => Ok(Some(v.clone())), - other => error::CastTypeSnafu { - msg: format!( - "Failed to cast value ref {:?} to {}", - other, - stringify!($Variant) - ), - } - .fail(), - }, other => error::CastTypeSnafu { msg: format!( "Failed to cast value ref {:?} to {}", @@ -1402,7 +1393,7 @@ impl<'a> ValueRef<'a> { match self { ValueRef::Null => Ok(None), ValueRef::Float32(f) => Ok(Some(f.0)), - ValueRef::Json(v) => v.try_into_f32(), + ValueRef::Json(v) => Ok(v.as_f32()), other => error::CastTypeSnafu { msg: format!("Failed to cast value ref {:?} to ValueRef::Float32", other,), } @@ -1414,7 +1405,7 @@ impl<'a> ValueRef<'a> { match self { ValueRef::Null => Ok(None), ValueRef::Float64(f) => Ok(Some(f.0)), - ValueRef::Json(v) => v.try_into_f64(), + ValueRef::Json(v) => Ok(v.as_f64()), other => error::CastTypeSnafu { msg: format!("Failed to cast value ref {:?} to ValueRef::Float64", other,), } @@ -1746,6 +1737,7 @@ pub(crate) mod tests { use num_traits::Float; use super::*; + use crate::json::value::{JsonVariant, JsonVariantRef}; use crate::types::StructField; use crate::vectors::ListVectorBuilder; @@ -2281,19 +2273,48 @@ pub(crate) mod tests { check_type_and_value( &ConcreteDataType::json_native_datatype(ConcreteDataType::boolean_datatype()), - &Value::Json(Box::new(Value::Boolean(true))), + &Value::Json(Box::new(true.into())), ); check_type_and_value( &ConcreteDataType::json_native_datatype(build_list_type()), - &Value::Json(Box::new(Value::List(build_list_value()))), + &Value::Json(Box::new([true].into())), ); check_type_and_value( &ConcreteDataType::json_native_datatype(ConcreteDataType::struct_datatype( - build_struct_type(), + StructType::new(Arc::new(vec![ + StructField::new( + "address".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new("age".to_string(), ConcreteDataType::uint64_datatype(), true), + StructField::new( + "awards".to_string(), + ConcreteDataType::list_datatype(Arc::new( + ConcreteDataType::boolean_datatype(), + )), + true, + ), + StructField::new("id".to_string(), ConcreteDataType::int64_datatype(), true), + StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + ])), + )), + &Value::Json(Box::new( + [ + ("id", JsonVariant::from(1i64)), + ("name", "Alice".into()), + ("age", 1u64.into()), + ("address", "blah".into()), + ("awards", [true, false].into()), + ] + .into(), )), - &Value::Json(Box::new(Value::Struct(build_struct_value()))), ); } @@ -2435,25 +2456,27 @@ pub(crate) mod tests { // string wrapped in json assert_eq!( - serde_json::Value::try_from(Value::Json(Box::new(Value::String("hello".into())))) - .unwrap(), + serde_json::Value::try_from(Value::Json(Box::new("hello".into()))).unwrap(), serde_json::json!("hello") ); // list wrapped in json assert_eq!( - serde_json::Value::try_from(Value::Json(Box::new(Value::List(ListValue::new( - vec![Value::Int32(1), Value::Int32(2), Value::Int32(3),], - Arc::new(ConcreteDataType::int32_datatype()) - ))))) - .unwrap(), + serde_json::Value::try_from(Value::Json(Box::new([1i64, 2, 3,].into()))).unwrap(), serde_json::json!([1, 2, 3]) ); // struct wrapped in json assert_eq!( - serde_json::Value::try_from(Value::Json(Box::new(Value::Struct(struct_value)))) - .unwrap(), + serde_json::Value::try_from(Value::Json(Box::new( + [ + ("num".to_string(), JsonVariant::from(42i64)), + ("name".to_string(), "tomcat".into()), + ("yes_or_no".to_string(), true.into()), + ] + .into() + ))) + .unwrap(), serde_json::json!({ "num": 42, "name": "tomcat", @@ -2465,7 +2488,7 @@ pub(crate) mod tests { #[test] fn test_null_value() { assert!(Value::Null.is_null()); - assert!(Value::Json(Box::new(Value::Null)).is_null()); + assert!(Value::Json(Box::new(JsonValue::null())).is_null()); assert!(!Value::Boolean(true).is_null()); assert!(Value::Null < Value::Boolean(false)); assert!(Value::Boolean(true) > Value::Null); @@ -2544,13 +2567,6 @@ pub(crate) mod tests { ValueRef::Struct(StructValueRef::Ref(&struct_value)), Value::Struct(struct_value.clone()).as_value_ref() ); - - assert_eq!( - ValueRef::Json(Box::new(ValueRef::Struct(StructValueRef::Ref( - &struct_value - )))), - Value::Json(Box::new(Value::Struct(struct_value.clone()))).as_value_ref() - ); } #[test] @@ -2675,8 +2691,18 @@ pub(crate) mod tests { ); assert_eq!( - Value::Json(Box::new(Value::Struct(build_struct_value()))).to_string(), - "Json({ id: 1, name: tom, age: 25, address: 94038, awards: Boolean[true, false] })" + Value::Json(Box::new( + [ + ("id", JsonVariant::from(1i64)), + ("name", "tom".into()), + ("age", 25u64.into()), + ("address", "94038".into()), + ("awards", [true, false].into()), + ] + .into() + )) + .to_string(), + "Json({ address: 94038, age: 25, awards: [true, false], id: 1, name: tom })" ) } @@ -3167,10 +3193,17 @@ pub(crate) mod tests { ); check_value_ref_size_eq( - &ValueRef::Json(Box::new(ValueRef::Struct(StructValueRef::Ref( - &build_struct_value(), - )))), - 31, + &ValueRef::Json(Box::new( + [ + ("id", JsonVariantRef::from(1i64)), + ("name", "tom".into()), + ("age", 25u64.into()), + ("address", "94038".into()), + ("awards", [true, false].into()), + ] + .into(), + )), + 48, ); } diff --git a/src/datatypes/src/vectors/json/builder.rs b/src/datatypes/src/vectors/json/builder.rs index cb19a329ef..1467686cf8 100644 --- a/src/datatypes/src/vectors/json/builder.rs +++ b/src/datatypes/src/vectors/json/builder.rs @@ -14,13 +14,13 @@ use std::any::Any; use std::collections::HashMap; - -use snafu::OptionExt; +use std::sync::LazyLock; use crate::data_type::ConcreteDataType; use crate::error::{Result, TryFromValueSnafu, UnsupportedOperationSnafu}; +use crate::json::value::JsonValueRef; use crate::prelude::{ValueRef, Vector, VectorRef}; -use crate::types::JsonType; +use crate::types::{JsonType, json_type}; use crate::value::StructValueRef; use crate::vectors::{MutableVector, StructVectorBuilder}; @@ -40,16 +40,16 @@ impl JsonStructsBuilder { self.inner.len() } - fn push(&mut self, value: &ValueRef) -> Result<()> { - if self.json_type.is_plain_json() { - let value = ValueRef::Struct(StructValueRef::RefList { - val: vec![value.clone()], - fields: self.json_type.as_struct_type(), - }); - self.inner.try_push_value_ref(&value) - } else { - self.inner.try_push_value_ref(value) + fn push(&mut self, json: &JsonValueRef) -> Result<()> { + let mut value = json.as_value_ref(); + if !json.is_object() { + let fields = json_type::plain_json_struct_type(value.data_type()); + value = ValueRef::Struct(StructValueRef::RefList { + val: vec![value], + fields, + }) } + self.inner.try_push_value_ref(&value) } /// Try to merge (and consume the data of) other json vector builder into this one. @@ -252,13 +252,17 @@ impl MutableVector for JsonVectorBuilder { } fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> { - let data_type = value.data_type(); - let json_type = data_type.as_json().with_context(|| TryFromValueSnafu { - reason: format!("expected json value, got {value:?}"), - })?; + let ValueRef::Json(value) = value else { + return TryFromValueSnafu { + reason: format!("expected json value, got {value:?}"), + } + .fail(); + }; + let json_type = value.json_type(); let builder = match self.builders.last_mut() { Some(last) => { + // TODO(LFC): use "is_include" and amend json value with nulls if &last.json_type != json_type { self.try_create_new_builder(json_type)? } else { @@ -268,21 +272,16 @@ impl MutableVector for JsonVectorBuilder { None => self.try_create_new_builder(json_type)?, }; - let ValueRef::Json(value) = value else { - // Safety: json datatype value must be the value of json. - unreachable!() - }; - builder.push(value) + builder.push(value.as_ref()) } fn push_null(&mut self) { - let null_json_value = ValueRef::Json(Box::new(ValueRef::Null)); - self.try_push_value_ref(&null_json_value) + static NULL_JSON: LazyLock = + LazyLock::new(|| ValueRef::Json(Box::new(JsonValueRef::null()))); + self.try_push_value_ref(&NULL_JSON) // Safety: learning from the method "try_push_value_ref", a null json value should be // always able to push into any json vectors. - .unwrap_or_else(|e| { - panic!("failed to push null json value: {null_json_value:?}, error: {e}") - }); + .unwrap_or_else(|e| panic!("failed to push null json value, error: {e}")); } fn extend_slice_of(&mut self, _: &dyn Vector, _: usize, _: usize) -> Result<()> { @@ -307,12 +306,11 @@ mod tests { let value = settings.encode(json).unwrap(); let value = value.as_value_ref(); - let result = builder.try_push_value_ref(&value); - match (result, expected) { - (Ok(()), Ok(())) => (), - (Err(e), Err(expected)) => assert_eq!(e.to_string(), expected), - _ => unreachable!(), - } + let result = builder + .try_push_value_ref(&value) + .map_err(|e| e.to_string()); + let expected = expected.map_err(|e| e.to_string()); + assert_eq!(result, expected); } #[test] diff --git a/src/frontend/src/limiter.rs b/src/frontend/src/limiter.rs index e0e32e6b1b..1055267b2d 100644 --- a/src/frontend/src/limiter.rs +++ b/src/frontend/src/limiter.rs @@ -18,7 +18,8 @@ use api::v1::column::Values; use api::v1::greptime_request::Request; use api::v1::value::ValueData; use api::v1::{ - Decimal128, InsertRequests, IntervalMonthDayNano, RowInsertRequest, RowInsertRequests, + Decimal128, InsertRequests, IntervalMonthDayNano, JsonValue, RowInsertRequest, + RowInsertRequests, json_value, }; use pipeline::ContextReq; use snafu::ResultExt; @@ -229,12 +230,29 @@ impl Limiter { .unwrap_or(0) }) .sum(), - ValueData::JsonValue(inner) => inner - .as_ref() - .value_data - .as_ref() - .map(Self::size_of_value_data) - .unwrap_or(0), + ValueData::JsonValue(v) => { + fn calc(v: &JsonValue) -> usize { + let Some(value) = v.value.as_ref() else { + return 0; + }; + match value { + json_value::Value::Boolean(_) => size_of::(), + json_value::Value::Int(_) => size_of::(), + json_value::Value::Uint(_) => size_of::(), + json_value::Value::Float(_) => size_of::(), + json_value::Value::Str(s) => s.len(), + json_value::Value::Array(array) => array.items.iter().map(calc).sum(), + json_value::Value::Object(object) => object + .entries + .iter() + .flat_map(|entry| { + entry.value.as_ref().map(|v| entry.key.len() + calc(v)) + }) + .sum(), + } + } + calc(v) + } } } } diff --git a/src/partition/src/collider.rs b/src/partition/src/collider.rs index 1bd5000f9d..c426e84575 100644 --- a/src/partition/src/collider.rs +++ b/src/partition/src/collider.rs @@ -173,6 +173,9 @@ impl<'a> Collider<'a> { for (column, mut column_values) in values { column_values.sort_unstable(); column_values.dedup(); // Remove duplicates + + // allowed because we have carefully implemented `Hash` to eliminate the mutable + #[allow(clippy::mutable_key_type)] let mut value_map = HashMap::with_capacity(column_values.len()); let mut start_value = ZERO; for value in column_values {