diff --git a/src/api/src/helper.rs b/src/api/src/helper.rs index d2d5963491..ed8d64b57a 100644 --- a/src/api/src/helper.rs +++ b/src/api/src/helper.rs @@ -451,10 +451,10 @@ impl TryFrom for ColumnDataTypeWrapper { if native_type.is_null() { None } else { - let native_type = + let concrete_type = ConcreteDataType::from_arrow_type(&native_type.as_arrow_type()); let (datatype, datatype_extension) = - ColumnDataTypeWrapper::try_from(native_type)?.into_parts(); + ColumnDataTypeWrapper::try_from(concrete_type)?.into_parts(); Some(ColumnDataTypeExtension { type_ext: Some(TypeExt::JsonNativeType(Box::new( JsonNativeTypeExtension { diff --git a/src/common/sql/src/convert.rs b/src/common/sql/src/convert.rs index be9701f832..3a10c5476a 100644 --- a/src/common/sql/src/convert.rs +++ b/src/common/sql/src/convert.rs @@ -309,11 +309,11 @@ pub(crate) fn parse_string_to_value( JsonFormat::Json2(_) => { let extension_type: Option = column_schema.extension_type().context(DatatypeSnafu)?; - let json_structure_settings = extension_type - .and_then(|x| x.metadata().json_structure_settings.clone()) + let json_settings = extension_type + .and_then(|x| x.metadata().json_settings.clone()) .unwrap_or_default(); let v = serde_json::from_str(&s).context(DeserializeSnafu { json: s })?; - json_structure_settings.encode(v).context(DatatypeSnafu) + json_settings.encode(v).context(DatatypeSnafu) } }, ConcreteDataType::Vector(d) => { diff --git a/src/datatypes/src/extension/json.rs b/src/datatypes/src/extension/json.rs index 678308226c..6418b97131 100644 --- a/src/datatypes/src/extension/json.rs +++ b/src/datatypes/src/extension/json.rs @@ -18,14 +18,13 @@ use arrow_schema::extension::ExtensionType; use arrow_schema::{ArrowError, DataType, FieldRef}; use serde::{Deserialize, Serialize}; -use crate::json::JsonStructureSettings; +use crate::json::JsonSettings; #[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct JsonMetadata { - /// Indicates how to handle JSON is stored in underlying data type - /// - /// This field can be `None` for data is converted to complete structured in-memory form. - pub json_structure_settings: Option, + /// JSON2 settings stored in column schema metadata and represented through + /// Arrow extension metadata. + pub json_settings: Option, } #[derive(Debug, Clone)] diff --git a/src/datatypes/src/json.rs b/src/datatypes/src/json.rs index 33104084ad..823ab2a6bb 100644 --- a/src/datatypes/src/json.rs +++ b/src/datatypes/src/json.rs @@ -21,286 +21,272 @@ pub mod value; -use std::collections::{BTreeMap, HashSet}; -use std::sync::Arc; +use std::collections::BTreeMap; +use std::collections::btree_map::Entry; use serde::{Deserialize, Serialize}; use serde_json::{Map, Value as Json}; -use snafu::{OptionExt, ResultExt}; +use snafu::ResultExt; -use crate::error::{self, InvalidJsonSnafu, Result, SerializeSnafu}; +use crate::data_type::ConcreteDataType; +use crate::error::{self, Result}; use crate::json::value::{JsonValue, JsonVariant}; -use crate::types::json_type::{JsonNativeType, JsonNumberType, JsonObjectType}; -use crate::types::{JsonType, StructField, StructType}; +use crate::schema::ColumnDefaultConstraint; +use crate::types::json_type::JsonNativeType; use crate::value::{ListValue, StructValue, Value}; -/// The configuration of JSON encoding -/// -/// The enum describes how we handle JSON encoding to `StructValue` internally. -/// It defines three configurations: -/// - Structured: Encodes JSON objects as StructValue with an optional predefined StructType. -/// - UnstructuredRaw: Encodes JSON data as string and store it in a struct with a field named "_raw". -/// - PartialUnstructuredByKey: Encodes JSON objects as StructValue with an optional predefined StructType -/// and a set of unstructured keys, these keys are provided as flattened names, for example: `a.b.c`. -/// -/// We provide a few methods to convert JSON data to StructValue based on the settings. And we also -/// convert them to fully structured StructValue for user-facing APIs: the UI protocol and the UDF interface. -/// -/// **Important**: This settings only controls the internal form of JSON encoding. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum JsonStructureSettings { - // TODO(sunng87): provide a limit - Structured(Option), - UnstructuredRaw, - PartialUnstructuredByKey { - fields: Option, - unstructured_keys: HashSet, - }, +/// JSON2 settings stored in column schema metadata and represented through +/// Arrow extension metadata. +#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct JsonSettings { + #[serde(default)] + pub type_hints: Vec, } -/// Context for JSON encoding/decoding that tracks the current key path +/// Declares selected JSON2 subpaths as typed fields. +/// +/// These hints let JSON2 encode frequently used subpaths in a typed layout, so +/// queries over those subpaths can get behavior and performance closer to +/// ordinary columns. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct JsonTypeHint { + /// JSON2 subpath for a typed field. + /// + /// Each item is one JSON object key. For example, `["user", "age"]` + /// represents `user.age`. + /// + /// Array traversal is not currently supported. For example, a hint cannot + /// describe `events[0].name` or fields shared by all items in `events[*]`. + pub path: Vec, + #[serde(rename = "type")] + pub data_type: ConcreteDataType, + pub nullable: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub default_constraint: Option, + pub inverted_index: bool, +} + +/// Context for JSON encoding/decoding that tracks the current key path. #[derive(Clone, Debug)] pub struct JsonContext<'a> { - /// Current key path in dot notation (e.g., "user.profile.name") - pub key_path: String, - /// Settings for JSON structure handling - pub settings: &'a JsonStructureSettings, + /// Current key path from the JSON2 root. + pub path: Vec, + /// Settings for JSON encoding/decoding. + pub settings: &'a JsonSettings, } -impl JsonStructureSettings { - pub const RAW_FIELD: &'static str = "_raw"; +impl JsonSettings { + pub fn new(type_hints: Vec) -> Self { + Self { type_hints } + } /// Decode an encoded StructValue back into a serde_json::Value. pub fn decode(&self, value: Value) -> Result { let context = JsonContext { - key_path: String::new(), + path: Vec::new(), settings: self, }; decode_value_with_context(value, &context) } - /// Decode a StructValue that was encoded with current settings back into a fully structured StructValue. - /// This is useful for reconstructing the original structure from encoded data, especially when - /// unstructured encoding was used for some fields. - pub fn decode_struct(&self, struct_value: StructValue) -> Result { - let context = JsonContext { - key_path: String::new(), - settings: self, - }; - decode_struct_with_settings(struct_value, &context) - } - /// Encode a serde_json::Value into a Value::Json using current settings. pub fn encode(&self, json: Json) -> Result { - if let Some(json_struct) = self.json_struct() { - return encode_by_struct(json_struct, json); - } - let context = JsonContext { - key_path: String::new(), + path: Vec::new(), settings: self, }; - encode_json_with_context(json, None, &context).map(|v| Value::Json(Box::new(v))) - } - - /// Encode a serde_json::Value into a Value::Json with given data type. - pub fn encode_with_type( - &self, - json: Json, - data_type: Option<&JsonNativeType>, - ) -> Result { - let context = JsonContext { - key_path: String::new(), - settings: self, - }; - encode_json_with_context(json, data_type, &context).map(|v| Value::Json(Box::new(v))) - } - - fn json_struct(&self) -> Option<&StructType> { - match &self { - JsonStructureSettings::Structured(fields) => fields.as_ref(), - JsonStructureSettings::PartialUnstructuredByKey { fields, .. } => fields.as_ref(), - _ => None, - } - } -} - -impl Default for JsonStructureSettings { - fn default() -> Self { - Self::Structured(None) + encode_json_with_context(json, &context).map(|v| Value::Json(Box::new(v))) } } impl<'a> JsonContext<'a> { /// Create a new context with an updated key path pub fn with_key(&self, key: &str) -> JsonContext<'a> { - let new_key_path = if self.key_path.is_empty() { - key.to_string() - } else { - format!("{}.{}", self.key_path, key) - }; + let mut path = self.path.clone(); + path.push(key.to_string()); JsonContext { - key_path: new_key_path, + path, settings: self.settings, } } - /// Check if the current key path should be treated as unstructured - pub fn is_unstructured_key(&self) -> bool { - match &self.settings { - JsonStructureSettings::PartialUnstructuredByKey { - unstructured_keys, .. - } => unstructured_keys.contains(&self.key_path), - _ => false, - } + fn type_hint(&self) -> Option<&'a JsonTypeHint> { + self.settings + .type_hints + .iter() + .find(|hint| hint.path == self.path) } } -fn encode_by_struct(json_struct: &StructType, mut json: Json) -> Result { - let Some(json_object) = json.as_object_mut() else { - return InvalidJsonSnafu { - value: "expect JSON object when struct is provided", - } - .fail(); - }; - let mut encoded = BTreeMap::new(); - - fn extract_field(json_object: &mut Map, field: &str) -> Result> { - let (first, rest) = field.split_once('.').unwrap_or((field, "")); - - if rest.is_empty() { - Ok(json_object.remove(first)) - } else { - let Some(value) = json_object.get_mut(first) else { - return Ok(None); - }; - let json_object = value.as_object_mut().with_context(|| InvalidJsonSnafu { - value: format!(r#"expect "{}" an object"#, first), - })?; - extract_field(json_object, rest) - } - } - - let fields = json_struct.fields(); - for field in fields.iter() { - let Some(field_value) = extract_field(json_object, field.name())? else { - continue; - }; - let field_type: JsonNativeType = field.data_type().into(); - let field_value = try_convert_to_expected_type(field_value, &field_type)?; - encoded.insert(field.name().to_string(), field_value); - } - - let rest = serde_json::to_string(json_object).context(SerializeSnafu)?; - encoded.insert(JsonStructureSettings::RAW_FIELD.to_string(), rest.into()); - - let value: JsonValue = encoded.into(); - Ok(Value::Json(Box::new(value))) -} - /// Main encoding function with key path tracking -pub fn encode_json_with_context<'a>( - json: Json, - data_type: Option<&JsonNativeType>, - context: &JsonContext<'a>, -) -> Result { - // Check if the entire encoding should be unstructured - if matches!(context.settings, JsonStructureSettings::UnstructuredRaw) { - let json_string = json.to_string(); - return Ok([(JsonStructureSettings::RAW_FIELD, json_string)].into()); - } - - // Check if current key should be treated as unstructured - if context.is_unstructured_key() { - return Ok(json.to_string().into()); - } - +pub fn encode_json_with_context<'a>(json: Json, context: &JsonContext<'a>) -> Result { match json { - Json::Object(json_object) => { - let object_type = match data_type.as_ref() { - Some(JsonNativeType::Object(x)) => Some(x), - None => None, - _ => { - return error::InvalidJsonSnafu { - value: "JSON object value must be encoded with object type", - } - .fail(); - } - }; - encode_json_object_with_context(json_object, object_type, context) - } - Json::Array(json_array) => { - let item_type = match data_type.as_ref() { - Some(JsonNativeType::Array(x)) => Some(x.as_ref()), - None => None, - _ => { - return error::InvalidJsonSnafu { - value: "JSON array value must be encoded with array type", - } - .fail(); - } - }; - encode_json_array_with_context(json_array, item_type, context) - } - _ => { - // For non-collection types, verify type compatibility - if let Some(expected_type) = data_type { - let value = encode_json_value_with_context(json, Some(expected_type), context)?; - let actual_type = value.json_type().native_type(); - if actual_type == expected_type { - Ok(value) - } else { - Err(error::InvalidJsonSnafu { - value: format!( - "JSON value type {} does not match expected type {}", - actual_type, expected_type - ), - } - .build()) - } - } else { - encode_json_value_with_context(json, None, context) - } - } + Json::Object(json_object) => encode_json_object_with_context(json_object, context), + Json::Array(json_array) => encode_json_array_with_context(json_array, context), + _ => encode_json_value_with_context(json, context), } } fn encode_json_object_with_context<'a>( - mut json_object: Map, - fields: Option<&JsonObjectType>, + json_object: Map, context: &JsonContext<'a>, ) -> Result { let mut object = BTreeMap::new(); - // First, process fields from the provided schema in their original order - if let Some(fields) = fields { - for (field_name, field_type) in fields { - if let Some(value) = json_object.remove(field_name) { - let field_context = context.with_key(field_name); - let value = - encode_json_value_with_context(value, Some(field_type), &field_context)?; - object.insert(field_name.clone(), value.into_variant()); - } else { - // Field exists in schema but not in JSON - add null value - object.insert(field_name.clone(), ().into()); - } - } - } - - // Then, process any remaining JSON fields that weren't in the schema for (key, value) in json_object { let field_context = context.with_key(&key); - let value = encode_json_value_with_context(value, None, &field_context)?; + let value = if let Some(hint) = field_context.type_hint() { + encode_json_value_with_hint(value, hint, &field_context)? + } else { + encode_json_value_with_context(value, &field_context)? + }; object.insert(key, value.into_variant()); } + apply_missing_type_hints(&mut object, context)?; + Ok(JsonValue::new(JsonVariant::Object(object))) } +fn apply_missing_type_hints( + object: &mut BTreeMap, + context: &JsonContext, +) -> Result<()> { + for hint in &context.settings.type_hints { + if hint.path.len() > context.path.len() && hint.path.starts_with(&context.path) { + insert_missing_type_hint(object, context, hint, context.path.len())?; + } + } + Ok(()) +} + +fn insert_missing_type_hint( + object: &mut BTreeMap, + context: &JsonContext, + hint: &JsonTypeHint, + depth: usize, +) -> Result<()> { + let key = &hint.path[depth]; + let field_context = context.with_key(key); + let is_leaf = depth + 1 == hint.path.len(); + + if is_leaf { + if !object.contains_key(key) { + let value = encode_missing_type_hint_value(hint, &field_context)?; + object.insert(key.clone(), value.into_variant()); + } + return Ok(()); + } + + match object.entry(key.clone()) { + Entry::Occupied(mut entry) => match entry.get_mut() { + JsonVariant::Object(child) => { + insert_missing_type_hint(child, &field_context, hint, depth + 1) + } + _ => error::InvalidJsonSnafu { + value: format!( + "JSON2 type hint path {} expects object at {}", + hint.path.join("."), + field_context.path.join(".") + ), + } + .fail(), + }, + Entry::Vacant(entry) => { + let mut child = BTreeMap::new(); + insert_missing_type_hint(&mut child, &field_context, hint, depth + 1)?; + entry.insert(JsonVariant::Object(child)); + Ok(()) + } + } +} + +fn encode_missing_type_hint_value(hint: &JsonTypeHint, context: &JsonContext) -> Result { + if let Some(default_constraint) = &hint.default_constraint { + let value = default_constraint.create_default(&hint.data_type, hint.nullable)?; + let json = decode_primitive_value(value)?; + return encode_json_value_with_hint(json, hint, context); + } + + if hint.nullable { + Ok(JsonValue::null()) + } else { + error::InvalidJsonSnafu { + value: format!( + "missing non-null JSON2 type hint path {}", + hint.path.join(".") + ), + } + .fail() + } +} + +fn encode_json_value_with_hint( + json: Json, + hint: &JsonTypeHint, + context: &JsonContext, +) -> Result { + if json.is_null() { + return if hint.nullable { + Ok(JsonValue::null()) + } else { + error::InvalidJsonSnafu { + value: format!( + "JSON2 type hint path {} is not nullable", + context.path.join(".") + ), + } + .fail() + }; + } + + let invalid_type = || { + error::InvalidJsonSnafu { + value: format!( + "JSON value at {} does not match JSON2 type hint {}", + context.path.join("."), + hint.data_type + ), + } + .fail() + }; + + match (&hint.data_type, json) { + (ConcreteDataType::String(_), Json::String(v)) => Ok(v.into()), + ( + ConcreteDataType::Int8(_) + | ConcreteDataType::Int16(_) + | ConcreteDataType::Int32(_) + | ConcreteDataType::Int64(_), + Json::Number(v), + ) => match v.as_i64() { + Some(v) => Ok(v.into()), + None => invalid_type(), + }, + ( + ConcreteDataType::UInt8(_) + | ConcreteDataType::UInt16(_) + | ConcreteDataType::UInt32(_) + | ConcreteDataType::UInt64(_), + Json::Number(v), + ) => match v.as_u64() { + Some(v) => Ok(v.into()), + None => invalid_type(), + }, + (ConcreteDataType::Float32(_) | ConcreteDataType::Float64(_), Json::Number(v)) => { + match v.as_f64() { + Some(v) => Ok(v.into()), + None => invalid_type(), + } + } + (ConcreteDataType::Boolean(_), Json::Bool(v)) => Ok(v.into()), + _ => invalid_type(), + } +} + fn encode_json_array_with_context<'a>( json_array: Vec, - item_type: Option<&JsonNativeType>, context: &JsonContext<'a>, ) -> Result { let json_array_len = json_array.len(); @@ -308,7 +294,7 @@ fn encode_json_array_with_context<'a>( for (index, value) in json_array.into_iter().enumerate() { let array_context = context.with_key(&index.to_string()); - let item_value = encode_json_value_with_context(value, None, &array_context)?; + let item_value = encode_json_value_with_context(value, &array_context)?; items.push(item_value); } @@ -317,7 +303,6 @@ fn encode_json_array_with_context<'a>( // array, which requires all items have exactly the same type. So we merge out the maybe // different item types to a unified type, and align all the item values to it. - let provided_item_type = item_type.map(|x| JsonType::new_json2(x.clone())); let merged_item_type = if let Some((first, rests)) = items.split_first() { let mut merged = first.json_type().clone(); for rest in rests.iter().map(|x| x.json_type()) { @@ -330,14 +315,7 @@ fn encode_json_array_with_context<'a>( } else { None }; - let unified_item_type = match (provided_item_type, merged_item_type) { - (Some(mut x), Some(y)) => { - x.merge(&y)?; - Some(x) - } - (x, y) => x.or(y), - }; - if let Some(unified_item_type) = unified_item_type { + if let Some(unified_item_type) = merged_item_type { for item in &mut items { item.try_align(&unified_item_type)?; } @@ -350,80 +328,34 @@ fn encode_json_array_with_context<'a>( } /// Helper function to encode a JSON value to a Value and determine its ConcreteDataType with context -fn encode_json_value_with_context<'a>( - json: Json, - expected_type: Option<&JsonNativeType>, - context: &JsonContext<'a>, -) -> Result { - // Check if current key should be treated as unstructured - if context.is_unstructured_key() { - return Ok(json.to_string().into()); - } - +fn encode_json_value_with_context<'a>(json: Json, context: &JsonContext<'a>) -> Result { match json { Json::Null => Ok(JsonValue::null()), Json::Bool(b) => Ok(b.into()), Json::Number(n) => { if let Some(i) = n.as_i64() { - // Use int64 for all integer numbers when possible - if let Some(expected) = expected_type - && let Ok(value) = try_convert_to_expected_type(i, expected) - { - return Ok(value.into()); - } Ok(i.into()) } else if let Some(u) = n.as_u64() { - // Use int64 for unsigned integers that fit, otherwise use u64 - if let Some(expected) = expected_type - && let Ok(value) = try_convert_to_expected_type(u, expected) - { - return Ok(value.into()); - } if u <= i64::MAX as u64 { Ok((u as i64).into()) } else { Ok(u.into()) } } else if let Some(f) = n.as_f64() { - // Try to use the expected type if provided - if let Some(expected) = expected_type - && let Ok(value) = try_convert_to_expected_type(f, expected) - { - return Ok(value.into()); - } - - // Default to f64 for floating point numbers Ok(f.into()) } else { // Fallback to string representation Ok(n.to_string().into()) } } - Json::String(s) => { - if let Some(expected) = expected_type - && let Ok(value) = try_convert_to_expected_type(s.as_str(), expected) - { - return Ok(value.into()); - } - Ok(s.into()) - } - Json::Array(arr) => encode_json_array_with_context(arr, expected_type, context), - Json::Object(obj) => encode_json_object_with_context(obj, None, context), + Json::String(s) => Ok(s.into()), + Json::Array(arr) => encode_json_array_with_context(arr, context), + Json::Object(obj) => encode_json_object_with_context(obj, context), } } /// Main decoding function with key path tracking pub fn decode_value_with_context(value: Value, context: &JsonContext) -> Result { - // Check if the entire decoding should be unstructured - if matches!(context.settings, JsonStructureSettings::UnstructuredRaw) { - return decode_unstructured_value(value); - } - - // Check if current key should be treated as unstructured - if context.is_unstructured_key() { - return decode_unstructured_value(value); - } - match value { Value::Struct(struct_value) => decode_struct_with_context(struct_value, context), Value::List(list_value) => decode_list_with_context(list_value, context), @@ -464,46 +396,6 @@ fn decode_list_with_context(list_value: ListValue, context: &JsonContext) -> Res Ok(Json::Array(json_array)) } -/// Decode unstructured value (stored as string) -fn decode_unstructured_value(value: Value) -> Result { - match value { - // Handle expected format: StructValue with single _raw field - Value::Struct(struct_value) => { - if struct_value.struct_type().fields().len() == 1 { - let field = &struct_value.struct_type().fields()[0]; - if field.name() == JsonStructureSettings::RAW_FIELD - && let Some(Value::String(s)) = struct_value.items().first() - { - let json_str = s.as_utf8(); - return serde_json::from_str(json_str).with_context(|_| { - error::DeserializeSnafu { - json: json_str.to_string(), - } - }); - } - } - // Invalid format - expected struct with single _raw field - Err(error::InvalidJsonSnafu { - value: "Unstructured value must be stored as struct with single _raw field" - .to_string(), - } - .build()) - } - // Handle old format: plain string (for backward compatibility) - Value::String(s) => { - let json_str = s.as_utf8(); - serde_json::from_str(json_str).with_context(|_| error::DeserializeSnafu { - json: json_str.to_string(), - }) - } - _ => Err(error::InvalidJsonSnafu { - value: "Unstructured value must be stored as string or struct with _raw field" - .to_string(), - } - .build()), - } -} - /// Decode primitive value to JSON fn decode_primitive_value(value: Value) -> Result { match value { @@ -545,357 +437,133 @@ fn decode_primitive_value(value: Value) -> Result { } } -/// Decode a StructValue that was encoded with current settings back into a fully structured StructValue -fn decode_struct_with_settings<'a>( - struct_value: StructValue, - context: &JsonContext<'a>, -) -> Result { - // Check if we can return the struct directly (Structured case) - if matches!(context.settings, JsonStructureSettings::Structured(_)) { - return Ok(struct_value); - } - - // Check if we can return the struct directly (PartialUnstructuredByKey with no matching keys) - if let JsonStructureSettings::PartialUnstructuredByKey { - unstructured_keys, .. - } = context.settings - && unstructured_keys.is_empty() - { - return Ok(struct_value.clone()); - } - - // Check if the entire decoding should be unstructured (UnstructuredRaw case) - if matches!(context.settings, JsonStructureSettings::UnstructuredRaw) { - // For UnstructuredRaw, the entire struct should be reconstructed from _raw field - return decode_unstructured_raw_struct(struct_value); - } - - let mut items = Vec::with_capacity(struct_value.len()); - let mut struct_fields = Vec::with_capacity(struct_value.len()); - - // Process each field in the struct value - let (struct_data, fields) = struct_value.into_parts(); - for (field, value) in fields.fields().iter().zip(struct_data) { - let field_context = context.with_key(field.name()); - - // Check if this field should be treated as unstructured - if field_context.is_unstructured_key() { - // Decode the unstructured value - let json_value = decode_unstructured_value(value)?; - - // Re-encode the unstructured value with proper structure using structured context - let structured_context = JsonContext { - key_path: field_context.key_path.clone(), - settings: &JsonStructureSettings::Structured(None), - }; - let decoded_value = encode_json_value_with_context( - json_value, - None, // Don't force a specific type, let it be inferred from JSON - &structured_context, - )? - .into_value(); - let data_type = decoded_value.data_type(); - - items.push(decoded_value); - struct_fields.push(StructField::new( - field.name().to_string(), - data_type, - true, // JSON fields are always nullable - )); - } else { - // For structured fields, recursively decode if they are structs/lists - let decoded_value = match value { - Value::Struct(nested_struct) => { - let nested_context = context.with_key(field.name()); - Value::Struct(decode_struct_with_settings(nested_struct, &nested_context)?) - } - Value::List(list_value) => { - let list_context = context.with_key(field.name()); - Value::List(decode_list_with_settings(list_value, &list_context)?) - } - _ => value.clone(), - }; - - items.push(decoded_value); - struct_fields.push(field.clone()); - } - } - - let struct_type = StructType::new(Arc::new(struct_fields)); - StructValue::try_new(items, struct_type) -} - -/// Decode a ListValue that was encoded with current settings back into a fully structured ListValue -fn decode_list_with_settings<'a>( - list_value: ListValue, - context: &JsonContext<'a>, -) -> Result { - let mut items = Vec::with_capacity(list_value.len()); - - let (data_items, datatype) = list_value.into_parts(); - - for (index, item) in data_items.into_iter().enumerate() { - let item_context = context.with_key(&index.to_string()); - - let decoded_item = match item { - Value::Struct(nested_struct) => { - Value::Struct(decode_struct_with_settings(nested_struct, &item_context)?) - } - Value::List(nested_list) => { - Value::List(decode_list_with_settings(nested_list, &item_context)?) - } - _ => item.clone(), - }; - - items.push(decoded_item); - } - - Ok(ListValue::new(items, datatype)) -} - -/// Helper function to decode a struct that was encoded with UnstructuredRaw settings -fn decode_unstructured_raw_struct(struct_value: StructValue) -> Result { - // For UnstructuredRaw, the struct must have exactly one field named "_raw" - if struct_value.struct_type().fields().len() == 1 { - let field = &struct_value.struct_type().fields()[0]; - if field.name() == JsonStructureSettings::RAW_FIELD - && let Some(Value::String(s)) = struct_value.items().first() - { - let json_str = s.as_utf8(); - let json_value: Json = - serde_json::from_str(json_str).with_context(|_| error::DeserializeSnafu { - json: json_str.to_string(), - })?; - - // Re-encode the JSON with proper structure - let context = JsonContext { - key_path: String::new(), - settings: &JsonStructureSettings::Structured(None), - }; - let decoded_value = - encode_json_value_with_context(json_value, None, &context)?.into_value(); - let data_type = decoded_value.data_type(); - - if let Value::Struct(decoded_struct) = decoded_value { - return Ok(decoded_struct); - } else { - // If the decoded value is not a struct, wrap it in a struct - let struct_type = StructType::new(Arc::new(vec![StructField::new( - "value".to_string(), - data_type, - true, - )])); - return StructValue::try_new(vec![decoded_value], struct_type); - } - } - } - - // Invalid format - expected struct with single _raw field - Err(error::InvalidJsonSnafu { - value: "UnstructuredRaw value must be stored as struct with single _raw field".to_string(), - } - .build()) -} - -/// Helper function to try converting a value to an expected type -fn try_convert_to_expected_type(value: T, expected_type: &JsonNativeType) -> Result -where - T: Into, -{ - let value = value.into(); - let cast_error = || { - error::CastTypeSnafu { - msg: format!("Cannot cast value {value} to {expected_type}"), - } - .fail() - }; - let actual_type = &value.native_type(); - match (actual_type, expected_type) { - (x, y) if x == y => Ok(value), - (JsonNativeType::Number(x), JsonNativeType::Number(y)) => match (x, y) { - (JsonNumberType::U64, JsonNumberType::I64) => { - if let Some(i) = value.as_i64() { - Ok(i.into()) - } else { - cast_error() - } - } - (JsonNumberType::I64, JsonNumberType::U64) => { - if let Some(i) = value.as_u64() { - Ok(i.into()) - } else { - cast_error() - } - } - (_, JsonNumberType::F64) => { - if let Some(f) = value.as_f64() { - Ok(f.into()) - } else { - cast_error() - } - } - _ => cast_error(), - }, - (_, JsonNativeType::String) => Ok(value.to_string().into()), - _ => cast_error(), - } -} - #[cfg(test)] mod tests { + use std::sync::Arc; use serde_json::json; use super::*; use crate::data_type::ConcreteDataType; - use crate::types::ListType; + use crate::types::{ListType, StructField, StructType}; + + fn struct_field_value<'a>(struct_value: &'a StructValue, field_name: &str) -> &'a Value { + let index = struct_value + .struct_type() + .fields() + .iter() + .position(|field| field.name() == field_name) + .expect("field exists"); + &struct_value.items()[index] + } #[test] - fn test_encode_by_struct() { - let json_struct: StructType = [ - StructField::new("s", ConcreteDataType::string_datatype(), true), - StructField::new("foo.i", ConcreteDataType::int64_datatype(), true), - StructField::new("x.y.z", ConcreteDataType::boolean_datatype(), true), - ] - .into(); - - let json = json!({ - "s": "hello", - "t": "world", - "foo": { - "i": 1, - "j": 2 - }, - "x": { - "y": { - "z": true + fn test_json_settings_forward_compatibility() { + let json_str = r#"{ + "type_hints": [ + { + "path": ["user", "age"], + "type": { + "Int64": {} + }, + "nullable": false, + "default_constraint": { + "Value": { + "Int64": 18 + } + }, + "inverted_index": true + }, + { + "path": ["user", "name"], + "type": { + "String": { + "size_type": "Utf8" + } + }, + "nullable": true, + "inverted_index": false } - } - }); - let value = encode_by_struct(&json_struct, json).unwrap(); - assert_eq!( - value.to_string(), - r#"Json({ _raw: {"foo":{"j":2},"t":"world","x":{"y":{}}}, foo.i: 1, s: hello, x.y.z: true })"# - ); + ] + }"#; - let json = json!({ - "t": "world", - "foo": { - "i": 1, - "j": 2 - }, - "x": { - "y": { - "z": true - } - } - }); - let value = encode_by_struct(&json_struct, json).unwrap(); - assert_eq!( - value.to_string(), - r#"Json({ _raw: {"foo":{"j":2},"t":"world","x":{"y":{}}}, foo.i: 1, x.y.z: true })"# - ); + let deserialized = serde_json::from_str::(json_str).unwrap(); - let json = json!({ - "s": 1234, - "foo": { - "i": 1, - "j": 2 - }, - "x": { - "y": { - "z": true - } - } - }); - let value = encode_by_struct(&json_struct, json).unwrap(); assert_eq!( - value.to_string(), - r#"Json({ _raw: {"foo":{"j":2},"x":{"y":{}}}, foo.i: 1, s: 1234, x.y.z: true })"# + deserialized, + JsonSettings::new(vec![ + JsonTypeHint { + path: vec!["user".to_string(), "age".to_string()], + data_type: ConcreteDataType::int64_datatype(), + nullable: false, + default_constraint: Some(ColumnDefaultConstraint::Value(Value::Int64(18))), + inverted_index: true, + }, + JsonTypeHint { + path: vec!["user".to_string(), "name".to_string()], + data_type: ConcreteDataType::string_datatype(), + nullable: true, + default_constraint: None, + inverted_index: false, + }, + ]) ); + } - let json = json!({ - "s": "hello", - "t": "world", - "foo": { - "i": "bar", - "j": 2 + #[test] + fn test_json_settings_ser_de() { + let settings = JsonSettings::new(vec![ + JsonTypeHint { + path: vec!["user".to_string(), "age".to_string()], + data_type: ConcreteDataType::int64_datatype(), + nullable: false, + default_constraint: Some(ColumnDefaultConstraint::Value(Value::Int64(18))), + inverted_index: true, }, - "x": { - "y": { - "z": true - } - } - }); - let result = encode_by_struct(&json_struct, json); - assert_eq!( - result.unwrap_err().to_string(), - r#"Cannot cast value bar to """# - ); + JsonTypeHint { + path: vec!["user".to_string(), "name".to_string()], + data_type: ConcreteDataType::string_datatype(), + nullable: true, + default_constraint: None, + inverted_index: false, + }, + ]); - let json = json!({ - "s": "hello", - "t": "world", - "foo": { - "i": 1, - "j": 2 - }, - "x": { - "y": "z" - } - }); - let result = encode_by_struct(&json_struct, json); - assert_eq!( - result.unwrap_err().to_string(), - r#"Invalid JSON: expect "y" an object"# - ); + let serialized = serde_json::to_string(&settings).unwrap(); + let deserialized = serde_json::from_str::(&serialized).unwrap(); + + assert_eq!(settings, deserialized); } #[test] fn test_encode_json_null() { let json = Json::Null; - let settings = JsonStructureSettings::Structured(None); - let result = settings - .encode_with_type(json, None) - .unwrap() - .into_json_inner() - .unwrap(); + let settings = JsonSettings::default(); + let result = settings.encode(json).unwrap().into_json_inner().unwrap(); assert_eq!(result, Value::Null); } #[test] fn test_encode_json_boolean() { let json = Json::Bool(true); - let settings = JsonStructureSettings::Structured(None); - let result = settings - .encode_with_type(json, None) - .unwrap() - .into_json_inner() - .unwrap(); + let settings = JsonSettings::default(); + let result = settings.encode(json).unwrap().into_json_inner().unwrap(); assert_eq!(result, Value::Boolean(true)); } #[test] fn test_encode_json_number_integer() { let json = Json::from(42); - let settings = JsonStructureSettings::Structured(None); - let result = settings - .encode_with_type(json, None) - .unwrap() - .into_json_inner() - .unwrap(); + let settings = JsonSettings::default(); + let result = settings.encode(json).unwrap().into_json_inner().unwrap(); assert_eq!(result, Value::Int64(42)); } #[test] fn test_encode_json_number_float() { let json = Json::from(3.15); - let settings = JsonStructureSettings::Structured(None); - let result = settings - .encode_with_type(json, None) - .unwrap() - .into_json_inner() - .unwrap(); + let settings = JsonSettings::default(); + let result = settings.encode(json).unwrap().into_json_inner().unwrap(); match result { Value::Float64(f) => assert_eq!(f.0, 3.15), _ => panic!("Expected Float64"), @@ -905,24 +573,16 @@ mod tests { #[test] fn test_encode_json_string() { let json = Json::String("hello".to_string()); - let settings = JsonStructureSettings::Structured(None); - let result = settings - .encode_with_type(json, None) - .unwrap() - .into_json_inner() - .unwrap(); + let settings = JsonSettings::default(); + let result = settings.encode(json).unwrap().into_json_inner().unwrap(); assert_eq!(result, Value::String("hello".into())); } #[test] fn test_encode_json_array() { let json = json!([1, 2, 3]); - let settings = JsonStructureSettings::Structured(None); - let result = settings - .encode_with_type(json, None) - .unwrap() - .into_json_inner() - .unwrap(); + let settings = JsonSettings::default(); + let result = settings.encode(json).unwrap().into_json_inner().unwrap(); if let Value::List(list_value) = result { assert_eq!(list_value.items().len(), 3); @@ -942,12 +602,8 @@ mod tests { "active": true }); - let settings = JsonStructureSettings::Structured(None); - let result = settings - .encode_with_type(json, None) - .unwrap() - .into_json_inner() - .unwrap(); + let settings = JsonSettings::default(); + let result = settings.encode(json).unwrap().into_json_inner().unwrap(); let Value::Struct(result) = result else { panic!("Expected Struct value"); }; @@ -993,12 +649,8 @@ mod tests { "scores": [95, 87, 92] }); - let settings = JsonStructureSettings::Structured(None); - let result = settings - .encode_with_type(json, None) - .unwrap() - .into_json_inner() - .unwrap(); + let settings = JsonSettings::default(); + let result = settings.encode(json).unwrap().into_json_inner().unwrap(); let Value::Struct(result) = result else { panic!("Expected Struct value"); }; @@ -1039,44 +691,19 @@ mod tests { } } - #[test] - fn test_encode_json_with_expected_type() { - // Test encoding JSON number with expected int8 type - let json = Json::from(42); - let settings = JsonStructureSettings::Structured(None); - let result = settings - .encode_with_type(json.clone(), Some(&JsonNativeType::u64())) - .unwrap() - .into_json_inner() - .unwrap(); - assert_eq!(result, Value::UInt64(42)); - - // Test with expected string type - let result = settings - .encode_with_type(json, Some(&JsonNativeType::String)) - .unwrap() - .into_json_inner() - .unwrap(); - assert_eq!(result, Value::String("42".into())); - } - #[test] fn test_encode_json_array_mixed_types() { let json = json!([1, "hello", true, 3.15]); - let settings = JsonStructureSettings::Structured(None); - let value = settings.encode_with_type(json, None).unwrap(); + let settings = JsonSettings::default(); + let value = settings.encode(json).unwrap(); assert_eq!(value.data_type().to_string(), r#"Json2[""]"#); } #[test] fn test_encode_json_empty_array() { let json = json!([]); - let settings = JsonStructureSettings::Structured(None); - let result = settings - .encode_with_type(json, None) - .unwrap() - .into_json_inner() - .unwrap(); + let settings = JsonSettings::default(); + let result = settings.encode(json).unwrap().into_json_inner().unwrap(); if let Value::List(list_value) = result { assert_eq!(list_value.items().len(), 0); @@ -1097,7 +724,7 @@ mod tests { "age": 35 }); - let settings = JsonStructureSettings::Structured(None); + let settings = JsonSettings::default(); let result = settings.encode(json).unwrap().into_json_inner().unwrap(); if let Value::Struct(struct_value) = result { @@ -1112,169 +739,186 @@ mod tests { } #[test] - fn test_encode_json_structured_with_fields() { - let json = json!({ - "name": "Carol", - "age": 28 - }); + fn test_encode_json_respects_type_hint() { + let settings = JsonSettings::new(vec![JsonTypeHint { + path: vec!["age".to_string()], + data_type: ConcreteDataType::int64_datatype(), + nullable: false, + default_constraint: None, + inverted_index: false, + }]); - // Define expected struct type - let concrete_type = JsonNativeType::Object(JsonObjectType::from([ - ("name".to_string(), JsonNativeType::String), - ("age".to_string(), JsonNativeType::i64()), - ])); - - let settings = JsonStructureSettings::Structured(None); let result = settings - .encode_with_type(json, Some(&concrete_type)) + .encode(json!({ + "name": "Alice", + "age": 42 + })) .unwrap() .into_json_inner() .unwrap(); - if let Value::Struct(struct_value) = result { - assert_eq!(struct_value.items().len(), 2); - let struct_fields = struct_value.struct_type().fields(); - assert_eq!(struct_fields[0].name(), "age"); - assert_eq!( - struct_fields[0].data_type(), - &ConcreteDataType::int64_datatype() - ); - assert_eq!(struct_fields[1].name(), "name"); - assert_eq!( - struct_fields[1].data_type(), - &ConcreteDataType::string_datatype() - ); - } else { + let Value::Struct(struct_value) = result else { panic!("Expected Struct value"); - } + }; + assert_eq!(struct_field_value(&struct_value, "age"), &Value::Int64(42)); + + let err = settings + .encode(json!({ + "age": "42" + })) + .unwrap_err(); + assert!(err.to_string().contains("does not match JSON2 type hint")); } #[test] - fn test_encode_json_object_field_order_preservation() { - let json = json!({ - "z_field": "last", - "a_field": "first", - "m_field": "middle" - }); + fn test_encode_json_respects_unsigned_type_hint() { + let settings = JsonSettings::new(vec![JsonTypeHint { + path: vec!["count".to_string()], + data_type: ConcreteDataType::uint64_datatype(), + nullable: false, + default_constraint: None, + inverted_index: false, + }]); - // Define schema with specific field order - let json_type = JsonObjectType::from([ - ("a_field".to_string(), JsonNativeType::String), - ("m_field".to_string(), JsonNativeType::String), - ("z_field".to_string(), JsonNativeType::String), + let result = settings + .encode(json!({ + "count": u64::MAX + })) + .unwrap() + .into_json_inner() + .unwrap(); + + let Value::Struct(struct_value) = result else { + panic!("Expected Struct value"); + }; + assert_eq!( + struct_field_value(&struct_value, "count"), + &Value::UInt64(u64::MAX) + ); + + let err = settings + .encode(json!({ + "count": -1 + })) + .unwrap_err(); + assert!(err.to_string().contains("does not match JSON2 type hint")); + } + + #[test] + fn test_encode_json_fills_missing_type_hint_with_default() { + let settings = JsonSettings::new(vec![JsonTypeHint { + path: vec!["user".to_string(), "age".to_string()], + data_type: ConcreteDataType::int64_datatype(), + nullable: false, + default_constraint: Some(ColumnDefaultConstraint::Value(Value::Int64(7))), + inverted_index: false, + }]); + + let result = settings + .encode(json!({})) + .unwrap() + .into_json_inner() + .unwrap(); + + let Value::Struct(root) = result else { + panic!("Expected Struct value"); + }; + let Value::Struct(user) = struct_field_value(&root, "user") else { + panic!("Expected user Struct value"); + }; + assert_eq!(struct_field_value(user, "age"), &Value::Int64(7)); + } + + #[test] + fn test_encode_json_fills_missing_nullable_type_hint_with_null() { + let settings = JsonSettings::new(vec![JsonTypeHint { + path: vec!["user".to_string(), "name".to_string()], + data_type: ConcreteDataType::string_datatype(), + nullable: true, + default_constraint: None, + inverted_index: false, + }]); + + let result = settings + .encode(json!({ "user": {} })) + .unwrap() + .into_json_inner() + .unwrap(); + + let Value::Struct(root) = result else { + panic!("Expected Struct value"); + }; + let Value::Struct(user) = struct_field_value(&root, "user") else { + panic!("Expected user Struct value"); + }; + assert_eq!(struct_field_value(user, "name"), &Value::Null); + } + + #[test] + fn test_encode_json_rejects_missing_non_null_type_hint() { + let settings = JsonSettings::new(vec![JsonTypeHint { + path: vec!["user".to_string(), "age".to_string()], + data_type: ConcreteDataType::int64_datatype(), + nullable: false, + default_constraint: None, + inverted_index: false, + }]); + + let err = settings.encode(json!({})).unwrap_err(); + assert!( + err.to_string() + .contains("missing non-null JSON2 type hint path user.age") + ); + } + + #[test] + fn test_encode_json_merges_missing_type_hint_prefix() { + let settings = JsonSettings::new(vec![ + JsonTypeHint { + path: vec!["user".to_string(), "age".to_string()], + data_type: ConcreteDataType::int64_datatype(), + nullable: false, + default_constraint: Some(ColumnDefaultConstraint::Value(Value::Int64(7))), + inverted_index: false, + }, + JsonTypeHint { + path: vec!["user".to_string(), "name".to_string()], + data_type: ConcreteDataType::string_datatype(), + nullable: false, + default_constraint: Some(ColumnDefaultConstraint::Value(Value::String( + "unknown".into(), + ))), + inverted_index: false, + }, ]); - let Value::Struct(result) = encode_json_object_with_context( - json.as_object().unwrap().clone(), - Some(&json_type), - &JsonContext { - key_path: String::new(), - settings: &JsonStructureSettings::Structured(None), - }, - ) - .map(|x| x.into_value()) - .unwrap() else { - unreachable!() + let result = settings + .encode(json!({})) + .unwrap() + .into_json_inner() + .unwrap(); + + let Value::Struct(root) = result else { + panic!("Expected Struct value"); }; - - // Verify field order is preserved from schema - let struct_fields = result.struct_type().fields(); - assert_eq!(struct_fields[0].name(), "a_field"); - assert_eq!(struct_fields[1].name(), "m_field"); - assert_eq!(struct_fields[2].name(), "z_field"); - - // Verify values are correct - let items = result.items(); - assert_eq!(items[0], Value::String("first".into())); - assert_eq!(items[1], Value::String("middle".into())); - assert_eq!(items[2], Value::String("last".into())); + let Value::Struct(user) = struct_field_value(&root, "user") else { + panic!("Expected user Struct value"); + }; + assert_eq!(struct_field_value(user, "age"), &Value::Int64(7)); + assert_eq!( + struct_field_value(user, "name"), + &Value::String("unknown".into()) + ); } #[test] - fn test_encode_json_object_schema_reuse_with_extra_fields() { - let json = json!({ - "name": "Alice", - "age": 25, - "active": true // Extra field not in schema - }); - - // Define schema with only name and age - let json_type = JsonObjectType::from([ - ("name".to_string(), JsonNativeType::String), - ("age".to_string(), JsonNativeType::i64()), - ]); - - let Value::Struct(result) = encode_json_object_with_context( - json.as_object().unwrap().clone(), - Some(&json_type), - &JsonContext { - key_path: String::new(), - settings: &JsonStructureSettings::Structured(None), - }, - ) - .map(|x| x.into_value()) - .unwrap() else { - unreachable!() - }; - - // verify fields are sorted in json value - let struct_fields = result.struct_type().fields(); - assert_eq!(struct_fields[0].name(), "active"); - assert_eq!(struct_fields[1].name(), "age"); - assert_eq!(struct_fields[2].name(), "name"); - - // Verify values are correct - let items = result.items(); - assert_eq!(items[0], Value::Boolean(true)); - assert_eq!(items[1], Value::Int64(25)); - assert_eq!(items[2], Value::String("Alice".into())); - } - - #[test] - fn test_encode_json_object_missing_schema_fields() { - let json = json!({ - "name": "Bob" - // age field is missing from JSON but present in schema - }); - - // Define schema with name and age - let json_type = JsonObjectType::from([ - ("name".to_string(), JsonNativeType::String), - ("age".to_string(), JsonNativeType::i64()), - ]); - - let Value::Struct(result) = encode_json_object_with_context( - json.as_object().unwrap().clone(), - Some(&json_type), - &JsonContext { - key_path: String::new(), - settings: &JsonStructureSettings::Structured(None), - }, - ) - .map(|x| x.into_value()) - .unwrap() else { - unreachable!() - }; - - // Verify both schema fields are present - let struct_fields = result.struct_type().fields(); - assert_eq!(struct_fields[0].name(), "age"); - assert_eq!(struct_fields[1].name(), "name"); - - // Verify values - name has value, age is null - let items = result.items(); - assert_eq!(items[0], Value::Null); - assert_eq!(items[1], Value::String("Bob".into())); - } - - #[test] - fn test_json_structure_settings_structured() { + fn test_json_settings_structured() { let json = json!({ "name": "Eve", "score": 95 }); - let settings = JsonStructureSettings::Structured(None); + let settings = JsonSettings::default(); let result = settings.encode(json).unwrap().into_json_inner().unwrap(); if let Value::Struct(struct_value) = result { @@ -1284,53 +928,6 @@ mod tests { } } - #[test] - fn test_encode_json_array_with_item_type() { - let json = json!([1, 2, 3]); - let item_type = Arc::new(ConcreteDataType::int64_datatype()); - let settings = JsonStructureSettings::Structured(None); - let result = settings - .encode_with_type( - json, - Some(&JsonNativeType::Array(Box::new(JsonNativeType::i64()))), - ) - .unwrap() - .into_json_inner() - .unwrap(); - - if let Value::List(list_value) = result { - assert_eq!(list_value.items().len(), 3); - assert_eq!(list_value.items()[0], Value::Int64(1)); - assert_eq!(list_value.items()[1], Value::Int64(2)); - assert_eq!(list_value.items()[2], Value::Int64(3)); - assert_eq!(list_value.datatype(), item_type); - } else { - panic!("Expected List value"); - } - } - - #[test] - fn test_encode_json_array_empty_with_item_type() { - let json = json!([]); - let item_type = Arc::new(ConcreteDataType::null_datatype()); - let settings = JsonStructureSettings::Structured(None); - let result = settings - .encode_with_type( - json, - Some(&JsonNativeType::Array(Box::new(JsonNativeType::Null))), - ) - .unwrap() - .into_json_inner() - .unwrap(); - - if let Value::List(list_value) = result { - assert_eq!(list_value.items().len(), 0); - assert_eq!(list_value.datatype(), item_type); - } else { - panic!("Expected List value"); - } - } - #[cfg(test)] mod decode_tests { use ordered_float::OrderedFloat; @@ -1340,7 +937,7 @@ mod tests { #[test] fn test_decode_primitive_values() { - let settings = JsonStructureSettings::Structured(None); + let settings = JsonSettings::default(); // Test null let result = settings.decode(Value::Null).unwrap(); @@ -1365,7 +962,7 @@ mod tests { #[test] fn test_decode_struct() { - let settings = JsonStructureSettings::Structured(None); + let settings = JsonSettings::default(); let struct_value = StructValue::new( vec![ @@ -1399,7 +996,7 @@ mod tests { #[test] fn test_decode_list() { - let settings = JsonStructureSettings::Structured(None); + let settings = JsonSettings::default(); let list_value = ListValue::new( vec![Value::Int64(1), Value::Int64(2), Value::Int64(3)], @@ -1413,7 +1010,7 @@ mod tests { #[test] fn test_decode_nested_structure() { - let settings = JsonStructureSettings::Structured(None); + let settings = JsonSettings::default(); let inner_struct = StructValue::new( vec![Value::String("Alice".into()), Value::Int64(25)], @@ -1472,89 +1069,9 @@ mod tests { assert_eq!(result, expected); } - #[test] - fn test_decode_unstructured_raw() { - let settings = JsonStructureSettings::UnstructuredRaw; - - let json_str = r#"{"name": "Bob", "age": 30}"#; - let value = Value::String(json_str.into()); - - let result = settings.decode(value).unwrap(); - let expected: Json = serde_json::from_str(json_str).unwrap(); - assert_eq!(result, expected); - } - - #[test] - fn test_decode_unstructured_raw_struct_format() { - let settings = JsonStructureSettings::UnstructuredRaw; - - let json_str = r#"{"name": "Bob", "age": 30}"#; - let struct_value = StructValue::new( - vec![Value::String(json_str.into())], - StructType::new(Arc::new(vec![StructField::new( - JsonStructureSettings::RAW_FIELD.to_string(), - ConcreteDataType::string_datatype(), - true, - )])), - ); - let value = Value::Struct(struct_value); - - let result = settings.decode(value).unwrap(); - let expected: Json = serde_json::from_str(json_str).unwrap(); - assert_eq!(result, expected); - } - - #[test] - fn test_decode_partial_unstructured() { - let mut unstructured_keys = HashSet::new(); - unstructured_keys.insert("user.metadata".to_string()); - - let settings = JsonStructureSettings::PartialUnstructuredByKey { - fields: None, - unstructured_keys, - }; - - let metadata_json = r#"{"preferences": {"theme": "dark"}, "history": [1, 2, 3]}"#; - - let struct_value = StructValue::new( - vec![ - Value::String("Alice".into()), - Value::String(metadata_json.into()), - ], - StructType::new(Arc::new(vec![ - StructField::new( - "name".to_string(), - ConcreteDataType::string_datatype(), - true, - ), - StructField::new( - "metadata".to_string(), - ConcreteDataType::string_datatype(), - true, - ), - ])), - ); - - let result = settings.decode(Value::Struct(struct_value)).unwrap(); - - if let Json::Object(obj) = result { - assert_eq!(obj.get("name"), Some(&Json::String("Alice".to_string()))); - - if let Some(Json::String(metadata_str)) = obj.get("metadata") { - let metadata: Json = serde_json::from_str(metadata_str).unwrap(); - let expected_metadata: Json = serde_json::from_str(metadata_json).unwrap(); - assert_eq!(metadata, expected_metadata); - } else { - panic!("Expected metadata to be unstructured string"); - } - } else { - panic!("Expected object result"); - } - } - #[test] fn test_decode_missing_fields() { - let settings = JsonStructureSettings::Structured(None); + let settings = JsonSettings::default(); // Struct with missing field (null value) let struct_value = StructValue::new( @@ -1581,857 +1098,17 @@ mod tests { } } - #[test] - fn test_encode_json_with_concrete_type() { - let settings = JsonStructureSettings::Structured(None); - - // Test encoding JSON number with expected int64 type - let json = Json::from(42); - let result = settings - .encode_with_type(json, Some(&JsonNativeType::i64())) - .unwrap() - .into_json_inner() - .unwrap(); - assert_eq!(result, Value::Int64(42)); - - // Test encoding JSON string with expected string type - let json = Json::String("hello".to_string()); - let result = settings - .encode_with_type(json, Some(&JsonNativeType::String)) - .unwrap() - .into_json_inner() - .unwrap(); - assert_eq!(result, Value::String("hello".into())); - - // Test encoding JSON boolean with expected boolean type - let json = Json::Bool(true); - let result = settings - .encode_with_type(json, Some(&JsonNativeType::Bool)) - .unwrap() - .into_json_inner() - .unwrap(); - assert_eq!(result, Value::Boolean(true)); - } - - #[test] - fn test_encode_json_with_mismatched_type() { - // Test encoding JSON number with mismatched string type - let json = Json::from(42); - let settings = JsonStructureSettings::Structured(None); - let result = settings.encode_with_type(json, Some(&JsonNativeType::String)); - assert!(result.is_ok()); // Should succeed due to type conversion - - // Test encoding JSON object with mismatched non-struct type - let json = json!({"name": "test"}); - let result = settings.encode_with_type(json, Some(&JsonNativeType::i64())); - assert!(result.is_err()); // Should fail - object can't be converted to int64 - } - - #[test] - fn test_encode_json_array_with_list_type() { - let json = json!([1, 2, 3]); - let item_type = Arc::new(ConcreteDataType::int64_datatype()); - - let settings = JsonStructureSettings::Structured(None); - let result = settings - .encode_with_type( - json, - Some(&JsonNativeType::Array(Box::new(JsonNativeType::i64()))), - ) - .unwrap() - .into_json_inner() - .unwrap(); - - if let Value::List(list_value) = result { - assert_eq!(list_value.items().len(), 3); - assert_eq!(list_value.items()[0], Value::Int64(1)); - assert_eq!(list_value.items()[1], Value::Int64(2)); - assert_eq!(list_value.items()[2], Value::Int64(3)); - assert_eq!(list_value.datatype(), item_type); - } else { - panic!("Expected List value"); - } - } - - #[test] - fn test_encode_json_non_collection_with_type() { - // Test null with null type - let json = Json::Null; - let settings = JsonStructureSettings::Structured(None); - let result = settings - .encode_with_type(json.clone(), Some(&JsonNativeType::Null)) - .unwrap() - .into_json_inner() - .unwrap(); - assert_eq!(result, Value::Null); - - // Test float with float64 type - let json = Json::from(3.15); - let result = settings - .encode_with_type(json, Some(&JsonNativeType::f64())) - .unwrap() - .into_json_inner() - .unwrap(); - match result { - Value::Float64(f) => assert_eq!(f.0, 3.15), - _ => panic!("Expected Float64"), - } - } - #[test] fn test_encode_json_large_unsigned_integer() { // Test unsigned integer that fits in i64 let json = Json::from(u64::MAX / 2); - let settings = JsonStructureSettings::Structured(None); - let result = settings - .encode_with_type(json, None) - .unwrap() - .into_json_inner() - .unwrap(); + let settings = JsonSettings::default(); + let result = settings.encode(json).unwrap().into_json_inner().unwrap(); assert_eq!(result, Value::Int64((u64::MAX / 2) as i64)); // Test unsigned integer that exceeds i64 range let json = Json::from(u64::MAX); - let result = settings - .encode_with_type(json, None) - .unwrap() - .into_json_inner() - .unwrap(); + let result = settings.encode(json).unwrap().into_json_inner().unwrap(); assert_eq!(result, Value::UInt64(u64::MAX)); } - - #[test] - fn test_json_structure_settings_unstructured_raw() { - let json = json!({ - "name": "Frank", - "score": 88 - }); - - let settings = JsonStructureSettings::UnstructuredRaw; - let result = settings.encode(json).unwrap().into_json_inner().unwrap(); - - if let Value::Struct(struct_value) = result { - assert_eq!(struct_value.struct_type().fields().len(), 1); - let field = &struct_value.struct_type().fields()[0]; - assert_eq!(field.name(), JsonStructureSettings::RAW_FIELD); - assert_eq!(field.data_type(), &ConcreteDataType::string_datatype()); - - let items = struct_value.items(); - assert_eq!(items.len(), 1); - if let Value::String(s) = &items[0] { - let json_str = s.as_utf8(); - assert!(json_str.contains("\"name\":\"Frank\"")); - assert!(json_str.contains("\"score\":88")); - } else { - panic!("Expected String value in _raw field"); - } - } else { - panic!("Expected Struct value"); - } - } - - #[test] - fn test_json_structure_settings_unstructured_raw_with_type() { - let json = json!({ - "name": "Grace", - "age": 30, - "active": true - }); - - let settings = JsonStructureSettings::UnstructuredRaw; - - // Test with encode (no type) - let result = settings - .encode(json.clone()) - .unwrap() - .into_json_inner() - .unwrap(); - if let Value::Struct(s) = result { - if let Value::String(json_str) = &s.items()[0] { - let json_str = json_str.as_utf8(); - assert!(json_str.contains("\"name\":\"Grace\"")); - assert!(json_str.contains("\"age\":30")); - assert!(json_str.contains("\"active\":true")); - } else { - panic!("Expected String value in _raw field"); - } - } else { - panic!("Expected Struct value for encode"); - } - - // Test with encode_with_type (with type) - let concrete_type = JsonNativeType::Object(JsonObjectType::from([ - ("name".to_string(), JsonNativeType::String), - ("age".to_string(), JsonNativeType::i64()), - ("active".to_string(), JsonNativeType::Bool), - ])); - - let result2 = settings - .encode_with_type(json, Some(&concrete_type)) - .unwrap() - .into_json_inner() - .unwrap(); - if let Value::Struct(s) = result2 { - if let Value::String(json_str) = &s.items()[0] { - let json_str = json_str.as_utf8(); - assert!(json_str.contains("\"name\":\"Grace\"")); - assert!(json_str.contains("\"age\":30")); - assert!(json_str.contains("\"active\":true")); - } else { - panic!("Expected String value for _raw field"); - } - } else { - panic!("Expected String value for encode_with_type"); - } - - // Test with nested objects - let nested_json = json!({ - "user": { - "profile": { - "name": "Alice", - "settings": {"theme": "dark"} - } - } - }); - - let result3 = settings - .encode(nested_json) - .unwrap() - .into_json_inner() - .unwrap(); - if let Value::Struct(s) = result3 { - if let Value::String(json_str) = &s.items()[0] { - let json_str = json_str.as_utf8(); - assert!(json_str.contains("\"user\"")); - assert!(json_str.contains("\"profile\"")); - assert!(json_str.contains("\"name\":\"Alice\"")); - assert!(json_str.contains("\"settings\"")); - assert!(json_str.contains("\"theme\":\"dark\"")); - } else { - panic!("Expected String value for _raw field"); - } - } else { - panic!("Expected String value for nested JSON"); - } - - // Test with arrays - let array_json = json!([1, "hello", true, 3.15]); - let result4 = settings - .encode(array_json) - .unwrap() - .into_json_inner() - .unwrap(); - if let Value::Struct(s) = result4 { - if let Value::String(json_str) = &s.items()[0] { - let json_str = json_str.as_utf8(); - assert!(json_str.contains("[1,\"hello\",true,3.15]")); - } else { - panic!("Expected String value for _raw field") - } - } else { - panic!("Expected String value for array JSON"); - } - } - - #[test] - fn test_encode_json_with_context_partial_unstructured() { - let json = json!({ - "user": { - "name": "Alice", - "metadata": { - "preferences": {"theme": "dark"}, - "history": [1, 2, 3] - } - } - }); - - let mut unstructured_keys = HashSet::new(); - unstructured_keys.insert("user.metadata".to_string()); - - let settings = JsonStructureSettings::PartialUnstructuredByKey { - fields: None, - unstructured_keys, - }; - let result = settings.encode(json).unwrap().into_json_inner().unwrap(); - - if let Value::Struct(struct_value) = result { - let items = struct_value.items(); - let struct_type = struct_value.struct_type(); - - // Find user field - let user_index = struct_type - .fields() - .iter() - .position(|f| f.name() == "user") - .unwrap(); - if let Value::Struct(user_struct) = &items[user_index] { - let user_items = user_struct.items(); - let fields = user_struct.struct_type().fields(); - let user_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - - // name should be structured - let name_index = user_fields.iter().position(|&f| f == "name").unwrap(); - assert_eq!(user_items[name_index], Value::String("Alice".into())); - - // metadata should be unstructured (string) - let metadata_index = user_fields.iter().position(|&f| f == "metadata").unwrap(); - if let Value::String(metadata_str) = &user_items[metadata_index] { - let json_str = metadata_str.as_utf8(); - assert!(json_str.contains("\"preferences\"")); - assert!(json_str.contains("\"history\"")); - } else { - panic!("Expected String value for metadata field"); - } - } else { - panic!("Expected Struct value for user field"); - } - } else { - panic!("Expected Struct value"); - } - } - - #[test] - fn test_decode_struct_structured() { - // Test decoding a structured struct value - should return the same struct - let settings = JsonStructureSettings::Structured(None); - - let original_struct = StructValue::new( - vec![ - Value::String("Alice".into()), - Value::Int64(25), - Value::Boolean(true), - ], - StructType::new(Arc::new(vec![ - StructField::new( - "name".to_string(), - ConcreteDataType::string_datatype(), - true, - ), - StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), - StructField::new( - "active".to_string(), - ConcreteDataType::boolean_datatype(), - true, - ), - ])), - ); - - let decoded_struct = settings.decode_struct(original_struct.clone()).unwrap(); - - // With Structured settings, the struct should be returned directly - assert_eq!(decoded_struct.items(), original_struct.items()); - assert_eq!(decoded_struct.struct_type(), original_struct.struct_type()); - } - - #[test] - fn test_decode_struct_partial_unstructured_empty_keys() { - // Test decoding with PartialUnstructuredByKey but empty unstructured_keys - let settings = JsonStructureSettings::PartialUnstructuredByKey { - fields: None, - unstructured_keys: HashSet::new(), - }; - - let original_struct = StructValue::new( - vec![ - Value::String("Alice".into()), - Value::Int64(25), - Value::Boolean(true), - ], - StructType::new(Arc::new(vec![ - StructField::new( - "name".to_string(), - ConcreteDataType::string_datatype(), - true, - ), - StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), - StructField::new( - "active".to_string(), - ConcreteDataType::boolean_datatype(), - true, - ), - ])), - ); - - let decoded_struct = settings.decode_struct(original_struct.clone()).unwrap(); - - // With empty unstructured_keys, the struct should be returned directly - assert_eq!(decoded_struct.items(), original_struct.items()); - assert_eq!(decoded_struct.struct_type(), original_struct.struct_type()); - } - - #[test] - fn test_decode_struct_partial_unstructured() { - // Test decoding a struct with unstructured fields - let mut unstructured_keys = HashSet::new(); - unstructured_keys.insert("metadata".to_string()); - - let settings = JsonStructureSettings::PartialUnstructuredByKey { - fields: Some(StructType::new(Arc::new(vec![ - StructField::new( - "name".to_string(), - ConcreteDataType::string_datatype(), - true, - ), - StructField::new( - "metadata".to_string(), - ConcreteDataType::string_datatype(), - true, - ), - ]))), - unstructured_keys, - }; - - // Create a struct where metadata is stored as unstructured JSON string - let encoded_struct = StructValue::new( - vec![ - Value::String("Alice".into()), - Value::String(r#"{"preferences":{"theme":"dark"},"history":[1,2,3]}"#.into()), - ], - StructType::new(Arc::new(vec![ - StructField::new( - "name".to_string(), - ConcreteDataType::string_datatype(), - true, - ), - StructField::new( - "metadata".to_string(), - ConcreteDataType::string_datatype(), - true, - ), - ])), - ); - - let decoded_struct = settings.decode_struct(encoded_struct).unwrap(); - - // Verify name field remains the same - assert_eq!(decoded_struct.items()[0], Value::String("Alice".into())); - - // Verify metadata field is now properly structured - if let Value::Struct(metadata_struct) = &decoded_struct.items()[1] { - let fields = metadata_struct.struct_type().fields(); - let metadata_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - - assert!(metadata_fields.contains(&"preferences")); - assert!(metadata_fields.contains(&"history")); - } else { - panic!("Expected metadata to be decoded as structured value"); - } - } - - #[test] - fn test_decode_struct_nested_unstructured() { - // Test decoding nested structures with unstructured fields - let mut unstructured_keys = HashSet::new(); - unstructured_keys.insert("user.metadata".to_string()); - - let settings = JsonStructureSettings::PartialUnstructuredByKey { - fields: None, - unstructured_keys, - }; - - // Create a nested struct where user.metadata is stored as unstructured JSON string - let user_struct = StructValue::new( - vec![ - Value::String("Alice".into()), - Value::String(r#"{"preferences":{"theme":"dark"},"history":[1,2,3]}"#.into()), - ], - StructType::new(Arc::new(vec![ - StructField::new( - "name".to_string(), - ConcreteDataType::string_datatype(), - true, - ), - StructField::new( - "metadata".to_string(), - ConcreteDataType::string_datatype(), - true, - ), - ])), - ); - - let encoded_struct = StructValue::new( - vec![Value::Struct(user_struct)], - StructType::new(Arc::new(vec![StructField::new( - "user".to_string(), - ConcreteDataType::struct_datatype(StructType::new(Arc::new(vec![]))), - true, - )])), - ); - - let decoded_struct = settings.decode_struct(encoded_struct).unwrap(); - - // Verify the nested structure is properly decoded - if let Value::Struct(decoded_user) = &decoded_struct.items()[0] { - if let Value::Struct(metadata_struct) = &decoded_user.items()[1] { - let fields = metadata_struct.struct_type().fields(); - let metadata_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - - assert!(metadata_fields.contains(&"preferences")); - assert!(metadata_fields.contains(&"history")); - - let preference_index = metadata_fields - .iter() - .position(|&field| field == "preferences") - .unwrap(); - let history_index = metadata_fields - .iter() - .position(|&field| field == "history") - .unwrap(); - - // Verify the nested structure within preferences - if let Value::Struct(preferences_struct) = - &metadata_struct.items()[preference_index] - { - let fields = preferences_struct.struct_type().fields(); - let pref_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - assert!(pref_fields.contains(&"theme")); - } else { - panic!("Expected preferences to be decoded as structured value"); - } - - // Verify the array within history - if let Value::List(history_list) = &metadata_struct.items()[history_index] { - assert_eq!(history_list.items().len(), 3); - } else { - panic!("Expected history to be decoded as list value"); - } - } else { - panic!("Expected metadata to be decoded as structured value"); - } - } else { - panic!("Expected user to be decoded as structured value"); - } - } - - #[test] - fn test_decode_struct_unstructured_raw() { - // Test decoding with UnstructuredRaw setting - let settings = JsonStructureSettings::UnstructuredRaw; - - // With UnstructuredRaw, the entire JSON is encoded as a struct with _raw field - let encoded_struct = StructValue::new( - vec![Value::String( - r#"{"name":"Alice","age":25,"active":true}"#.into(), - )], - StructType::new(Arc::new(vec![StructField::new( - "_raw".to_string(), - ConcreteDataType::string_datatype(), - true, - )])), - ); - - let decoded_struct = settings.decode_struct(encoded_struct).unwrap(); - - // With UnstructuredRaw, the entire struct should be reconstructed from _raw field - let fields = decoded_struct.struct_type().fields(); - let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - - assert!(decoded_fields.contains(&"name")); - assert!(decoded_fields.contains(&"age")); - assert!(decoded_fields.contains(&"active")); - - // Verify the actual values - let name_index = decoded_fields.iter().position(|&f| f == "name").unwrap(); - let age_index = decoded_fields.iter().position(|&f| f == "age").unwrap(); - let active_index = decoded_fields.iter().position(|&f| f == "active").unwrap(); - - assert_eq!( - decoded_struct.items()[name_index], - Value::String("Alice".into()) - ); - assert_eq!(decoded_struct.items()[age_index], Value::Int64(25)); - assert_eq!(decoded_struct.items()[active_index], Value::Boolean(true)); - } - - #[test] - fn test_decode_struct_unstructured_raw_invalid_format() { - // Test UnstructuredRaw decoding when the struct doesn't have the expected _raw field format - let settings = JsonStructureSettings::UnstructuredRaw; - - // Create a struct that doesn't match the expected UnstructuredRaw format - let invalid_struct = StructValue::new( - vec![Value::String("Alice".into()), Value::Int64(25)], - StructType::new(Arc::new(vec![ - StructField::new( - "name".to_string(), - ConcreteDataType::string_datatype(), - true, - ), - StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), - ])), - ); - - // Should fail with error since it doesn't match expected UnstructuredRaw format - let result = settings.decode_struct(invalid_struct); - assert!(result.is_err()); - assert!( - result - .unwrap_err() - .to_string() - .contains("UnstructuredRaw value must be stored as struct with single _raw field") - ); - } - - #[test] - fn test_decode_struct_unstructured_raw_primitive_value() { - // Test UnstructuredRaw decoding when the _raw field contains a primitive value - let settings = JsonStructureSettings::UnstructuredRaw; - - // Test with a string primitive in _raw field - let string_struct = StructValue::new( - vec![Value::String("\"hello world\"".into())], - StructType::new(Arc::new(vec![StructField::new( - "_raw".to_string(), - ConcreteDataType::string_datatype(), - true, - )])), - ); - - let decoded_struct = settings.decode_struct(string_struct).unwrap(); - let fields = decoded_struct.struct_type().fields(); - let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - assert!(decoded_fields.contains(&"value")); - assert_eq!( - decoded_struct.items()[0], - Value::String("hello world".into()) - ); - - // Test with a number primitive in _raw field - let number_struct = StructValue::new( - vec![Value::String("42".into())], - StructType::new(Arc::new(vec![StructField::new( - "_raw".to_string(), - ConcreteDataType::string_datatype(), - true, - )])), - ); - - let decoded_struct = settings.decode_struct(number_struct).unwrap(); - let fields = decoded_struct.struct_type().fields(); - let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - assert!(decoded_fields.contains(&"value")); - assert_eq!(decoded_struct.items()[0], Value::Int64(42)); - - // Test with a boolean primitive in _raw field - let bool_struct = StructValue::new( - vec![Value::String("true".into())], - StructType::new(Arc::new(vec![StructField::new( - "_raw".to_string(), - ConcreteDataType::string_datatype(), - true, - )])), - ); - - let decoded_struct = settings.decode_struct(bool_struct).unwrap(); - let fields = decoded_struct.struct_type().fields(); - let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - assert!(decoded_fields.contains(&"value")); - assert_eq!(decoded_struct.items()[0], Value::Boolean(true)); - - // Test with a null primitive in _raw field - let null_struct = StructValue::new( - vec![Value::String("null".into())], - StructType::new(Arc::new(vec![StructField::new( - "_raw".to_string(), - ConcreteDataType::string_datatype(), - true, - )])), - ); - - let decoded_struct = settings.decode_struct(null_struct).unwrap(); - let fields = decoded_struct.struct_type().fields(); - let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - assert!(decoded_fields.contains(&"value")); - assert_eq!(decoded_struct.items()[0], Value::Null); - } - - #[test] - fn test_decode_struct_unstructured_raw_array() { - // Test UnstructuredRaw decoding when the _raw field contains a JSON array - let settings = JsonStructureSettings::UnstructuredRaw; - - // Test with an array in _raw field - let array_struct = StructValue::new( - vec![Value::String("[1, \"hello\", true, 3.15]".into())], - StructType::new(Arc::new(vec![StructField::new( - "_raw".to_string(), - ConcreteDataType::string_datatype(), - true, - )])), - ); - - let decoded_struct = settings.decode_struct(array_struct).unwrap(); - assert_eq!( - format!("{decoded_struct:?}"), - r#"StructValue { items: [List(ListValue { items: [Binary(Bytes(b"1")), Binary(Bytes(b"\"hello\"")), Binary(Bytes(b"true")), Binary(Bytes(b"3.15"))], datatype: Binary(BinaryType { repr_type: Binary }) })], fields: StructType { fields: [StructField { name: "value", data_type: List(ListType { item_type: Binary(BinaryType { repr_type: Binary }) }), nullable: true, metadata: {} }] } }"# - ); - } - - #[test] - fn test_decode_struct_comprehensive_flow() { - // Test the complete flow: encode JSON with partial unstructured settings, - // then decode the resulting StructValue back to fully structured form - let mut unstructured_keys = HashSet::new(); - unstructured_keys.insert("metadata".to_string()); - unstructured_keys.insert("user.profile.settings".to_string()); - - let settings = JsonStructureSettings::PartialUnstructuredByKey { - fields: None, - unstructured_keys, - }; - - // Original JSON with nested structure - let original_json = json!({ - "name": "Alice", - "age": 25, - "metadata": { - "tags": ["admin", "premium"], - "preferences": { - "theme": "dark", - "notifications": true - } - }, - "user": { - "profile": { - "name": "Alice Smith", - "settings": { - "language": "en", - "timezone": "UTC" - } - }, - "active": true - } - }); - - // Encode the JSON with partial unstructured settings - let encoded_value = settings - .encode(original_json) - .unwrap() - .into_json_inner() - .unwrap(); - - // Verify encoding worked - metadata and user.profile.settings should be unstructured - if let Value::Struct(encoded_struct) = encoded_value { - let fields = encoded_struct.struct_type().fields(); - let fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - - assert!(fields.contains(&"name")); - assert!(fields.contains(&"age")); - assert!(fields.contains(&"metadata")); - assert!(fields.contains(&"user")); - - // Check that metadata is stored as string (unstructured) - let metadata_index = fields.iter().position(|&f| f == "metadata").unwrap(); - if let Value::String(_) = encoded_struct.items()[metadata_index] { - // Good - metadata is unstructured - } else { - panic!("Expected metadata to be encoded as string (unstructured)"); - } - - // Check that user.profile.settings is unstructured - let user_index = fields.iter().position(|&f| f == "user").unwrap(); - if let Value::Struct(user_struct) = &encoded_struct.items()[user_index] { - let fields = user_struct.struct_type().fields(); - let user_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - - let profile_index = user_fields.iter().position(|&f| f == "profile").unwrap(); - if let Value::Struct(profile_struct) = &user_struct.items()[profile_index] { - let fields = profile_struct.struct_type().fields(); - let profile_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - - let settings_index = profile_fields - .iter() - .position(|&f| f == "settings") - .unwrap(); - if let Value::String(_) = &profile_struct.items()[settings_index] { - // Good - settings is unstructured - } else { - panic!( - "Expected user.profile.settings to be encoded as string (unstructured)" - ); - } - } else { - panic!("Expected user.profile to be a struct"); - } - } else { - panic!("Expected user to be a struct"); - } - - // Now decode the struct back to fully structured form - let decoded_struct = settings.decode_struct(encoded_struct).unwrap(); - - // Verify the decoded struct has proper structure - let fields = decoded_struct.struct_type().fields(); - let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - - assert!(decoded_fields.contains(&"name")); - assert!(decoded_fields.contains(&"age")); - assert!(decoded_fields.contains(&"metadata")); - assert!(decoded_fields.contains(&"user")); - - // Check that metadata is now properly structured - let metadata_index = decoded_fields - .iter() - .position(|&f| f == "metadata") - .unwrap(); - if let Value::Struct(metadata_struct) = &decoded_struct.items()[metadata_index] { - let fields = metadata_struct.struct_type().fields(); - let metadata_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - - assert!(metadata_fields.contains(&"tags")); - assert!(metadata_fields.contains(&"preferences")); - - // Check nested structure within metadata - let preferences_index = metadata_fields - .iter() - .position(|&f| f == "preferences") - .unwrap(); - if let Value::Struct(prefs_struct) = &metadata_struct.items()[preferences_index] { - let fields = prefs_struct.struct_type().fields(); - let prefs_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - - assert!(prefs_fields.contains(&"theme")); - assert!(prefs_fields.contains(&"notifications")); - } else { - panic!("Expected metadata.preferences to be a struct"); - } - } else { - panic!("Expected metadata to be decoded as struct"); - } - - // Check that user.profile.settings is now properly structured - let user_index = decoded_fields.iter().position(|&f| f == "user").unwrap(); - if let Value::Struct(user_struct) = &decoded_struct.items()[user_index] { - let fields = user_struct.struct_type().fields(); - let user_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - - let profile_index = user_fields.iter().position(|&f| f == "profile").unwrap(); - if let Value::Struct(profile_struct) = &user_struct.items()[profile_index] { - let fields = profile_struct.struct_type().fields(); - let profile_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - - let settings_index = profile_fields - .iter() - .position(|&f| f == "settings") - .unwrap(); - if let Value::Struct(settings_struct) = &profile_struct.items()[settings_index] - { - let fields = settings_struct.struct_type().fields(); - let settings_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); - - assert!(settings_fields.contains(&"language")); - assert!(settings_fields.contains(&"timezone")); - } else { - panic!("Expected user.profile.settings to be decoded as struct"); - } - } else { - panic!("Expected user.profile to be a struct"); - } - } else { - panic!("Expected user to be a struct"); - } - } else { - panic!("Expected encoded value to be a struct"); - } - } } diff --git a/src/datatypes/src/json/value.rs b/src/datatypes/src/json/value.rs index 4350630003..e23d25ddee 100644 --- a/src/datatypes/src/json/value.rs +++ b/src/datatypes/src/json/value.rs @@ -144,13 +144,6 @@ impl JsonVariant { } } - pub(crate) fn as_f64(&self) -> Option { - match self { - JsonVariant::Number(n) => Some(n.as_f64()), - _ => None, - } - } - pub(crate) fn native_type(&self) -> JsonNativeType { match self { JsonVariant::Null => JsonNativeType::Null, @@ -513,6 +506,7 @@ impl JsonValue { } let x = std::mem::take(&mut self.json_variant); + self.json_variant = helper(x, expected.native_type())?; self.json_type = OnceLock::new(); Ok(()) @@ -650,6 +644,7 @@ where Some(t) => t, None => return JsonNativeType::Array(Box::new(JsonNativeType::Null)), }; + for x in iter { if matches!(item_type, JsonNativeType::Variant) { break; diff --git a/src/datatypes/src/types/json_type.rs b/src/datatypes/src/types/json_type.rs index 652847da43..d0a1f4bb6a 100644 --- a/src/datatypes/src/types/json_type.rs +++ b/src/datatypes/src/types/json_type.rs @@ -168,7 +168,7 @@ impl From<&ConcreteDataType> for JsonNativeType { ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_) => JsonNativeType::f64(), ConcreteDataType::String(_) => JsonNativeType::String, ConcreteDataType::List(list_type) => { - JsonNativeType::Array(Box::new(list_type.item_type().into())) + JsonNativeType::Array(Box::new(JsonNativeType::from(list_type.item_type()))) } ConcreteDataType::Struct(struct_type) => JsonNativeType::Object( struct_type @@ -243,9 +243,7 @@ impl Display for JsonNativeType { write!(f, r#""""#) } JsonNativeType::String => write!(f, r#""""#), - JsonNativeType::Array(item_type) => { - write!(f, "[{}]", item_type) - } + JsonNativeType::Array(item_type) => write!(f, "[{}]", item_type), JsonNativeType::Object(object) => { write!( f, @@ -342,9 +340,7 @@ impl JsonType { pub fn is_include(&self, other: &JsonType) -> bool { match (&self.format, &other.format) { (JsonFormat::Jsonb, JsonFormat::Jsonb) => true, - (JsonFormat::Json2(this), JsonFormat::Json2(that)) => { - is_include(this.as_ref(), that.as_ref()) - } + (JsonFormat::Json2(this), JsonFormat::Json2(that)) => is_include(this, that), _ => false, } } @@ -365,9 +361,7 @@ fn is_include(this: &JsonNativeType, that: &JsonNativeType) -> bool { match (this, that) { (this, that) if this == that => true, - (JsonNativeType::Array(this), JsonNativeType::Array(that)) => { - is_include(this.as_ref(), that.as_ref()) - } + (JsonNativeType::Array(this), JsonNativeType::Array(that)) => is_include(this, that), (JsonNativeType::Object(this), JsonNativeType::Object(that)) => { is_include_object(this, that) } @@ -398,14 +392,9 @@ impl DataType for JsonType { fn name(&self) -> String { match &self.format { JsonFormat::Jsonb => JSON_TYPE_NAME.to_string(), - JsonFormat::Json2(x) => format!( - "{JSON2_TYPE_NAME}{}", - if x.is_null() { - "".to_string() - } else { - x.to_string() - } - ), + JsonFormat::Json2(ty) => { + format!("{JSON2_TYPE_NAME}{}", ty) + } } } @@ -427,7 +416,7 @@ impl DataType for JsonType { fn create_mutable_vector(&self, capacity: usize) -> Box { match &self.format { JsonFormat::Jsonb => Box::new(BinaryVectorBuilder::with_capacity(capacity)), - JsonFormat::Json2(x) => Box::new(JsonVectorBuilder::new(*x.clone(), capacity)), + JsonFormat::Json2(x) => Box::new(JsonVectorBuilder::new(x.as_ref().clone(), capacity)), } } @@ -528,7 +517,7 @@ pub fn parse_string_to_jsonb(s: &str) -> Result> { #[cfg(test)] mod tests { use super::*; - use crate::json::JsonStructureSettings; + use crate::json::JsonSettings; #[test] fn test_fix_unicode_point() -> Result<()> { @@ -585,13 +574,13 @@ mod tests { #[test] fn test_json_type_include() { fn test(this: &JsonNativeType, that: &JsonNativeType, expected: bool) { - assert_eq!(is_include(this, that), expected); + assert_eq!(is_include(this, that), expected, "this={this}, that={that}"); } test(&JsonNativeType::Null, &JsonNativeType::Null, true); test(&JsonNativeType::Null, &JsonNativeType::Bool, false); - test(&JsonNativeType::Bool, &JsonNativeType::Null, true); + test(&JsonNativeType::Bool, &JsonNativeType::Bool, true); test(&JsonNativeType::Bool, &JsonNativeType::u64(), false); @@ -637,7 +626,6 @@ mod tests { "foo".to_string(), JsonNativeType::String, )])); - test(simple_json_object, &JsonNativeType::Null, true); test(simple_json_object, simple_json_object, true); test(simple_json_object, &JsonNativeType::i64(), false); test( @@ -665,7 +653,7 @@ mod tests { ), ("bar".to_string(), JsonNativeType::i64()), ])); - test(complex_json_object, &JsonNativeType::Null, true); + test(simple_json_object, &JsonNativeType::Null, true); test(complex_json_object, &JsonNativeType::String, false); test(complex_json_object, complex_json_object, true); test( @@ -789,7 +777,7 @@ mod tests { ) -> Result<()> { let json: serde_json::Value = serde_json::from_str(json).unwrap(); - let settings = JsonStructureSettings::Structured(None); + let settings = JsonSettings::default(); let value = settings.encode(json)?; let value_type = value.data_type(); let Some(other) = value_type.as_json() else { diff --git a/src/datatypes/src/vectors/json/builder.rs b/src/datatypes/src/vectors/json/builder.rs index 7ca1ff2f6a..f98a4c9a43 100644 --- a/src/datatypes/src/vectors/json/builder.rs +++ b/src/datatypes/src/vectors/json/builder.rs @@ -29,9 +29,9 @@ pub(crate) struct JsonVectorBuilder { } impl JsonVectorBuilder { - pub(crate) fn new(json_type: JsonNativeType, capacity: usize) -> Self { + pub(crate) fn new(initial_native_type: JsonNativeType, capacity: usize) -> Self { Self { - merged_type: JsonType::new_json2(json_type), + merged_type: JsonType::new_json2(initial_native_type), values: Vec::with_capacity(capacity), } } @@ -173,6 +173,33 @@ mod tests { )) ); + // A Null initial type represents an unknown JSON2 runtime type. The first + // non-null value should set the concrete type instead of aligning all rows to Null. + let mut inferred_builder = JsonVectorBuilder::new(JsonNativeType::Null, 2); + let inferred_value = parse_json_value(r#"{"id":3}"#); + inferred_builder.push_null(); + inferred_builder.try_push_value_ref(&inferred_value.as_value_ref())?; + + let inferred_type = JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([( + "id".to_string(), + JsonNativeType::i64(), + )]))); + assert_eq!( + inferred_builder.data_type(), + ConcreteDataType::Json(inferred_type.clone()) + ); + + let inferred_struct_type = inferred_type.as_struct_type(); + let vector = inferred_builder.to_vector(); + assert_eq!(vector.get(0), Value::Null); + assert_eq!( + vector.get(1), + Value::Struct(StructValue::new( + vec![Value::Int64(3)], + inferred_struct_type, + )) + ); + // Root-level conflicts should be lifted to a plain Variant field that preserves // each original JSON payload. let mut variant_builder = JsonVectorBuilder::new(JsonNativeType::Bool, 2); diff --git a/src/operator/src/expr_helper.rs b/src/operator/src/expr_helper.rs index b7e9ba39e6..7bc0800aa7 100644 --- a/src/operator/src/expr_helper.rs +++ b/src/operator/src/expr_helper.rs @@ -806,8 +806,7 @@ pub(crate) fn to_alter_table_expr( target_type, } => { let target_type = - sql_data_type_to_concrete_data_type(&target_type, &Default::default()) - .context(ParseSqlSnafu)?; + sql_data_type_to_concrete_data_type(&target_type).context(ParseSqlSnafu)?; let (target_type, target_type_extension) = ColumnDataTypeWrapper::try_from(target_type) .map(|w| w.to_parts()) .context(ColumnDataTypeSnafu)?; diff --git a/src/pipeline/src/etl/transform/transformer/greptime.rs b/src/pipeline/src/etl/transform/transformer/greptime.rs index e48026840f..9c17586a71 100644 --- a/src/pipeline/src/etl/transform/transformer/greptime.rs +++ b/src/pipeline/src/etl/transform/transformer/greptime.rs @@ -707,7 +707,7 @@ fn resolve_value( None }; let settings = json_extension_type - .and_then(|x| x.metadata().json_structure_settings.clone()) + .and_then(|x| x.metadata().json_settings.clone()) .unwrap_or_default(); let value: serde_json::Value = value.try_into().map_err(|e: StdError| { CoerceIncompatibleTypesSnafu { msg: e.to_string() }.build() diff --git a/src/query/src/sql.rs b/src/query/src/sql.rs index 99ecd67e45..8dda64cfd2 100644 --- a/src/query/src/sql.rs +++ b/src/query/src/sql.rs @@ -1082,11 +1082,18 @@ fn describe_column_types(columns_schemas: &[ColumnSchema]) -> VectorRef { Arc::new(StringVector::from( columns_schemas .iter() - .map(|cs| cs.data_type.name()) + .map(|cs| describe_column_type_name(&cs.data_type)) .collect::>(), )) } +fn describe_column_type_name(data_type: &ConcreteDataType) -> String { + match data_type { + ConcreteDataType::Json(json_type) if json_type.is_json2() => "Json2".to_string(), + data_type => data_type.name(), + } +} + fn describe_column_keys( columns_schemas: &[ColumnSchema], primary_key_indices: &[usize], @@ -1340,6 +1347,7 @@ mod test { use common_time::timestamp::TimeUnit; use datatypes::prelude::ConcreteDataType; use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, Schema, SchemaRef}; + use datatypes::types::json_type::JsonNativeType; use datatypes::vectors::{StringVector, TimestampMillisecondVector, UInt32Vector, VectorRef}; use session::context::QueryContextBuilder; use snafu::ResultExt; @@ -1348,7 +1356,7 @@ mod test { use table::TableRef; use table::test_util::MemTable; - use super::show_variable; + use super::{describe_column_type_name, show_variable}; use crate::error; use crate::error::Result; use crate::sql::{ @@ -1391,6 +1399,18 @@ mod test { describe_table_test_by_schema(table_name, schema, data, expected_columns) } + #[test] + fn test_describe_column_type_name_json2() { + assert_eq!( + describe_column_type_name(&ConcreteDataType::json2(JsonNativeType::Null)), + "Json2" + ); + assert_eq!( + describe_column_type_name(&ConcreteDataType::uint32_datatype()), + "UInt32" + ); + } + fn describe_table_test_by_schema( table_name: &str, schema: Vec, diff --git a/src/query/src/sql/show_create_table.rs b/src/query/src/sql/show_create_table.rs index a89607d789..1c37558d4b 100644 --- a/src/query/src/sql/show_create_table.rs +++ b/src/query/src/sql/show_create_table.rs @@ -29,12 +29,13 @@ use datatypes::schema::{ COLUMN_VECTOR_INDEX_OPT_KEY_METRIC, COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema, FulltextBackend, SchemaRef, }; +use datatypes::types::JsonFormat; use snafu::ResultExt; -use sql::ast::{ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident, ObjectName}; +use sql::ast::{ColumnDef, ColumnOption, ColumnOptionDef, DataType, Expr, Ident, ObjectName}; use sql::dialect::GreptimeDbDialect; use sql::parser::ParserContext; use sql::statements::create::{Column, ColumnExtensions, CreateTable, TableConstraint}; -use sql::statements::{self, OptionMap}; +use sql::statements::{self, OptionMap, concrete_data_type_to_sql_data_type}; use store_api::metric_engine_consts::{is_metric_engine, is_metric_engine_internal_column}; use table::metadata::{TableInfoRef, TableMeta}; use table::requests::{ @@ -197,27 +198,32 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result()? - { + let mut data_type = concrete_data_type_to_sql_data_type(&column_schema.data_type) + .with_context(|_| ConvertSqlTypeSnafu { + datatype: column_schema.data_type.clone(), + })?; + + if matches!( + &column_schema.data_type, + datatypes::data_type::ConcreteDataType::Json(json_type) + if matches!(json_type.format, JsonFormat::Json2(_)) + ) { + data_type = DataType::Custom(ObjectName::from(vec![Ident::new("JSON2")]), vec![]); + } + + if let Some(json_extension) = column_schema.extension_type::()? { let settings = json_extension .metadata() - .json_structure_settings + .json_settings .clone() .unwrap_or_default(); - extensions.set_json_structure_settings(settings); + extensions.set_json_settings(settings).context(SqlSnafu)?; } Ok(Column { column_def: ColumnDef { name: Ident::with_quote(quote_style, name), - data_type: statements::concrete_data_type_to_sql_data_type(&column_schema.data_type) - .with_context(|_| ConvertSqlTypeSnafu { - datatype: column_schema.data_type.clone(), - })?, + data_type, options, }, extensions, @@ -429,9 +435,7 @@ WITH( let mut json_column = ColumnSchema::new("j", ConcreteDataType::json_datatype(), true); json_column .with_extension_type(&JsonExtensionType::new(Arc::new( - datatypes::extension::json::JsonMetadata { - json_structure_settings: Some(datatypes::json::JsonStructureSettings::default()), - }, + datatypes::extension::json::JsonMetadata::default(), ))) .unwrap(); diff --git a/src/servers/src/postgres/types.rs b/src/servers/src/postgres/types.rs index 9fc5dccf5f..74e230f83a 100644 --- a/src/servers/src/postgres/types.rs +++ b/src/servers/src/postgres/types.rs @@ -30,7 +30,7 @@ use common_time::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth}; use datafusion_common::ScalarValue; use datafusion_expr::LogicalPlan; use datatypes::arrow::datatypes::DataType as ArrowDataType; -use datatypes::json::JsonStructureSettings; +use datatypes::json::JsonSettings; use datatypes::prelude::{ConcreteDataType, Value}; use datatypes::schema::{Schema, SchemaRef}; use datatypes::types::{Decimal128Type, IntervalType, TimestampType, jsonb_to_string}; @@ -81,7 +81,7 @@ pub(super) fn schema_to_pg( /// this function will encode greptime's `StructValue` into PostgreSQL jsonb type /// /// Note that greptimedb has different types of StructValue for storing json data, -/// based on policy defined in `JsonStructureSettings`. But here the `StructValue` +/// based on policy defined in `JsonSettings`. But here the `StructValue` /// should be fully structured. /// /// there are alternatives like records, arrays, etc. but there are also limitations: @@ -93,7 +93,7 @@ fn encode_struct( builder: &mut S, pg_field: &FieldInfo, ) -> PgWireResult<()> { - let encoding_setting = JsonStructureSettings::Structured(None); + let encoding_setting = JsonSettings::default(); let json_value = encoding_setting .decode(Value::Struct(struct_value)) .map_err(|e| PgWireError::ApiError(Box::new(e)))?; diff --git a/src/sql/src/error.rs b/src/sql/src/error.rs index 5159b70fc1..e4323bb661 100644 --- a/src/sql/src/error.rs +++ b/src/sql/src/error.rs @@ -215,13 +215,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Invalid JSON structure setting, reason: {reason}"))] - InvalidJsonStructureSetting { - reason: String, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to serialize column default constraint"))] SerializeColumnDefaultConstraint { #[snafu(implicit)] @@ -348,7 +341,7 @@ pub enum Error { }, #[snafu(display("Failed to set JSON structure settings: {value}"))] - SetJsonStructureSettings { + SetJsonSettings { value: String, source: datatypes::error::Error, #[snafu(implicit)] @@ -381,7 +374,6 @@ impl ErrorExt for Error { InvalidColumnOption { .. } | InvalidExprAsOptionValue { .. } - | InvalidJsonStructureSetting { .. } | InvalidDatabaseName { .. } | InvalidDatabaseOption { .. } | ColumnTypeMismatch { .. } @@ -400,8 +392,9 @@ impl ErrorExt for Error { #[cfg(feature = "enterprise")] InvalidTriggerWebhookOption { .. } => StatusCode::InvalidArguments, - SerializeColumnDefaultConstraint { source, .. } - | SetJsonStructureSettings { source, .. } => source.status_code(), + SerializeColumnDefaultConstraint { source, .. } | SetJsonSettings { source, .. } => { + source.status_code() + } ConvertToGrpcDataType { source, .. } => source.status_code(), SqlCommon { source, .. } => source.status_code(), diff --git a/src/sql/src/parser.rs b/src/sql/src/parser.rs index 3a2d654f96..b776879d9f 100644 --- a/src/sql/src/parser.rs +++ b/src/sql/src/parser.rs @@ -372,8 +372,7 @@ mod tests { let ts_col = columns.first().unwrap(); assert_eq!( expected_type, - sql_data_type_to_concrete_data_type(ts_col.data_type(), &Default::default()) - .unwrap() + sql_data_type_to_concrete_data_type(ts_col.data_type()).unwrap() ); } _ => unreachable!(), diff --git a/src/sql/src/parsers/create_parser.rs b/src/sql/src/parsers/create_parser.rs index c4dfd8407d..a257f029e3 100644 --- a/src/sql/src/parsers/create_parser.rs +++ b/src/sql/src/parsers/create_parser.rs @@ -710,12 +710,13 @@ impl<'a> ParserContext<'a> { let mut extensions = ColumnExtensions::default(); - let data_type = parser.parse_data_type().context(SyntaxSnafu)?; - // Must immediately parse the JSON datatype format because it is closely after the "JSON" - // datatype, like this: "JSON(format = ...)". - if matches!(data_type, DataType::JSON) { - extensions.json_datatype_options = json::parse_json_datatype_options(parser)?; - } + let data_type = + if let Some((data_type, type_hints)) = json::parse_json2_type_and_hints(parser)? { + extensions.json_type_hints = type_hints; + data_type + } else { + parser.parse_data_type().context(SyntaxSnafu)? + }; let mut options = vec![]; loop { @@ -910,7 +911,7 @@ impl<'a> ParserContext<'a> { ); let column_type = get_unalias_type(column_type); - let data_type = sql_data_type_to_concrete_data_type(&column_type, column_extensions)?; + let data_type = sql_data_type_to_concrete_data_type(&column_type)?; ensure!( data_type == ConcreteDataType::string_datatype(), InvalidColumnOptionSnafu { @@ -1007,7 +1008,7 @@ impl<'a> ParserContext<'a> { // Check that column is a vector type let column_type = get_unalias_type(column_type); - let data_type = sql_data_type_to_concrete_data_type(&column_type, column_extensions)?; + let data_type = sql_data_type_to_concrete_data_type(&column_type)?; ensure!( matches!(data_type, ConcreteDataType::Vector(_)), InvalidColumnOptionSnafu { diff --git a/src/sql/src/parsers/create_parser/json.rs b/src/sql/src/parsers/create_parser/json.rs index 2078b36a10..fd8225cc9b 100644 --- a/src/sql/src/parsers/create_parser/json.rs +++ b/src/sql/src/parsers/create_parser/json.rs @@ -12,163 +12,523 @@ // See the License for the specific language governing permissions and // limitations under the License. -use snafu::ResultExt; +use snafu::{ResultExt, ensure}; +use sqlparser::ast::{DataType, ExactNumberInfo, Expr, ObjectName, UnaryOperator}; +use sqlparser::dialect::keywords::Keyword; use sqlparser::parser::Parser; use sqlparser::tokenizer::Token; -use crate::error::{Result, SyntaxSnafu}; -use crate::statements::OptionMap; -use crate::util; +use crate::ast::Ident; +use crate::error::{InvalidSqlSnafu, Result, SyntaxSnafu}; +use crate::parsers::create_parser::{INVERTED, SKIPPING}; +use crate::statements::create::JsonTypeHint; +use crate::statements::transform::type_alias::get_type_by_alias; -pub(super) fn parse_json_datatype_options(parser: &mut Parser<'_>) -> Result> { - if parser.consume_token(&Token::LParen) { - let result = parser - .parse_comma_separated0(Parser::parse_sql_option, Token::RParen) - .context(SyntaxSnafu) - .and_then(|options| { - options - .into_iter() - .map(util::parse_option_string) - .collect::>>() - })?; - parser.expect_token(&Token::RParen).context(SyntaxSnafu)?; - Ok(Some(OptionMap::new(result))) - } else { - Ok(None) +const JSON2_TYPE_NAME: &str = "JSON2"; + +pub(super) fn parse_json2_type_and_hints( + parser: &mut Parser<'_>, +) -> Result)>> { + let token = parser.peek_token(); + let Token::Word(word) = &token.token else { + return Ok(None); + }; + + if !word.value.eq_ignore_ascii_case(JSON2_TYPE_NAME) || word.quote_style.is_some() { + return Ok(None); } + + parser.next_token(); + let data_type = DataType::Custom(ObjectName::from(vec![Ident::new(JSON2_TYPE_NAME)]), vec![]); + let type_hints = if parser.consume_token(&Token::LParen) { + parse_json2_type_hints(parser)? + } else { + vec![] + }; + + Ok(Some((data_type, type_hints))) +} + +fn parse_json2_type_hints(parser: &mut Parser<'_>) -> Result> { + let mut hints = Vec::new(); + + if parser.consume_token(&Token::RParen) { + return Ok(hints); + } + + loop { + let hint = parse_json2_type_hint(parser)?; + ensure_no_path_conflict(&hints, &hint.path)?; + hints.push(hint); + + if parser.consume_token(&Token::Comma) { + if parser.consume_token(&Token::RParen) { + break; + } + } else { + parser.expect_token(&Token::RParen).context(SyntaxSnafu)?; + break; + } + } + + Ok(hints) +} + +fn parse_json2_type_hint(parser: &mut Parser<'_>) -> Result { + let path = parse_json2_path(parser)?; + let data_type = parser.parse_data_type().context(SyntaxSnafu)?; + let data_type = normalize_json2_type_hint_type(data_type)?; + + let mut nullable = true; + let mut nullable_set = false; + let mut default = None; + let mut inverted_index = false; + + loop { + if parser.parse_keywords(&[Keyword::NOT, Keyword::NULL]) { + ensure!( + !nullable_set, + InvalidSqlSnafu { + msg: format!( + "NULL/NOT NULL option already specified for JSON2 type hint '{}'", + path.join(".") + ) + } + ); + nullable = false; + nullable_set = true; + } else if parser.parse_keyword(Keyword::NULL) { + ensure!( + !nullable_set, + InvalidSqlSnafu { + msg: format!( + "NULL/NOT NULL option already specified for JSON2 type hint '{}'", + path.join(".") + ) + } + ); + nullable = true; + nullable_set = true; + } else if parser.parse_keyword(Keyword::DEFAULT) { + ensure!( + default.is_none(), + InvalidSqlSnafu { + msg: format!( + "duplicated DEFAULT option for JSON2 type hint '{}'", + path.join(".") + ) + } + ); + let expr = parser.parse_expr().context(SyntaxSnafu)?; + ensure_json2_default_expr_is_literal(&expr)?; + default = Some(expr); + } else if let Token::Word(word) = parser.peek_token().token + && word.value.eq_ignore_ascii_case(INVERTED) + { + parser.next_token(); + ensure!( + parser.parse_keyword(Keyword::INDEX), + InvalidSqlSnafu { + msg: format!( + "expect INDEX after INVERTED keyword for JSON2 type hint '{}'", + path.join(".") + ) + } + ); + ensure!( + !inverted_index, + InvalidSqlSnafu { + msg: format!( + "duplicated INVERTED INDEX option for JSON2 type hint '{}'", + path.join(".") + ) + } + ); + inverted_index = true; + } else if let Token::Word(word) = parser.peek_token().token + && word.value.eq_ignore_ascii_case(SKIPPING) + { + return InvalidSqlSnafu { + msg: "JSON2 type hint SKIPPING INDEX is not supported yet".to_string(), + } + .fail(); + } else if matches!(parser.peek_token().token, Token::Comma | Token::RParen) { + break; + } else { + return parser + .expected("JSON2 type hint option", parser.peek_token()) + .context(SyntaxSnafu); + } + } + + Ok(JsonTypeHint { + path, + data_type, + nullable, + default, + inverted_index, + }) +} + +fn parse_json2_path(parser: &mut Parser<'_>) -> Result> { + let first = parser.parse_identifier().context(SyntaxSnafu)?; + let mut path = vec![first.value]; + + while parser.consume_token(&Token::Period) { + let segment = parser.parse_identifier().context(SyntaxSnafu)?; + path.push(segment.value); + } + + ensure!( + !path.iter().any(|segment| segment.is_empty()), + InvalidSqlSnafu { + msg: "JSON2 type hint path segment cannot be empty".to_string(), + } + ); + + Ok(path) +} + +fn normalize_json2_type_hint_type(data_type: DataType) -> Result { + let data_type = get_type_by_alias(&data_type).unwrap_or(data_type); + let normalized = match data_type { + DataType::String(_) | DataType::Text | DataType::Varchar(_) | DataType::Char(_) => { + DataType::String(None) + } + DataType::TinyInt(_) + | DataType::SmallInt(_) + | DataType::Int(_) + | DataType::Integer(_) + | DataType::BigInt(_) => DataType::BigInt(None), + DataType::TinyIntUnsigned(_) + | DataType::SmallIntUnsigned(_) + | DataType::IntUnsigned(_) + | DataType::UnsignedInteger + | DataType::BigIntUnsigned(_) => DataType::BigIntUnsigned(None), + DataType::Float(_) | DataType::Real | DataType::Double(_) => { + DataType::Double(ExactNumberInfo::None) + } + DataType::Boolean => DataType::Boolean, + _ => { + return InvalidSqlSnafu { + msg: format!("unsupported JSON2 type hint data type: {data_type}"), + } + .fail(); + } + }; + + Ok(normalized) +} + +fn ensure_json2_default_expr_is_literal(expr: &Expr) -> Result<()> { + let is_literal = match expr { + Expr::Value(_) => true, + Expr::UnaryOp { op, expr } => { + matches!(op, UnaryOperator::Plus | UnaryOperator::Minus) + && matches!(expr.as_ref(), Expr::Value(_)) + } + _ => false, + }; + ensure!( + is_literal, + InvalidSqlSnafu { + msg: "JSON2 type hint DEFAULT only supports literal values", + } + ); + Ok(()) +} + +fn ensure_no_path_conflict(hints: &[JsonTypeHint], path: &[String]) -> Result<()> { + for hint in hints { + ensure!( + hint.path != path, + InvalidSqlSnafu { + msg: format!("duplicated JSON2 type hint path '{}'", path.join(".")) + } + ); + ensure!( + !hint.path.starts_with(path) && !path.starts_with(&hint.path), + InvalidSqlSnafu { + msg: format!( + "JSON2 type hint path '{}' conflicts with '{}'", + path.join("."), + hint.path.join(".") + ) + } + ); + } + Ok(()) } #[cfg(test)] mod tests { - use sqlparser::ast::{DataType, Expr, Ident, StructField}; + use sqlparser::ast::{DataType, ExactNumberInfo}; use crate::dialect::GreptimeDbDialect; use crate::parser::{ParseOptions, ParserContext}; - use crate::statements::OptionMap; - use crate::statements::create::{ - Column, JSON_FORMAT_FULL_STRUCTURED, JSON_FORMAT_PARTIAL, JSON_FORMAT_RAW, JSON_OPT_FIELDS, - JSON_OPT_FORMAT, JSON_OPT_UNSTRUCTURED_KEYS, - }; + use crate::statements::create::Column; use crate::statements::statement::Statement; - use crate::util::OptionValue; + + fn parse_json2_column(sql: &str) -> Column { + let Statement::CreateTable(mut create_table) = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap() + .remove(0) + else { + unreachable!() + }; + + create_table.columns.remove(0) + } #[test] - fn test_parse_json_datatype_options() { - fn parse(sql: &str) -> Option { - let Statement::CreateTable(mut create_table) = ParserContext::create_with_dialect( + fn test_parse_json2_type_hints() { + let column = parse_json2_column( + r#" +CREATE TABLE traces ( + log_json_data JSON2 ( + "service.name" STRING NOT NULL DEFAULT 'null' INVERTED INDEX, + http.method STRING NOT NULL, + status_code INT64 NOT NULL, + comment STRING NULL, + ), + ts TIMESTAMP TIME INDEX, +)"#, + ); + + assert!(matches!( + column.column_def.data_type, + DataType::Custom(_, _) + )); + let hints = column.extensions.json_type_hints; + assert_eq!(hints.len(), 4); + + assert_eq!(hints[0].path, vec!["service.name"]); + assert_eq!(hints[0].data_type, DataType::String(None)); + assert!(!hints[0].nullable); + assert_eq!( + hints[0] + .default + .as_ref() + .map(|expr| expr.to_string()) + .as_deref(), + Some("'null'") + ); + assert!(hints[0].inverted_index); + + assert_eq!(hints[1].path, vec!["http", "method"]); + assert_eq!(hints[1].data_type, DataType::String(None)); + assert!(!hints[1].nullable); + assert!(!hints[1].inverted_index); + + assert_eq!(hints[2].path, vec!["status_code"]); + assert_eq!(hints[2].data_type, DataType::BigInt(None)); + assert!(!hints[2].nullable); + + assert_eq!(hints[3].path, vec!["comment"]); + assert_eq!(hints[3].data_type, DataType::String(None)); + assert!(hints[3].nullable); + } + + #[test] + fn test_parse_json2_type_hint_default_nullable() { + let column = parse_json2_column( + r#" +CREATE TABLE traces ( + log_json_data JSON2 (http.method STRING), + ts TIMESTAMP TIME INDEX, +)"#, + ); + + let hints = column.extensions.json_type_hints; + assert_eq!(hints.len(), 1); + assert!(hints[0].nullable); + } + + #[test] + fn test_parse_json2_type_hint_quoted_path_segments() { + let column = parse_json2_column( + r#" +CREATE TABLE traces ( + log_json_data JSON2 ( + "a".b STRING, + "x"."y" STRING, + "a.b"."c" STRING, + a."b.c" STRING + ), + ts TIMESTAMP TIME INDEX, +)"#, + ); + + let hints = column.extensions.json_type_hints; + assert_eq!(hints.len(), 4); + assert_eq!(hints[0].path, vec!["a", "b"]); + assert_eq!(hints[1].path, vec!["x", "y"]); + assert_eq!(hints[2].path, vec!["a.b", "c"]); + assert_eq!(hints[3].path, vec!["a", "b.c"]); + } + + #[test] + fn test_parse_json2_type_hint_normalizes_numeric_types() { + let column = parse_json2_column( + r#" +CREATE TABLE traces ( + log_json_data JSON2 ( + tinyint_value TINYINT, + smallint_value SMALLINT, + int_value INT, + integer_value INTEGER, + bigint_value BIGINT, + int64_value INT64, + tinyuint_value TINYINT UNSIGNED, + smalluint_value SMALLINT UNSIGNED, + uint_value INT UNSIGNED, + uint64_value UINT64, + float_value FLOAT, + real_value REAL, + double_value DOUBLE, + float64_value FLOAT64 + ), + ts TIMESTAMP TIME INDEX, +)"#, + ); + + let hints = column.extensions.json_type_hints; + assert_eq!(hints.len(), 14); + for hint in hints.iter().take(6) { + assert_eq!(hint.data_type, DataType::BigInt(None)); + } + for hint in hints.iter().skip(6).take(4) { + assert_eq!(hint.data_type, DataType::BigIntUnsigned(None)); + } + for hint in hints.iter().skip(10) { + assert_eq!(hint.data_type, DataType::Double(ExactNumberInfo::None)); + } + } + + #[test] + fn test_parse_json2_type_hint_default_accepts_signed_literals() { + let column = parse_json2_column( + r#" +CREATE TABLE traces ( + log_json_data JSON2 ( + negative_int INT64 DEFAULT -5, + positive_float FLOAT64 DEFAULT +1.5 + ), + ts TIMESTAMP TIME INDEX, +)"#, + ); + + let hints = column.extensions.json_type_hints; + assert_eq!(hints.len(), 2); + assert_eq!( + hints[0] + .default + .as_ref() + .map(|expr| expr.to_string()) + .as_deref(), + Some("-5") + ); + assert_eq!( + hints[1] + .default + .as_ref() + .map(|expr| expr.to_string()) + .as_deref(), + Some("+1.5") + ); + } + + #[test] + fn test_parse_json2_type_hint_default_rejects_function() { + let result = ParserContext::create_with_dialect( + r#" +CREATE TABLE traces ( + log_json_data JSON2 (status_code INT64 DEFAULT abs(-1)), + ts TIMESTAMP TIME INDEX, +)"#, + &GreptimeDbDialect {}, + ParseOptions::default(), + ); + + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("DEFAULT only supports literal values") + ); + } + + #[test] + fn test_parse_json2_type_hint_rejects_duplicate_path() { + let result = ParserContext::create_with_dialect( + r#" +CREATE TABLE traces ( + log_json_data JSON2 (a.b STRING, a.b INT64), + ts TIMESTAMP TIME INDEX, +)"#, + &GreptimeDbDialect {}, + ParseOptions::default(), + ); + + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("duplicated")); + } + + #[test] + fn test_parse_json2_type_hint_rejects_parent_child_path() { + let result = ParserContext::create_with_dialect( + r#" +CREATE TABLE traces ( + log_json_data JSON2 (a STRING, a.b INT64), + ts TIMESTAMP TIME INDEX, +)"#, + &GreptimeDbDialect {}, + ParseOptions::default(), + ); + + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("conflicts")); + } + + #[test] + fn test_parse_json2_type_hint_rejects_duplicated_nullability() { + for sql in [ + r#" +CREATE TABLE traces ( + log_json_data JSON2 (a STRING NULL NULL), + ts TIMESTAMP TIME INDEX, +)"#, + r#" +CREATE TABLE traces ( + log_json_data JSON2 (a STRING NOT NULL NOT NULL), + ts TIMESTAMP TIME INDEX, +)"#, + r#" +CREATE TABLE traces ( + log_json_data JSON2 (a STRING NOT NULL NULL), + ts TIMESTAMP TIME INDEX, +)"#, + r#" +CREATE TABLE traces ( + log_json_data JSON2 (a STRING NULL NOT NULL), + ts TIMESTAMP TIME INDEX, +)"#, + ] { + let result = ParserContext::create_with_dialect( sql, &GreptimeDbDialect {}, ParseOptions::default(), - ) - .unwrap() - .remove(0) else { - unreachable!() - }; + ); - let Column { - column_def, - extensions, - } = create_table.columns.remove(0); - assert_eq!(column_def.name.to_string(), "my_json"); - assert_eq!(column_def.data_type, DataType::JSON); - assert!(column_def.options.is_empty()); - - extensions.json_datatype_options + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("NULL/NOT NULL option already specified") + ); } - - let sql = r#" -CREATE TABLE json_data ( - my_json JSON(format = "partial", fields = Struct), - ts TIMESTAMP TIME INDEX, -)"#; - let options = parse(sql).unwrap(); - assert_eq!(options.len(), 2); - let option = options.value(JSON_OPT_FIELDS); - let expected = OptionValue::try_new(Expr::Struct { - values: vec![], - fields: vec![ - StructField { - field_name: Some(Ident::new("i")), - field_type: DataType::Int(None), - options: None, - }, - StructField { - field_name: Some(Ident::with_quote('"', "o.a")), - field_type: DataType::String(None), - options: None, - }, - StructField { - field_name: Some(Ident::with_quote('"', "o.b")), - field_type: DataType::String(None), - options: None, - }, - StructField { - field_name: Some(Ident::with_quote('`', "x.y.z")), - field_type: DataType::Float64, - options: None, - }, - ], - }) - .ok(); - assert_eq!(option, expected.as_ref()); - - let sql = r#" -CREATE TABLE json_data ( - my_json JSON(format = "partial", unstructured_keys = ["k", "foo.bar", "a.b.c"]), - ts TIMESTAMP TIME INDEX, -)"#; - let options = parse(sql).unwrap(); - assert_eq!(options.len(), 2); - assert_eq!( - options.value(JSON_OPT_FORMAT).and_then(|x| x.as_string()), - Some(JSON_FORMAT_PARTIAL) - ); - let expected = vec!["k", "foo.bar", "a.b.c"]; - assert_eq!( - options - .value(JSON_OPT_UNSTRUCTURED_KEYS) - .and_then(|x| x.as_list()), - Some(expected) - ); - - let sql = r#" -CREATE TABLE json_data ( - my_json JSON(format = "structured"), - ts TIMESTAMP TIME INDEX, -)"#; - let options = parse(sql).unwrap(); - assert_eq!(options.len(), 1); - assert_eq!( - options.value(JSON_OPT_FORMAT).and_then(|x| x.as_string()), - Some(JSON_FORMAT_FULL_STRUCTURED) - ); - - let sql = r#" -CREATE TABLE json_data ( - my_json JSON(format = "raw"), - ts TIMESTAMP TIME INDEX, -)"#; - let options = parse(sql).unwrap(); - assert_eq!(options.len(), 1); - assert_eq!( - options.value(JSON_OPT_FORMAT).and_then(|x| x.as_string()), - Some(JSON_FORMAT_RAW) - ); - - let sql = r#" -CREATE TABLE json_data ( - my_json JSON(), - ts TIMESTAMP TIME INDEX, -)"#; - let options = parse(sql).unwrap(); - assert!(options.is_empty()); - - let sql = r#" -CREATE TABLE json_data ( - my_json JSON, - ts TIMESTAMP TIME INDEX, -)"#; - let options = parse(sql); - assert!(options.is_none()); } } diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index f51735b769..565a2a96f0 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -40,7 +40,6 @@ use api::v1::SemanticType; use common_sql::default_constraint::parse_column_default_constraint; use common_time::timezone::Timezone; use datatypes::extension::json::{JsonExtensionType, JsonMetadata}; -use datatypes::json::JsonStructureSettings; use datatypes::prelude::ConcreteDataType; use datatypes::schema::{COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema}; use datatypes::types::json_type::JsonNativeType; @@ -55,10 +54,10 @@ use crate::ast::{ }; use crate::error::{ self, ConvertToGrpcDataTypeSnafu, ConvertValueSnafu, Result, - SerializeColumnDefaultConstraintSnafu, SetFulltextOptionSnafu, SetJsonStructureSettingsSnafu, + SerializeColumnDefaultConstraintSnafu, SetFulltextOptionSnafu, SetJsonSettingsSnafu, SetSkippingIndexOptionSnafu, SetVectorIndexOptionSnafu, SqlCommonSnafu, }; -use crate::statements::create::{Column, ColumnExtensions}; +use crate::statements::create::Column; pub use crate::statements::option_map::OptionMap; pub(crate) use crate::statements::transform::transform_statements; @@ -110,7 +109,7 @@ pub fn column_to_schema( && !is_time_index; let name = column.name().value.clone(); - let data_type = sql_data_type_to_concrete_data_type(column.data_type(), &column.extensions)?; + let data_type = sql_data_type_to_concrete_data_type(column.data_type())?; let default_constraint = parse_column_default_constraint(&name, &data_type, column.options(), timezone) .context(SqlCommonSnafu)?; @@ -164,16 +163,13 @@ pub fn column_to_schema( false }; if is_json2_column { - let settings = column - .extensions - .build_json_structure_settings()? - .unwrap_or_default(); + let settings = column.extensions.build_json_settings()?.unwrap_or_default(); let extension = JsonExtensionType::new(Arc::new(JsonMetadata { - json_structure_settings: Some(settings.clone()), + json_settings: Some(settings.clone()), })); column_schema .with_extension_type(&extension) - .with_context(|_| SetJsonStructureSettingsSnafu { + .with_context(|_| SetJsonSettingsSnafu { value: format!("{settings:?}"), })?; } @@ -187,7 +183,7 @@ pub fn sql_column_def_to_grpc_column_def( timezone: Option<&Timezone>, ) -> Result { let name = col.name.value.clone(); - let data_type = sql_data_type_to_concrete_data_type(&col.data_type, &Default::default())?; + let data_type = sql_data_type_to_concrete_data_type(&col.data_type)?; let is_nullable = col .options @@ -228,10 +224,7 @@ pub fn sql_column_def_to_grpc_column_def( }) } -pub fn sql_data_type_to_concrete_data_type( - data_type: &SqlDataType, - column_extensions: &ColumnExtensions, -) -> Result { +pub fn sql_data_type_to_concrete_data_type(data_type: &SqlDataType) -> Result { match data_type { SqlDataType::BigInt(_) | SqlDataType::Int64 => Ok(ConcreteDataType::int64_datatype()), SqlDataType::BigIntUnsigned(_) => Ok(ConcreteDataType::uint64_datatype()), @@ -299,19 +292,9 @@ pub fn sql_data_type_to_concrete_data_type( Ok(ConcreteDataType::vector_datatype(dim)) } JSON2_TYPE_NAME if args.is_empty() => { - let native_type = column_extensions - .build_json_structure_settings()? - .and_then(|x| match x { - JsonStructureSettings::Structured(Some(fields)) - | JsonStructureSettings::PartialUnstructuredByKey { - fields: Some(fields), - .. - } => Some(JsonNativeType::from(&ConcreteDataType::Struct(fields))), - JsonStructureSettings::UnstructuredRaw => Some(JsonNativeType::Variant), - _ => None, - }) - .unwrap_or(JsonNativeType::Object(Default::default())); - let format = JsonFormat::Json2(Box::new(native_type)); + // Currently, JSON2 is not inferred as any native type initially. + // TODO(fys): infer it later from type hints. + let format = JsonFormat::Json2(Box::new(JsonNativeType::Null)); Ok(ConcreteDataType::Json(JsonType::new(format))) } _ => error::SqlTypeNotSupportedSnafu { @@ -390,7 +373,7 @@ mod tests { fn check_type(sql_type: SqlDataType, data_type: ConcreteDataType) { assert_eq!( data_type, - sql_data_type_to_concrete_data_type(&sql_type, &Default::default()).unwrap() + sql_data_type_to_concrete_data_type(&sql_type).unwrap() ); } @@ -744,7 +727,7 @@ mod tests { vector_options: None, skipping_index_options: None, inverted_index_options: None, - json_datatype_options: None, + json_type_hints: vec![], vector_index_options: None, }, }; @@ -776,7 +759,7 @@ mod tests { vector_options: None, skipping_index_options: None, inverted_index_options: None, - json_datatype_options: None, + json_type_hints: vec![], vector_index_options: Some(OptionMap::from([ ("metric".to_string(), "cosine".to_string()), ("connectivity".to_string(), "32".to_string()), @@ -817,7 +800,7 @@ mod tests { vector_options: None, skipping_index_options: None, inverted_index_options: None, - json_datatype_options: None, + json_type_hints: vec![], vector_index_options: Some(OptionMap::default()), }, }; diff --git a/src/sql/src/statements/create.rs b/src/sql/src/statements/create.rs index 67742b853d..dc94c7f888 100644 --- a/src/sql/src/statements/create.rs +++ b/src/sql/src/statements/create.rs @@ -12,47 +12,40 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::fmt::{Display, Formatter}; -use std::sync::Arc; use common_catalog::consts::FILE_ENGINE; -use datatypes::data_type::ConcreteDataType; -use datatypes::json::JsonStructureSettings; +use common_sql::default_constraint::parse_column_default_constraint; +use datatypes::json::JsonSettings; +use datatypes::prelude::ConcreteDataType; use datatypes::schema::{ - FulltextOptions, SkippingIndexOptions, VectorDistanceMetric, VectorIndexEngineType, - VectorIndexOptions, + ColumnDefaultConstraint, FulltextOptions, SkippingIndexOptions, VectorDistanceMetric, + VectorIndexEngineType, VectorIndexOptions, }; -use datatypes::types::StructType; use itertools::Itertools; use serde::Serialize; -use snafu::{OptionExt, ResultExt}; -use sqlparser::ast::{ColumnOptionDef, DataType, Expr}; +use snafu::ResultExt; +use sqlparser::ast::{ColumnOption, ColumnOptionDef, DataType, Expr}; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::{ColumnDef, Ident, ObjectName, Value as SqlValue}; +use crate::dialect::GreptimeDbDialect; use crate::error::{ - InvalidFlowQuerySnafu, InvalidJsonStructureSettingSnafu, InvalidSqlSnafu, Result, - SetFulltextOptionSnafu, SetSkippingIndexOptionSnafu, + InvalidFlowQuerySnafu, InvalidSqlSnafu, Result, SetFulltextOptionSnafu, + SetSkippingIndexOptionSnafu, }; +use crate::parser::ParserContext; use crate::statements::query::Query as GtQuery; use crate::statements::statement::Statement; use crate::statements::tql::Tql; -use crate::statements::{OptionMap, sql_data_type_to_concrete_data_type}; -use crate::util::OptionValue; +use crate::statements::{OptionMap, sql_data_type_to_concrete_data_type, value_to_sql_value}; const LINE_SEP: &str = ",\n"; const COMMA_SEP: &str = ", "; const INDENT: usize = 2; pub const VECTOR_OPT_DIM: &str = "dim"; -pub const JSON_OPT_UNSTRUCTURED_KEYS: &str = "unstructured_keys"; -pub const JSON_OPT_FORMAT: &str = "format"; -pub(crate) const JSON_OPT_FIELDS: &str = "fields"; -pub const JSON_FORMAT_FULL_STRUCTURED: &str = "structured"; -pub const JSON_FORMAT_RAW: &str = "raw"; -pub const JSON_FORMAT_PARTIAL: &str = "partial"; - macro_rules! format_indent { ($fmt: expr, $arg: expr) => { format!($fmt, format_args!("{: >1$}", "", INDENT), $arg) @@ -143,7 +136,16 @@ pub struct ColumnExtensions { pub inverted_index_options: Option, /// Vector index options for HNSW-based vector similarity search. pub vector_index_options: Option, - pub json_datatype_options: Option, + pub json_type_hints: Vec, +} + +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] +pub struct JsonTypeHint { + pub path: Vec, + pub data_type: DataType, + pub nullable: bool, + pub default: Option, + pub inverted_index: bool, } impl Column { @@ -178,14 +180,11 @@ impl Display for Column { } write!(f, "{} {}", self.column_def.name, self.column_def.data_type)?; - if let Some(options) = &self.extensions.json_datatype_options { + if !self.extensions.json_type_hints.is_empty() { write!( f, - "({})", - options - .entries() - .map(|(k, v)| format!("{k} = {v}")) - .join(COMMA_SEP) + "{}", + format_json_type_hints(&self.extensions.json_type_hints) )?; } for option in &self.column_def.options { @@ -335,106 +334,168 @@ impl ColumnExtensions { Ok(Some(result)) } - pub fn build_json_structure_settings(&self) -> Result> { - let Some(options) = self.json_datatype_options.as_ref() else { + pub fn build_json_settings(&self) -> Result> { + if self.json_type_hints.is_empty() { return Ok(None); - }; - - let unstructured_keys = options - .value(JSON_OPT_UNSTRUCTURED_KEYS) - .and_then(|v| { - v.as_list().map(|x| { - x.into_iter() - .map(|x| x.to_string()) - .collect::>() - }) - }) - .unwrap_or_default(); - - let fields = if let Some(value) = options.value(JSON_OPT_FIELDS) { - let fields = value - .as_struct_fields() - .context(InvalidJsonStructureSettingSnafu { - reason: format!(r#"expect "{JSON_OPT_FIELDS}" a struct, actual: "{value}""#,), - })?; - let fields = fields - .iter() - .map(|field| { - let name = field.field_name.as_ref().map(|x| x.value.clone()).context( - InvalidJsonStructureSettingSnafu { - reason: format!(r#"missing field name in "{field}""#), - }, - )?; - let datatype = sql_data_type_to_concrete_data_type( - &field.field_type, - &Default::default(), - )?; - Ok(datatypes::types::StructField::new(name, datatype, true)) - }) - .collect::>()?; - Some(StructType::new(Arc::new(fields))) - } else { - None - }; - - let format = options - .get(JSON_OPT_FORMAT) - .unwrap_or(JSON_FORMAT_FULL_STRUCTURED); - let settings = match format { - JSON_FORMAT_FULL_STRUCTURED => JsonStructureSettings::Structured(fields), - JSON_FORMAT_PARTIAL => { - let fields = fields.map(|fields| { - let mut fields = Arc::unwrap_or_clone(fields.fields()); - fields.push(datatypes::types::StructField::new( - JsonStructureSettings::RAW_FIELD.to_string(), - ConcreteDataType::string_datatype(), - true, - )); - StructType::new(Arc::new(fields)) - }); - JsonStructureSettings::PartialUnstructuredByKey { - fields, - unstructured_keys, - } - } - JSON_FORMAT_RAW => JsonStructureSettings::UnstructuredRaw, - _ => { - return InvalidSqlSnafu { - msg: format!("unknown JSON datatype 'format': {format}"), - } - .fail(); - } - }; - Ok(Some(settings)) - } - - pub fn set_json_structure_settings(&mut self, settings: JsonStructureSettings) { - let mut map = OptionMap::default(); - - let format = match settings { - JsonStructureSettings::Structured(_) => JSON_FORMAT_FULL_STRUCTURED, - JsonStructureSettings::PartialUnstructuredByKey { .. } => JSON_FORMAT_PARTIAL, - JsonStructureSettings::UnstructuredRaw => JSON_FORMAT_RAW, - }; - map.insert(JSON_OPT_FORMAT.to_string(), format.to_string()); - - if let JsonStructureSettings::PartialUnstructuredByKey { - fields: _, - unstructured_keys, - } = settings - { - let value = OptionValue::from( - unstructured_keys - .iter() - .map(|x| x.as_str()) - .sorted() - .collect::>(), - ); - map.insert_options(JSON_OPT_UNSTRUCTURED_KEYS, value); } - self.json_datatype_options = Some(map); + Ok(Some(JsonSettings::new( + self.json_type_hints + .iter() + .map(|hint| { + Ok(datatypes::json::JsonTypeHint { + path: hint.path.clone(), + data_type: json_type_hint_concrete_data_type(&hint.data_type)?, + nullable: hint.nullable, + default_constraint: build_json_type_hint_default_constraint(hint)?, + inverted_index: hint.inverted_index, + }) + }) + .collect::>>()?, + ))) } + + pub fn set_json_settings(&mut self, settings: JsonSettings) -> Result<()> { + self.json_type_hints = settings + .type_hints + .into_iter() + .map(|hint| { + let data_type = json_type_hint_sql_data_type(&hint.data_type)?; + let default = hint + .default_constraint + .map(|constraint| column_default_constraint_to_expr(&constraint)) + .transpose()?; + Ok(JsonTypeHint { + path: hint.path, + data_type, + nullable: hint.nullable, + default, + inverted_index: hint.inverted_index, + }) + }) + .collect::>>()?; + Ok(()) + } +} + +fn build_json_type_hint_default_constraint( + hint: &JsonTypeHint, +) -> Result> { + let Some(default) = &hint.default else { + return Ok(None); + }; + + let data_type = json_type_hint_concrete_data_type(&hint.data_type)?; + let opts = [ColumnOptionDef { + name: None, + option: ColumnOption::Default(default.clone()), + }]; + + // Use the JSON path as the column name context for default value parsing errors. + let json_path = hint.path.join("."); + let default_constraint = parse_column_default_constraint(&json_path, &data_type, &opts, None) + .context(crate::error::SqlCommonSnafu)?; + + if let Some(constraint) = &default_constraint { + constraint + .validate(&data_type, hint.nullable) + .map_err(|e| { + InvalidSqlSnafu { + msg: format!("invalid DEFAULT for JSON2 type hint '{}': {e}", json_path), + } + .build() + })?; + } + + Ok(default_constraint) +} + +fn json_type_hint_concrete_data_type(data_type: &DataType) -> Result { + let data_type = sql_data_type_to_concrete_data_type(data_type)?; + normalize_json_type_hint_concrete_data_type(&data_type) +} + +fn normalize_json_type_hint_concrete_data_type( + data_type: &ConcreteDataType, +) -> Result { + let normalized = match data_type { + ConcreteDataType::String(_) => ConcreteDataType::string_datatype(), + ConcreteDataType::Int8(_) + | ConcreteDataType::Int16(_) + | ConcreteDataType::Int32(_) + | ConcreteDataType::Int64(_) => ConcreteDataType::int64_datatype(), + ConcreteDataType::UInt8(_) + | ConcreteDataType::UInt16(_) + | ConcreteDataType::UInt32(_) + | ConcreteDataType::UInt64(_) => ConcreteDataType::uint64_datatype(), + ConcreteDataType::Float32(_) | ConcreteDataType::Float64(_) => { + ConcreteDataType::float64_datatype() + } + ConcreteDataType::Boolean(_) => ConcreteDataType::boolean_datatype(), + _ => { + return InvalidSqlSnafu { + msg: format!("unsupported JSON2 type hint data type: {data_type}"), + } + .fail(); + } + }; + Ok(normalized) +} + +fn json_type_hint_sql_data_type(data_type: &ConcreteDataType) -> Result { + let data_type = normalize_json_type_hint_concrete_data_type(data_type)?; + let sql_type = match data_type { + ConcreteDataType::String(_) => DataType::String(None), + ConcreteDataType::Int64(_) => DataType::BigInt(None), + ConcreteDataType::UInt64(_) => DataType::BigIntUnsigned(None), + ConcreteDataType::Float64(_) => DataType::Double(sqlparser::ast::ExactNumberInfo::None), + ConcreteDataType::Boolean(_) => DataType::Boolean, + _ => unreachable!("JSON2 type hint data type should have been normalized"), + }; + Ok(sql_type) +} + +fn column_default_constraint_to_expr(constraint: &ColumnDefaultConstraint) -> Result { + match constraint { + ColumnDefaultConstraint::Value(value) => Ok(Expr::Value(value_to_sql_value(value)?.into())), + ColumnDefaultConstraint::Function(function) => { + ParserContext::parse_function(function, &GreptimeDbDialect {}) + } + } +} + +fn format_json_type_hint(hint: &JsonTypeHint) -> String { + let path = hint + .path + .iter() + .map(|segment| format_json_path_segment(segment)) + .join("."); + let nullability = if hint.nullable { " NULL" } else { " NOT NULL" }; + let default = hint + .default + .as_ref() + .map(|expr| format!(" DEFAULT {expr}")) + .unwrap_or_default(); + let inverted_index = if hint.inverted_index { + " INVERTED INDEX" + } else { + "" + }; + format!( + "{} {}{}{}{}", + path, hint.data_type, nullability, default, inverted_index + ) +} + +fn format_json_type_hints(hints: &[JsonTypeHint]) -> String { + format!( + "(\n {}\n )", + hints.iter().map(format_json_type_hint).join(",\n ") + ) +} + +fn format_json_path_segment(segment: &str) -> String { + format!("\"{}\"", segment.replace('"', "\"\"")) } /// Partition on columns or values. @@ -720,6 +781,11 @@ impl Display for CreateView { mod tests { use std::assert_matches; + use datatypes::json::{JsonSettings, JsonTypeHint as DatatypeJsonTypeHint}; + use datatypes::prelude::ConcreteDataType; + use datatypes::schema::ColumnDefaultConstraint; + use datatypes::value::Value; + use crate::dialect::GreptimeDbDialect; use crate::error::Error; use crate::parser::{ParseOptions, ParserContext}; @@ -889,6 +955,240 @@ ENGINE=mito ); } + #[test] + fn test_display_json2_type_hints_quotes_path_segments() { + let sql = r#"CREATE TABLE traces ( + log_json_data JSON2 ( + "service.name" STRING, + "a.b"."c" INT64 NOT NULL, + a."b.c" STRING + ), + ts TIMESTAMP TIME INDEX + )"#; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + + match &result[0] { + Statement::CreateTable(c) => { + let new_sql = format!("\n{}", c); + assert_eq!( + r#" +CREATE TABLE traces ( + log_json_data JSON2( + "service.name" STRING NULL, + "a.b"."c" BIGINT NOT NULL, + "a"."b.c" STRING NULL + ), + ts TIMESTAMP NOT NULL, + TIME INDEX (ts) +) +ENGINE=mito +"#, + &new_sql + ); + + let new_result = ParserContext::create_with_dialect( + &new_sql, + &GreptimeDbDialect {}, + ParseOptions::default(), + ) + .unwrap(); + assert_eq!(result, new_result); + } + _ => unreachable!(), + } + } + + #[test] + fn test_display_json2_type_hints_quotes_numeric_segments() { + let sql = r#"CREATE TABLE traces ( + log_json_data JSON2 ( + "1abc" STRING, + a."2b" INT64 NOT NULL + ), + ts TIMESTAMP TIME INDEX + )"#; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + + match &result[0] { + Statement::CreateTable(c) => { + let new_sql = format!("\n{}", c); + assert_eq!( + r#" +CREATE TABLE traces ( + log_json_data JSON2( + "1abc" STRING NULL, + "a"."2b" BIGINT NOT NULL + ), + ts TIMESTAMP NOT NULL, + TIME INDEX (ts) +) +ENGINE=mito +"#, + &new_sql + ); + + let new_result = ParserContext::create_with_dialect( + &new_sql, + &GreptimeDbDialect {}, + ParseOptions::default(), + ) + .unwrap(); + assert_eq!(result, new_result); + } + _ => unreachable!(), + } + } + + #[test] + fn test_json2_type_hint_default_builds_default_constraint() { + let sql = r#"CREATE TABLE traces ( + log_json_data JSON2 ( + status_code INT64 DEFAULT -5, + duration FLOAT64 DEFAULT +1.5, + error BOOLEAN DEFAULT false, + message STRING DEFAULT 'unknown' + ), + ts TIMESTAMP TIME INDEX + )"#; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + + let Statement::CreateTable(create_table) = &result[0] else { + unreachable!() + }; + let settings = create_table.columns[0] + .extensions + .build_json_settings() + .unwrap() + .unwrap(); + let hints = settings.type_hints; + + assert_eq!(hints[0].data_type, ConcreteDataType::int64_datatype()); + assert_eq!( + hints[0].default_constraint, + Some(ColumnDefaultConstraint::Value(Value::Int64(-5))) + ); + assert_eq!(hints[1].data_type, ConcreteDataType::float64_datatype()); + assert_eq!( + hints[1].default_constraint, + Some(ColumnDefaultConstraint::Value(Value::Float64(1.5.into()))) + ); + assert_eq!(hints[2].data_type, ConcreteDataType::boolean_datatype()); + assert_eq!( + hints[2].default_constraint, + Some(ColumnDefaultConstraint::Value(Value::Boolean(false))) + ); + assert_eq!(hints[3].data_type, ConcreteDataType::string_datatype()); + assert_eq!( + hints[3].default_constraint, + Some(ColumnDefaultConstraint::Value(Value::String( + "unknown".into() + ))) + ); + } + + #[test] + fn test_json2_type_hint_not_null_default_null_is_rejected() { + let sql = r#"CREATE TABLE traces ( + log_json_data JSON2 ( + status_code INT64 NOT NULL DEFAULT NULL + ), + ts TIMESTAMP TIME INDEX + )"#; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + + let Statement::CreateTable(create_table) = &result[0] else { + unreachable!() + }; + let err = create_table.columns[0] + .extensions + .build_json_settings() + .unwrap_err(); + assert!( + err.to_string() + .contains("Default value should not be null for non null column") + ); + } + + #[test] + fn test_set_json_settings_normalizes_type_hint_sql_types() { + let mut extensions = super::ColumnExtensions::default(); + extensions + .set_json_settings(JsonSettings::new(vec![ + DatatypeJsonTypeHint { + path: vec!["i".to_string()], + data_type: ConcreteDataType::int32_datatype(), + nullable: true, + default_constraint: None, + inverted_index: false, + }, + DatatypeJsonTypeHint { + path: vec!["f".to_string()], + data_type: ConcreteDataType::float32_datatype(), + nullable: true, + default_constraint: None, + inverted_index: false, + }, + DatatypeJsonTypeHint { + path: vec!["u".to_string()], + data_type: ConcreteDataType::uint32_datatype(), + nullable: true, + default_constraint: None, + inverted_index: false, + }, + DatatypeJsonTypeHint { + path: vec!["s".to_string()], + data_type: ConcreteDataType::string_datatype(), + nullable: true, + default_constraint: None, + inverted_index: false, + }, + DatatypeJsonTypeHint { + path: vec!["b".to_string()], + data_type: ConcreteDataType::boolean_datatype(), + nullable: true, + default_constraint: None, + inverted_index: false, + }, + ])) + .unwrap(); + + assert_eq!( + extensions + .json_type_hints + .iter() + .map(|hint| hint.data_type.to_string()) + .collect::>(), + vec!["BIGINT", "DOUBLE", "BIGINT UNSIGNED", "STRING", "BOOLEAN"] + ); + } + + #[test] + fn test_set_json_settings_rejects_unsupported_type_hint_type() { + let mut extensions = super::ColumnExtensions::default(); + let err = extensions + .set_json_settings(JsonSettings::new(vec![DatatypeJsonTypeHint { + path: vec!["u".to_string()], + data_type: ConcreteDataType::date_datatype(), + nullable: true, + default_constraint: None, + inverted_index: false, + }])) + .unwrap_err(); + + assert!( + err.to_string() + .contains("unsupported JSON2 type hint data type") + ); + } + #[test] fn test_display_create_database() { let sql = r"create database test;"; @@ -1068,7 +1368,7 @@ AS SELECT number FROM numbers_input where number > 10"#, vector_options: None, skipping_index_options: None, inverted_index_options: None, - json_datatype_options: None, + json_type_hints: vec![], vector_index_options: Some(OptionMap::from([( "connectivity".to_string(), "0".to_string(), @@ -1089,7 +1389,7 @@ AS SELECT number FROM numbers_input where number > 10"#, vector_options: None, skipping_index_options: None, inverted_index_options: None, - json_datatype_options: None, + json_type_hints: vec![], vector_index_options: Some(OptionMap::from([( "expansion_add".to_string(), "0".to_string(), @@ -1110,7 +1410,7 @@ AS SELECT number FROM numbers_input where number > 10"#, vector_options: None, skipping_index_options: None, inverted_index_options: None, - json_datatype_options: None, + json_type_hints: vec![], vector_index_options: Some(OptionMap::from([( "expansion_search".to_string(), "0".to_string(), @@ -1131,7 +1431,7 @@ AS SELECT number FROM numbers_input where number > 10"#, vector_options: None, skipping_index_options: None, inverted_index_options: None, - json_datatype_options: None, + json_type_hints: vec![], vector_index_options: Some(OptionMap::from([ ("connectivity".to_string(), "32".to_string()), ("expansion_add".to_string(), "200".to_string()), diff --git a/src/sql/src/statements/transform/type_alias.rs b/src/sql/src/statements/transform/type_alias.rs index f2b59913c4..b2d1803fb3 100644 --- a/src/sql/src/statements/transform/type_alias.rs +++ b/src/sql/src/statements/transform/type_alias.rs @@ -117,9 +117,7 @@ impl TransformRule for TypeAliasTransformRule { } if get_type_by_alias(data_type).is_some() => { // Safety: checked in the match arm. let new_type = get_type_by_alias(data_type).unwrap(); - if let Ok(new_type) = - sql_data_type_to_concrete_data_type(&new_type, &Default::default()) - { + if let Ok(new_type) = sql_data_type_to_concrete_data_type(&new_type) { *expr = Expr::Function(cast_expr_to_arrow_cast_func( (**cast_expr).clone(), new_type.as_arrow_type().to_string(), @@ -134,10 +132,9 @@ impl TransformRule for TypeAliasTransformRule { expr: cast_expr, .. } => { - if let Ok(concrete_type) = sql_data_type_to_concrete_data_type( - &DataType::Timestamp(*precision, *zone), - &Default::default(), - ) { + if let Ok(concrete_type) = + sql_data_type_to_concrete_data_type(&DataType::Timestamp(*precision, *zone)) + { let new_type = concrete_type.as_arrow_type(); *expr = Expr::Function(cast_expr_to_arrow_cast_func( (**cast_expr).clone(), diff --git a/src/sql/src/util.rs b/src/sql/src/util.rs index f627c43e48..6a7abe23bb 100644 --- a/src/sql/src/util.rs +++ b/src/sql/src/util.rs @@ -26,7 +26,7 @@ use promql_parser::parser::{ use serde::Serialize; use snafu::ensure; use sqlparser::ast::{ - Array, Expr, Ident, ObjectName, ObjectNamePart, SetExpr, SqlOption, StructField, TableFactor, + Array, Expr, Ident, ObjectName, ObjectNamePart, SetExpr, SqlOption, TableFactor, TableWithJoins, Value, ValueWithSpan, }; use sqlparser_derive::{Visit, VisitMut}; @@ -128,13 +128,6 @@ impl OptionValue { _ => None, } } - - pub(crate) fn as_struct_fields(&self) -> Option<&[StructField]> { - match &self.0 { - Expr::Struct { fields, .. } => Some(fields), - _ => None, - } - } } impl From for OptionValue { diff --git a/tests-integration/src/tests/instance_test.rs b/tests-integration/src/tests/instance_test.rs index 59f6a626c7..12aa145dca 100644 --- a/tests-integration/src/tests/instance_test.rs +++ b/tests-integration/src/tests/instance_test.rs @@ -3009,17 +3009,17 @@ CREATE TABLE b ( let output = execute_sql(&instance, "SHOW CREATE TABLE b").await.data; let expected = r#" -+-------+-----------------------------------------+ -| Table | Create Table | -+-------+-----------------------------------------+ -| b | CREATE TABLE IF NOT EXISTS "b" ( | -| | "j" JSON(format = 'structured') NULL, | -| | "ts" TIMESTAMP(3) NOT NULL, | -| | TIME INDEX ("ts") | -| | ) | -| | | -| | ENGINE=mito | -| | | -+-------+-----------------------------------------+"#; ++-------+----------------------------------+ +| Table | Create Table | ++-------+----------------------------------+ +| b | CREATE TABLE IF NOT EXISTS "b" ( | +| | "j" JSON2 NULL, | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++-------+----------------------------------+"#; check_output_stream(output, expected).await; } diff --git a/tests-integration/tests/jsonbench.rs b/tests-integration/tests/jsonbench.rs index 9e8cabf3e0..a7b7ce2764 100644 --- a/tests-integration/tests/jsonbench.rs +++ b/tests-integration/tests/jsonbench.rs @@ -295,7 +295,7 @@ async fn desc_table(frontend: &Arc) { +---------+----------------------+-----+------+---------+---------------+ | Column | Type | Key | Null | Default | Semantic Type | +---------+----------------------+-----+------+---------+---------------+ -| data | Json2{} | | YES | | FIELD | +| data | Json2 | | YES | | FIELD | | time_us | TimestampMicrosecond | PRI | NO | | TIMESTAMP | +---------+----------------------+-----+------+---------+---------------+"#; execute_sql_and_expect(frontend, sql, expected).await; diff --git a/tests/cases/standalone/common/types/json/json2.result b/tests/cases/standalone/common/types/json/json2.result index 7de73f2a78..84539b50fb 100644 --- a/tests/cases/standalone/common/types/json/json2.result +++ b/tests/cases/standalone/common/types/json/json2.result @@ -153,11 +153,37 @@ select j.c, j.y from json2_table order by ts; select j from json2_table order by ts; -Error: 3001(EngineExecuteQuery), Failed to align JSON array, reason: Invalid argument error: use StructArray::try_new_with_length or StructArray::new_empty_fields to create a struct array with no fields so that the length can be set correctly ++--------------------+ +| j | ++--------------------+ +| {__json_plain__: } | +| {__json_plain__: } | +| {__json_plain__: } | +| {__json_plain__: } | +| {__json_plain__: } | +| {__json_plain__: } | +| {__json_plain__: } | +| {__json_plain__: } | +| {__json_plain__: } | +| {__json_plain__: } | ++--------------------+ select * from json2_table order by ts; -Error: 3001(EngineExecuteQuery), Failed to align JSON array, reason: Invalid argument error: use StructArray::try_new_with_length or StructArray::new_empty_fields to create a struct array with no fields so that the length can be set correctly ++-------------------------+--------------------+ +| ts | j | ++-------------------------+--------------------+ +| 1970-01-01T00:00:00.001 | {__json_plain__: } | +| 1970-01-01T00:00:00.002 | {__json_plain__: } | +| 1970-01-01T00:00:00.003 | {__json_plain__: } | +| 1970-01-01T00:00:00.004 | {__json_plain__: } | +| 1970-01-01T00:00:00.005 | {__json_plain__: } | +| 1970-01-01T00:00:00.006 | {__json_plain__: } | +| 1970-01-01T00:00:00.007 | {__json_plain__: } | +| 1970-01-01T00:00:00.008 | {__json_plain__: } | +| 1970-01-01T00:00:00.009 | {__json_plain__: } | +| 1970-01-01T00:00:00.010 | {__json_plain__: } | ++-------------------------+--------------------+ select j.a.b + 1 from json2_table order by ts; @@ -210,3 +236,25 @@ drop table json2_table; Affected Rows: 0 +create table json2_default_null_ok ( + ts timestamp time index, + j json2( + a int64 null default null + ) +); + +Affected Rows: 0 + +drop table json2_default_null_ok; + +Affected Rows: 0 + +create table json2_default_null_check ( + ts timestamp time index, + j json2( + a int64 not null default null + ) +); + +Error: 2000(InvalidSyntax), Invalid SQL, error: invalid DEFAULT for JSON2 type hint 'a': Default value should not be null for non null column + diff --git a/tests/cases/standalone/common/types/json/json2.sql b/tests/cases/standalone/common/types/json/json2.sql index cb8df2f8b9..51be5fbbe0 100644 --- a/tests/cases/standalone/common/types/json/json2.sql +++ b/tests/cases/standalone/common/types/json/json2.sql @@ -60,3 +60,19 @@ select abs(j.c) from json2_table order by ts; select j.d from json2_table order by ts; drop table json2_table; + +create table json2_default_null_ok ( + ts timestamp time index, + j json2( + a int64 null default null + ) +); + +drop table json2_default_null_ok; + +create table json2_default_null_check ( + ts timestamp time index, + j json2( + a int64 not null default null + ) +); diff --git a/tests/cases/standalone/common/types/json/json2_type_hints.result b/tests/cases/standalone/common/types/json/json2_type_hints.result new file mode 100644 index 0000000000..f8b5d661a0 --- /dev/null +++ b/tests/cases/standalone/common/types/json/json2_type_hints.result @@ -0,0 +1,92 @@ +CREATE TABLE json2_type_hints ( + ts TIMESTAMP TIME INDEX, + j JSON2 ( + user.age BIGINT NOT NULL DEFAULT 18, + user.name STRING DEFAULT 'unknown', + user.active BOOLEAN NULL, + score DOUBLE NULL DEFAULT 1.5 + ) +); + +Affected Rows: 0 + +SHOW CREATE TABLE json2_type_hints; + ++------------------+--------------------------------------------------+ +| Table | Create Table | ++------------------+--------------------------------------------------+ +| json2_type_hints | CREATE TABLE IF NOT EXISTS "json2_type_hints" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "j" JSON2( | +| | "user"."age" BIGINT NOT NULL DEFAULT 18, | +| | "user"."name" STRING NULL DEFAULT 'unknown', | +| | "user"."active" BOOLEAN NULL, | +| | "score" DOUBLE NULL DEFAULT 1.5 | +| | ) NULL, | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++------------------+--------------------------------------------------+ + +INSERT INTO json2_type_hints +VALUES + (1, '{"user":{"age":42,"name":"Alice","active":true},"score":3.25}'), + (2, '{"user":{"name":"Bob"}}'), + (3, '{}'); + +Affected Rows: 3 + +SELECT + j.user.age, + j.user.name, + j.user.active, + j.score +FROM json2_type_hints +ORDER BY ts; + ++-----------------------------------------------+------------------------------------------------+--------------------------------------------------+--------------------------------------------+ +| json_get(json2_type_hints.j,Utf8("user.age")) | json_get(json2_type_hints.j,Utf8("user.name")) | json_get(json2_type_hints.j,Utf8("user.active")) | json_get(json2_type_hints.j,Utf8("score")) | ++-----------------------------------------------+------------------------------------------------+--------------------------------------------------+--------------------------------------------+ +| 42 | Alice | true | 3.25 | +| 18 | Bob | | 1.5 | +| 18 | unknown | | 1.5 | ++-----------------------------------------------+------------------------------------------------+--------------------------------------------------+--------------------------------------------+ + +INSERT INTO json2_type_hints +VALUES (4, '{"user":{"age":"bad"}}'); + +Error: 1004(InvalidArguments), Invalid JSON: JSON value at user.age does not match JSON2 type hint Int64 + +CREATE TABLE json2_type_hints_required ( + ts TIMESTAMP TIME INDEX, + j JSON2 ( + user.age BIGINT NOT NULL + ) +); + +Affected Rows: 0 + +INSERT INTO json2_type_hints_required +VALUES (1, '{}'); + +Error: 1004(InvalidArguments), Invalid JSON: missing non-null JSON2 type hint path user.age + +CREATE TABLE json2_type_hints_timestamp ( + ts TIMESTAMP TIME INDEX, + j JSON2 ( + event_time TIMESTAMP + ) +); + +Error: 2000(InvalidSyntax), Invalid SQL, error: unsupported JSON2 type hint data type: TIMESTAMP + +DROP TABLE json2_type_hints; + +Affected Rows: 0 + +DROP TABLE json2_type_hints_required; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/types/json/json2_type_hints.sql b/tests/cases/standalone/common/types/json/json2_type_hints.sql new file mode 100644 index 0000000000..80dcc49e70 --- /dev/null +++ b/tests/cases/standalone/common/types/json/json2_type_hints.sql @@ -0,0 +1,49 @@ +CREATE TABLE json2_type_hints ( + ts TIMESTAMP TIME INDEX, + j JSON2 ( + user.age BIGINT NOT NULL DEFAULT 18, + user.name STRING DEFAULT 'unknown', + user.active BOOLEAN NULL, + score DOUBLE NULL DEFAULT 1.5 + ) +); + +SHOW CREATE TABLE json2_type_hints; + +INSERT INTO json2_type_hints +VALUES + (1, '{"user":{"age":42,"name":"Alice","active":true},"score":3.25}'), + (2, '{"user":{"name":"Bob"}}'), + (3, '{}'); + +SELECT + j.user.age, + j.user.name, + j.user.active, + j.score +FROM json2_type_hints +ORDER BY ts; + +INSERT INTO json2_type_hints +VALUES (4, '{"user":{"age":"bad"}}'); + +CREATE TABLE json2_type_hints_required ( + ts TIMESTAMP TIME INDEX, + j JSON2 ( + user.age BIGINT NOT NULL + ) +); + +INSERT INTO json2_type_hints_required +VALUES (1, '{}'); + +CREATE TABLE json2_type_hints_timestamp ( + ts TIMESTAMP TIME INDEX, + j JSON2 ( + event_time TIMESTAMP + ) +); + +DROP TABLE json2_type_hints; + +DROP TABLE json2_type_hints_required;