diff --git a/src/api/src/helper.rs b/src/api/src/helper.rs index 0b70ea865d..da5fdcfeda 100644 --- a/src/api/src/helper.rs +++ b/src/api/src/helper.rs @@ -13,6 +13,7 @@ // limitations under the License. use std::collections::HashSet; +use std::sync::Arc; use common_decimal::Decimal128; use common_decimal::decimal128::{DECIMAL128_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION}; @@ -185,7 +186,7 @@ impl From for ConcreteDataType { datatype: d.datatype(), datatype_ext: d.datatype_extension.clone().map(|d| *d), }; - ConcreteDataType::list_datatype(item_type.into()) + ConcreteDataType::list_datatype(Arc::new(item_type.into())) } else { // invalid state: type extension not found ConcreteDataType::null_datatype() @@ -208,7 +209,7 @@ impl From for ConcreteDataType { StructField::new(f.name.clone(), field_type.into(), true) }) .collect::>(); - ConcreteDataType::struct_datatype(StructType::from(fields)) + ConcreteDataType::struct_datatype(StructType::new(Arc::new(fields))) } else { // invalid state: type extension not found ConcreteDataType::null_datatype() @@ -445,7 +446,7 @@ impl TryFrom for ColumnDataTypeWrapper { ColumnDataType::Struct => { if let Some(struct_type) = datatype.as_struct() { let mut fields = Vec::with_capacity(struct_type.fields().len()); - for field in struct_type.fields() { + for field in struct_type.fields().iter() { let field_type = ColumnDataTypeWrapper::try_from(field.data_type().clone())?; let proto_field = crate::v1::StructField { @@ -755,7 +756,7 @@ pub fn pb_value_to_value_ref<'a>( let list_value = ListValueRef::RefList { val: items, - item_datatype: item_type.clone(), + item_datatype: Arc::new(item_type.clone()), }; ValueRef::List(list_value) } @@ -794,7 +795,7 @@ pub fn pb_value_to_value_ref<'a>( let struct_value_ref = StructValueRef::RefList { val: items, - fields: StructType::new(struct_fields), + fields: StructType::new(Arc::new(struct_fields)), }; ValueRef::Struct(struct_value_ref) } @@ -1351,10 +1352,10 @@ mod tests { ColumnDataTypeWrapper::vector_datatype(3).into() ); assert_eq!( - ConcreteDataType::list_datatype(ConcreteDataType::string_datatype()), + ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::string_datatype())), ColumnDataTypeWrapper::list_datatype(ColumnDataTypeWrapper::string_datatype()).into() ); - let struct_type = StructType::new(vec![ + let struct_type = StructType::new(Arc::new(vec![ StructField::new("id".to_string(), ConcreteDataType::int64_datatype(), true), StructField::new( "name".to_string(), @@ -1367,7 +1368,7 @@ mod tests { ConcreteDataType::string_datatype(), true, ), - ]); + ])); assert_eq!( ConcreteDataType::struct_datatype(struct_type.clone()), ColumnDataTypeWrapper::struct_datatype(vec![ @@ -1534,7 +1535,7 @@ mod tests { assert_eq!( ColumnDataTypeWrapper::list_datatype(ColumnDataTypeWrapper::int16_datatype()), - ConcreteDataType::list_datatype(ConcreteDataType::int16_datatype()) + ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int16_datatype())) .try_into() .expect("Failed to create column datatype from List(ListType { item_type: Int16(Int16Type) })") ); @@ -1547,16 +1548,16 @@ mod tests { ColumnDataTypeWrapper::list_datatype(ColumnDataTypeWrapper::string_datatype()) ) ]), - ConcreteDataType::struct_datatype(StructType::new(vec![ + ConcreteDataType::struct_datatype(StructType::new(Arc::new(vec![ StructField::new("a".to_string(), ConcreteDataType::int64_datatype(), true), StructField::new( "a.a".to_string(), - ConcreteDataType::list_datatype(ConcreteDataType::string_datatype()), true + ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::string_datatype())), true ) - ])).try_into().expect("Failed to create column datatype from Struct(StructType { fields: [StructField { name: \"a\", data_type: Int64(Int64Type) }, StructField { name: \"a.a\", data_type: List(ListType { item_type: String(StringType) }) }] })") + ]))).try_into().expect("Failed to create column datatype from Struct(StructType { fields: [StructField { name: \"a\", data_type: Int64(Int64Type) }, StructField { name: \"a.a\", data_type: List(ListType { item_type: String(StringType) }) }] })") ); - let struct_type = StructType::new(vec![ + let struct_type = StructType::new(Arc::new(vec![ StructField::new("id".to_string(), ConcreteDataType::int64_datatype(), true), StructField::new( "name".to_string(), @@ -1569,7 +1570,7 @@ mod tests { ConcreteDataType::string_datatype(), true, ), - ]); + ])); assert_eq!( ColumnDataTypeWrapper::new( ColumnDataType::Json, @@ -1736,7 +1737,7 @@ mod tests { fn test_list_to_pb_value() { let value = Value::List(ListValue::new( vec![Value::Boolean(true)], - ConcreteDataType::boolean_datatype(), + Arc::new(ConcreteDataType::boolean_datatype()), )); let pb_value = to_proto_value(value); @@ -1756,14 +1757,14 @@ mod tests { let value = Value::Struct( StructValue::try_new( items, - StructType::new(vec![ + StructType::new(Arc::new(vec![ StructField::new( "a.a".to_string(), ConcreteDataType::boolean_datatype(), true, ), StructField::new("a.b".to_string(), ConcreteDataType::string_datatype(), true), - ]), + ])), ) .unwrap(), ); diff --git a/src/datatypes/src/data_type.rs b/src/datatypes/src/data_type.rs index 3f305828a2..eb47d30305 100644 --- a/src/datatypes/src/data_type.rs +++ b/src/datatypes/src/data_type.rs @@ -456,9 +456,9 @@ impl TryFrom<&ArrowDataType> for ConcreteDataType { } ArrowDataType::Utf8 | ArrowDataType::Utf8View => Self::string_datatype(), ArrowDataType::LargeUtf8 => Self::large_string_datatype(), - ArrowDataType::List(field) => Self::List(ListType::new( + ArrowDataType::List(field) => Self::List(ListType::new(Arc::new( ConcreteDataType::from_arrow_type(field.data_type()), - )), + ))), ArrowDataType::Dictionary(key_type, value_type) => { let key_type = ConcreteDataType::from_arrow_type(key_type); let value_type = ConcreteDataType::from_arrow_type(value_type); @@ -641,7 +641,7 @@ impl ConcreteDataType { } } - pub fn list_datatype(item_type: ConcreteDataType) -> ConcreteDataType { + pub fn list_datatype(item_type: Arc) -> ConcreteDataType { ConcreteDataType::List(ListType::new(item_type)) } @@ -794,7 +794,7 @@ mod tests { ArrowDataType::Int32, true, )))), - ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype())) + ConcreteDataType::List(ListType::new(Arc::new(ConcreteDataType::int32_datatype()))) ); assert!(matches!( ConcreteDataType::from_arrow_type(&ArrowDataType::Date32), @@ -985,9 +985,10 @@ mod tests { #[test] fn test_as_list() { - let list_type = ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()); + let list_type = + ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype())); assert_eq!( - ListType::new(ConcreteDataType::int32_datatype()), + ListType::new(Arc::new(ConcreteDataType::int32_datatype())), *list_type.as_list().unwrap() ); assert!(ConcreteDataType::int32_datatype().as_list().is_none()); @@ -1032,21 +1033,24 @@ mod tests { ); // Nested types assert_eq!( - ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()).to_string(), + ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype())) + .to_string(), "List" ); assert_eq!( - ConcreteDataType::list_datatype(ConcreteDataType::Dictionary(DictionaryType::new( - ConcreteDataType::int32_datatype(), - ConcreteDataType::string_datatype() + ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::Dictionary( + DictionaryType::new( + ConcreteDataType::int32_datatype(), + ConcreteDataType::string_datatype() + ) ))) .to_string(), "List>" ); assert_eq!( - ConcreteDataType::list_datatype(ConcreteDataType::list_datatype( - ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()) - )) + ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::list_datatype(Arc::new( + ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype())) + )))) .to_string(), "List>>" ); diff --git a/src/datatypes/src/json.rs b/src/datatypes/src/json.rs index 96894a1736..380cc8ce06 100644 --- a/src/datatypes/src/json.rs +++ b/src/datatypes/src/json.rs @@ -20,6 +20,7 @@ //! use std::collections::HashSet; +use std::sync::Arc; use common_base::bytes::StringBytes; use ordered_float::OrderedFloat; @@ -28,7 +29,7 @@ use snafu::{ResultExt, ensure}; use crate::data_type::{ConcreteDataType, DataType}; use crate::error::{self, Error}; -use crate::types::{ListType, StructField, StructType}; +use crate::types::{StructField, StructType}; use crate::value::{ListValue, StructValue, Value}; /// The configuration of JSON encoding @@ -146,11 +147,11 @@ pub fn encode_json_with_context<'a>( let json_string = json.to_string(); let struct_value = StructValue::try_new( vec![Value::String(json_string.into())], - StructType::new(vec![StructField::new( + StructType::new(Arc::new(vec![StructField::new( JsonStructureSettings::RAW_FIELD.to_string(), ConcreteDataType::string_datatype(), true, - )]), + )])), )?; return Ok(Value::Struct(struct_value)); } @@ -217,7 +218,7 @@ fn encode_json_object_with_context<'a>( let mut struct_fields = Vec::with_capacity(total_json_keys); // First, process fields from the provided schema in their original order if let Some(fields) = fields { - for field in fields.fields() { + for field in fields.fields().iter() { let field_name = field.name(); if let Some(value) = json_object.remove(field_name) { @@ -252,7 +253,7 @@ fn encode_json_object_with_context<'a>( )); } - let struct_type = StructType::new(struct_fields); + let struct_type = StructType::new(Arc::new(struct_fields)); StructValue::try_new(items, struct_type) } @@ -291,9 +292,8 @@ fn encode_json_array_with_context<'a>( } else { element_type.unwrap_or_else(ConcreteDataType::string_datatype) }; - let list_type = ListType::new(element_type); - Ok(ListValue::new(items, ConcreteDataType::List(list_type))) + Ok(ListValue::new(items, Arc::new(element_type))) } /// Helper function to encode a JSON value to a Value and determine its ConcreteDataType with context @@ -369,7 +369,7 @@ fn encode_json_value_with_context<'a>( Json::Array(arr) => { let list_value = encode_json_array_with_context(arr, expected_type, context)?; let data_type = list_value.datatype().clone(); - Ok((Value::List(list_value), data_type)) + Ok((Value::List(list_value), (*data_type).clone())) } Json::Object(obj) => { let struct_value = encode_json_object_with_context(obj, None, context)?; @@ -593,7 +593,7 @@ fn decode_struct_with_settings<'a>( } } - let struct_type = StructType::new(struct_fields); + let struct_type = StructType::new(Arc::new(struct_fields)); StructValue::try_new(items, struct_type) } @@ -651,8 +651,11 @@ fn decode_unstructured_raw_struct(struct_value: StructValue) -> Result = struct_type.fields().iter().map(|f| f.name()).collect(); + let fields = struct_type.fields(); + let field_names: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(field_names.contains(&"name")); assert!(field_names.contains(&"age")); assert!(field_names.contains(&"active")); @@ -855,12 +861,8 @@ mod tests { .unwrap(); if let Value::Struct(person_struct) = &items[person_index] { assert_eq!(person_struct.items().len(), 2); - let person_fields: Vec<&str> = person_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = person_struct.struct_type().fields(); + let person_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(person_fields.contains(&"name")); assert!(person_fields.contains(&"age")); } else { @@ -920,7 +922,7 @@ mod tests { // In this case, it should be string since we can't find a common numeric type assert_eq!( list_value.datatype(), - &ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype())) + Arc::new(ConcreteDataType::int64_datatype()) ); } else { panic!("Expected List value"); @@ -942,7 +944,7 @@ mod tests { // Empty arrays default to string type assert_eq!( list_value.datatype(), - &ConcreteDataType::List(ListType::new(ConcreteDataType::string_datatype())) + Arc::new(ConcreteDataType::string_datatype()) ); } else { panic!("Expected List value"); @@ -961,12 +963,8 @@ mod tests { if let Value::Struct(struct_value) = result { assert_eq!(struct_value.items().len(), 2); - let field_names: Vec<&str> = struct_value - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = struct_value.struct_type().fields(); + let field_names: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(field_names.contains(&"name")); assert!(field_names.contains(&"age")); } else { @@ -990,7 +988,7 @@ mod tests { ), StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), ]; - let struct_type = StructType::new(fields); + let struct_type = StructType::new(Arc::new(fields)); let concrete_type = ConcreteDataType::Struct(struct_type); let settings = JsonStructureSettings::Structured(None); @@ -1044,7 +1042,7 @@ mod tests { true, ), ]; - let struct_type = StructType::new(fields); + let struct_type = StructType::new(Arc::new(fields)); let result = encode_json_object_with_context( json.as_object().unwrap().clone(), @@ -1086,7 +1084,7 @@ mod tests { ), StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), ]; - let struct_type = StructType::new(fields); + let struct_type = StructType::new(Arc::new(fields)); let result = encode_json_object_with_context( json.as_object().unwrap().clone(), @@ -1127,7 +1125,7 @@ mod tests { ), StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), ]; - let struct_type = StructType::new(fields); + let struct_type = StructType::new(Arc::new(fields)); let result = encode_json_object_with_context( json.as_object().unwrap().clone(), @@ -1170,7 +1168,8 @@ mod tests { #[test] fn test_encode_json_array_with_item_type() { let json = json!([1, 2, 3]); - let list_type = ListType::new(ConcreteDataType::int8_datatype()); + let item_type = Arc::new(ConcreteDataType::int8_datatype()); + let list_type = ListType::new(item_type.clone()); let concrete_type = ConcreteDataType::List(list_type); let settings = JsonStructureSettings::Structured(None); let result = settings @@ -1184,10 +1183,7 @@ mod tests { assert_eq!(list_value.items()[0], Value::Int8(1)); assert_eq!(list_value.items()[1], Value::Int8(2)); assert_eq!(list_value.items()[2], Value::Int8(3)); - assert_eq!( - list_value.datatype(), - &ConcreteDataType::List(ListType::new(ConcreteDataType::int8_datatype())) - ); + assert_eq!(list_value.datatype(), item_type); } else { panic!("Expected List value"); } @@ -1196,7 +1192,8 @@ mod tests { #[test] fn test_encode_json_array_empty_with_item_type() { let json = json!([]); - let list_type = ListType::new(ConcreteDataType::string_datatype()); + let item_type = Arc::new(ConcreteDataType::string_datatype()); + let list_type = ListType::new(item_type.clone()); let concrete_type = ConcreteDataType::List(list_type); let settings = JsonStructureSettings::Structured(None); let result = settings @@ -1207,10 +1204,7 @@ mod tests { if let Value::List(list_value) = result { assert_eq!(list_value.items().len(), 0); - assert_eq!( - list_value.datatype(), - &ConcreteDataType::List(ListType::new(ConcreteDataType::string_datatype())) - ); + assert_eq!(list_value.datatype(), item_type); } else { panic!("Expected List value"); } @@ -1257,7 +1251,7 @@ mod tests { Value::Int64(25), Value::Boolean(true), ], - StructType::new(vec![ + StructType::new(Arc::new(vec![ StructField::new( "name".to_string(), ConcreteDataType::string_datatype(), @@ -1269,7 +1263,7 @@ mod tests { ConcreteDataType::boolean_datatype(), true, ), - ]), + ])), ); let result = settings.decode(Value::Struct(struct_value)).unwrap(); @@ -1287,7 +1281,7 @@ mod tests { let list_value = ListValue::new( vec![Value::Int64(1), Value::Int64(2), Value::Int64(3)], - ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype())), + Arc::new(ConcreteDataType::int64_datatype()), ); let result = settings.decode(Value::List(list_value)).unwrap(); @@ -1301,28 +1295,29 @@ mod tests { let inner_struct = StructValue::new( vec![Value::String("Alice".into()), Value::Int64(25)], - StructType::new(vec![ + StructType::new(Arc::new(vec![ StructField::new( "name".to_string(), ConcreteDataType::string_datatype(), true, ), StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), - ]), + ])), ); + let score_list_item_type = Arc::new(ConcreteDataType::int64_datatype()); let outer_struct = StructValue::new( vec![ Value::Struct(inner_struct), Value::List(ListValue::new( vec![Value::Int64(95), Value::Int64(87)], - ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype())), + score_list_item_type.clone(), )), ], - StructType::new(vec![ + StructType::new(Arc::new(vec![ StructField::new( "user".to_string(), - ConcreteDataType::Struct(StructType::new(vec![ + ConcreteDataType::Struct(StructType::new(Arc::new(vec![ StructField::new( "name".to_string(), ConcreteDataType::string_datatype(), @@ -1333,15 +1328,15 @@ mod tests { ConcreteDataType::int64_datatype(), true, ), - ])), + ]))), true, ), StructField::new( "scores".to_string(), - ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype())), + ConcreteDataType::List(ListType::new(score_list_item_type.clone())), true, ), - ]), + ])), ); let result = settings.decode(Value::Struct(outer_struct)).unwrap(); @@ -1374,11 +1369,11 @@ mod tests { let json_str = r#"{"name": "Bob", "age": 30}"#; let struct_value = StructValue::new( vec![Value::String(json_str.into())], - StructType::new(vec![StructField::new( + StructType::new(Arc::new(vec![StructField::new( JsonStructureSettings::RAW_FIELD.to_string(), ConcreteDataType::string_datatype(), true, - )]), + )])), ); let value = Value::Struct(struct_value); @@ -1404,7 +1399,7 @@ mod tests { Value::String("Alice".into()), Value::String(metadata_json.into()), ], - StructType::new(vec![ + StructType::new(Arc::new(vec![ StructField::new( "name".to_string(), ConcreteDataType::string_datatype(), @@ -1415,7 +1410,7 @@ mod tests { ConcreteDataType::string_datatype(), true, ), - ]), + ])), ); let result = settings.decode(Value::Struct(struct_value)).unwrap(); @@ -1445,14 +1440,14 @@ mod tests { Value::String("Bob".into()), Value::Null, // missing age field ], - StructType::new(vec![ + StructType::new(Arc::new(vec![ StructField::new( "name".to_string(), ConcreteDataType::string_datatype(), true, ), StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), - ]), + ])), ); let result = settings.decode(Value::Struct(struct_value)).unwrap(); @@ -1513,7 +1508,8 @@ mod tests { #[test] fn test_encode_json_array_with_list_type() { let json = json!([1, 2, 3]); - let list_type = ListType::new(ConcreteDataType::int64_datatype()); + let item_type = Arc::new(ConcreteDataType::int64_datatype()); + let list_type = ListType::new(item_type.clone()); let concrete_type = ConcreteDataType::List(list_type); let settings = JsonStructureSettings::Structured(None); @@ -1528,10 +1524,7 @@ mod tests { assert_eq!(list_value.items()[0], Value::Int64(1)); assert_eq!(list_value.items()[1], Value::Int64(2)); assert_eq!(list_value.items()[2], Value::Int64(3)); - assert_eq!( - list_value.datatype(), - &ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype())) - ); + assert_eq!(list_value.datatype(), item_type); } else { panic!("Expected List value"); } @@ -1644,7 +1637,7 @@ mod tests { } // Test with encode_with_type (with type) - let struct_type = StructType::new(vec![ + let struct_type = StructType::new(Arc::new(vec![ StructField::new( "name".to_string(), ConcreteDataType::string_datatype(), @@ -1656,7 +1649,7 @@ mod tests { ConcreteDataType::boolean_datatype(), true, ), - ]); + ])); let concrete_type = ConcreteDataType::Struct(struct_type); let result2 = settings @@ -1759,12 +1752,8 @@ mod tests { .unwrap(); if let Value::Struct(user_struct) = &items[user_index] { let user_items = user_struct.items(); - let user_fields: Vec<&str> = user_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = user_struct.struct_type().fields(); + let user_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); // name should be structured let name_index = user_fields.iter().position(|&f| f == "name").unwrap(); @@ -1798,7 +1787,7 @@ mod tests { Value::Int64(25), Value::Boolean(true), ], - StructType::new(vec![ + StructType::new(Arc::new(vec![ StructField::new( "name".to_string(), ConcreteDataType::string_datatype(), @@ -1810,7 +1799,7 @@ mod tests { ConcreteDataType::boolean_datatype(), true, ), - ]), + ])), ); let decoded_struct = settings.decode_struct(original_struct.clone()).unwrap(); @@ -1834,7 +1823,7 @@ mod tests { Value::Int64(25), Value::Boolean(true), ], - StructType::new(vec![ + StructType::new(Arc::new(vec![ StructField::new( "name".to_string(), ConcreteDataType::string_datatype(), @@ -1846,7 +1835,7 @@ mod tests { ConcreteDataType::boolean_datatype(), true, ), - ]), + ])), ); let decoded_struct = settings.decode_struct(original_struct.clone()).unwrap(); @@ -1863,7 +1852,28 @@ mod tests { unstructured_keys.insert("metadata".to_string()); let settings = JsonStructureSettings::PartialUnstructuredByKey { - fields: Some(StructType::new(vec![ + fields: Some(StructType::new(Arc::new(vec![ + StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new( + "metadata".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + ]))), + unstructured_keys, + }; + + // Create a struct where metadata is stored as unstructured JSON string + let encoded_struct = StructValue::new( + vec![ + Value::String("Alice".into()), + Value::String(r#"{"preferences":{"theme":"dark"},"history":[1,2,3]}"#.into()), + ], + StructType::new(Arc::new(vec![ StructField::new( "name".to_string(), ConcreteDataType::string_datatype(), @@ -1875,27 +1885,6 @@ mod tests { true, ), ])), - unstructured_keys, - }; - - // Create a struct where metadata is stored as unstructured JSON string - let encoded_struct = StructValue::new( - vec![ - Value::String("Alice".into()), - Value::String(r#"{"preferences":{"theme":"dark"},"history":[1,2,3]}"#.into()), - ], - StructType::new(vec![ - StructField::new( - "name".to_string(), - ConcreteDataType::string_datatype(), - true, - ), - StructField::new( - "metadata".to_string(), - ConcreteDataType::string_datatype(), - true, - ), - ]), ); let decoded_struct = settings.decode_struct(encoded_struct).unwrap(); @@ -1905,12 +1894,8 @@ mod tests { // Verify metadata field is now properly structured if let Value::Struct(metadata_struct) = &decoded_struct.items()[1] { - let metadata_fields: Vec<&str> = metadata_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = metadata_struct.struct_type().fields(); + let metadata_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(metadata_fields.contains(&"preferences")); assert!(metadata_fields.contains(&"history")); @@ -1936,7 +1921,7 @@ mod tests { Value::String("Alice".into()), Value::String(r#"{"preferences":{"theme":"dark"},"history":[1,2,3]}"#.into()), ], - StructType::new(vec![ + StructType::new(Arc::new(vec![ StructField::new( "name".to_string(), ConcreteDataType::string_datatype(), @@ -1947,16 +1932,16 @@ mod tests { ConcreteDataType::string_datatype(), true, ), - ]), + ])), ); let encoded_struct = StructValue::new( vec![Value::Struct(user_struct)], - StructType::new(vec![StructField::new( + StructType::new(Arc::new(vec![StructField::new( "user".to_string(), - ConcreteDataType::struct_datatype(StructType::new(vec![])), + ConcreteDataType::struct_datatype(StructType::new(Arc::new(vec![]))), true, - )]), + )])), ); let decoded_struct = settings.decode_struct(encoded_struct).unwrap(); @@ -1964,12 +1949,8 @@ mod tests { // Verify the nested structure is properly decoded if let Value::Struct(decoded_user) = &decoded_struct.items()[0] { if let Value::Struct(metadata_struct) = &decoded_user.items()[1] { - let metadata_fields: Vec<&str> = metadata_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = metadata_struct.struct_type().fields(); + let metadata_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(metadata_fields.contains(&"preferences")); assert!(metadata_fields.contains(&"history")); @@ -1987,12 +1968,8 @@ mod tests { if let Value::Struct(preferences_struct) = &metadata_struct.items()[preference_index] { - let pref_fields: Vec<&str> = preferences_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = preferences_struct.struct_type().fields(); + let pref_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(pref_fields.contains(&"theme")); } else { panic!("Expected preferences to be decoded as structured value"); @@ -2022,22 +1999,18 @@ mod tests { vec![Value::String( r#"{"name":"Alice","age":25,"active":true}"#.into(), )], - StructType::new(vec![StructField::new( + StructType::new(Arc::new(vec![StructField::new( "_raw".to_string(), ConcreteDataType::string_datatype(), true, - )]), + )])), ); let decoded_struct = settings.decode_struct(encoded_struct).unwrap(); // With UnstructuredRaw, the entire struct should be reconstructed from _raw field - let decoded_fields: Vec<&str> = decoded_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = decoded_struct.struct_type().fields(); + let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(decoded_fields.contains(&"name")); assert!(decoded_fields.contains(&"age")); @@ -2064,14 +2037,14 @@ mod tests { // Create a struct that doesn't match the expected UnstructuredRaw format let invalid_struct = StructValue::new( vec![Value::String("Alice".into()), Value::Int64(25)], - StructType::new(vec![ + StructType::new(Arc::new(vec![ StructField::new( "name".to_string(), ConcreteDataType::string_datatype(), true, ), StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), - ]), + ])), ); // Should fail with error since it doesn't match expected UnstructuredRaw format @@ -2093,20 +2066,16 @@ mod tests { // Test with a string primitive in _raw field let string_struct = StructValue::new( vec![Value::String("\"hello world\"".into())], - StructType::new(vec![StructField::new( + StructType::new(Arc::new(vec![StructField::new( "_raw".to_string(), ConcreteDataType::string_datatype(), true, - )]), + )])), ); let decoded_struct = settings.decode_struct(string_struct).unwrap(); - let decoded_fields: Vec<&str> = decoded_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = decoded_struct.struct_type().fields(); + let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(decoded_fields.contains(&"value")); assert_eq!( decoded_struct.items()[0], @@ -2116,60 +2085,48 @@ mod tests { // Test with a number primitive in _raw field let number_struct = StructValue::new( vec![Value::String("42".into())], - StructType::new(vec![StructField::new( + StructType::new(Arc::new(vec![StructField::new( "_raw".to_string(), ConcreteDataType::string_datatype(), true, - )]), + )])), ); let decoded_struct = settings.decode_struct(number_struct).unwrap(); - let decoded_fields: Vec<&str> = decoded_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = decoded_struct.struct_type().fields(); + let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(decoded_fields.contains(&"value")); assert_eq!(decoded_struct.items()[0], Value::Int64(42)); // Test with a boolean primitive in _raw field let bool_struct = StructValue::new( vec![Value::String("true".into())], - StructType::new(vec![StructField::new( + StructType::new(Arc::new(vec![StructField::new( "_raw".to_string(), ConcreteDataType::string_datatype(), true, - )]), + )])), ); let decoded_struct = settings.decode_struct(bool_struct).unwrap(); - let decoded_fields: Vec<&str> = decoded_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = decoded_struct.struct_type().fields(); + let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(decoded_fields.contains(&"value")); assert_eq!(decoded_struct.items()[0], Value::Boolean(true)); // Test with a null primitive in _raw field let null_struct = StructValue::new( vec![Value::String("null".into())], - StructType::new(vec![StructField::new( + StructType::new(Arc::new(vec![StructField::new( "_raw".to_string(), ConcreteDataType::string_datatype(), true, - )]), + )])), ); let decoded_struct = settings.decode_struct(null_struct).unwrap(); - let decoded_fields: Vec<&str> = decoded_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = decoded_struct.struct_type().fields(); + let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(decoded_fields.contains(&"value")); assert_eq!(decoded_struct.items()[0], Value::Null); } @@ -2182,20 +2139,16 @@ mod tests { // Test with an array in _raw field let array_struct = StructValue::new( vec![Value::String("[1, \"hello\", true, 3.15]".into())], - StructType::new(vec![StructField::new( + StructType::new(Arc::new(vec![StructField::new( "_raw".to_string(), ConcreteDataType::string_datatype(), true, - )]), + )])), ); let decoded_struct = settings.decode_struct(array_struct).unwrap(); - let decoded_fields: Vec<&str> = decoded_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = decoded_struct.struct_type().fields(); + let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(decoded_fields.contains(&"value")); if let Value::List(list_value) = &decoded_struct.items()[0] { @@ -2254,12 +2207,8 @@ mod tests { // Verify encoding worked - metadata and user.profile.settings should be unstructured if let Value::Struct(encoded_struct) = encoded_value { - let fields: Vec<&str> = encoded_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = encoded_struct.struct_type().fields(); + let fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(fields.contains(&"name")); assert!(fields.contains(&"age")); @@ -2277,21 +2226,13 @@ mod tests { // Check that user.profile.settings is unstructured let user_index = fields.iter().position(|&f| f == "user").unwrap(); if let Value::Struct(user_struct) = &encoded_struct.items()[user_index] { - let user_fields: Vec<&str> = user_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = user_struct.struct_type().fields(); + let user_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); let profile_index = user_fields.iter().position(|&f| f == "profile").unwrap(); if let Value::Struct(profile_struct) = &user_struct.items()[profile_index] { - let profile_fields: Vec<&str> = profile_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = profile_struct.struct_type().fields(); + let profile_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); let settings_index = profile_fields .iter() @@ -2315,12 +2256,8 @@ mod tests { let decoded_struct = settings.decode_struct(encoded_struct).unwrap(); // Verify the decoded struct has proper structure - let decoded_fields: Vec<&str> = decoded_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = decoded_struct.struct_type().fields(); + let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(decoded_fields.contains(&"name")); assert!(decoded_fields.contains(&"age")); @@ -2333,12 +2270,8 @@ mod tests { .position(|&f| f == "metadata") .unwrap(); if let Value::Struct(metadata_struct) = &decoded_struct.items()[metadata_index] { - let metadata_fields: Vec<&str> = metadata_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = metadata_struct.struct_type().fields(); + let metadata_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(metadata_fields.contains(&"tags")); assert!(metadata_fields.contains(&"preferences")); @@ -2349,12 +2282,8 @@ mod tests { .position(|&f| f == "preferences") .unwrap(); if let Value::Struct(prefs_struct) = &metadata_struct.items()[preferences_index] { - let prefs_fields: Vec<&str> = prefs_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = prefs_struct.struct_type().fields(); + let prefs_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(prefs_fields.contains(&"theme")); assert!(prefs_fields.contains(&"notifications")); @@ -2368,21 +2297,13 @@ mod tests { // Check that user.profile.settings is now properly structured let user_index = decoded_fields.iter().position(|&f| f == "user").unwrap(); if let Value::Struct(user_struct) = &decoded_struct.items()[user_index] { - let user_fields: Vec<&str> = user_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = user_struct.struct_type().fields(); + let user_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); let profile_index = user_fields.iter().position(|&f| f == "profile").unwrap(); if let Value::Struct(profile_struct) = &user_struct.items()[profile_index] { - let profile_fields: Vec<&str> = profile_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = profile_struct.struct_type().fields(); + let profile_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); let settings_index = profile_fields .iter() @@ -2390,12 +2311,8 @@ mod tests { .unwrap(); if let Value::Struct(settings_struct) = &profile_struct.items()[settings_index] { - let settings_fields: Vec<&str> = settings_struct - .struct_type() - .fields() - .iter() - .map(|f| f.name()) - .collect(); + let fields = settings_struct.struct_type().fields(); + let settings_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect(); assert!(settings_fields.contains(&"language")); assert!(settings_fields.contains(&"timezone")); diff --git a/src/datatypes/src/scalars.rs b/src/datatypes/src/scalars.rs index 2032f6bc56..5a16383893 100644 --- a/src/datatypes/src/scalars.rs +++ b/src/datatypes/src/scalars.rs @@ -380,6 +380,8 @@ impl<'a> ScalarRef<'a> for StructValueRef<'a> { #[cfg(test)] mod tests { + use std::sync::Arc; + use super::*; use crate::data_type::ConcreteDataType; use crate::timestamp::TimestampSecond; @@ -451,14 +453,13 @@ mod tests { #[test] fn test_list_value_scalar() { - let list_value = - ListValue::new(vec![Value::Int32(123)], ConcreteDataType::int32_datatype()); + let item_type = Arc::new(ConcreteDataType::int32_datatype()); + let list_value = ListValue::new(vec![Value::Int32(123)], item_type.clone()); let list_ref = ListValueRef::Ref { val: &list_value }; assert_eq!(list_ref, list_value.as_scalar_ref()); assert_eq!(list_value, list_ref.to_owned_scalar()); - let mut builder = - ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 1); + let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1); builder.push(None); builder.push(Some(list_value.as_scalar_ref())); let vector = builder.finish(); diff --git a/src/datatypes/src/type_id.rs b/src/datatypes/src/type_id.rs index 52e09d8751..987cae2cad 100644 --- a/src/datatypes/src/type_id.rs +++ b/src/datatypes/src/type_id.rs @@ -80,7 +80,10 @@ impl LogicalTypeId { /// Panics if data type is not supported. #[cfg(any(test, feature = "test"))] pub fn data_type(&self) -> crate::data_type::ConcreteDataType { + use std::sync::Arc; + use crate::data_type::ConcreteDataType; + use crate::types::StructType; match self { LogicalTypeId::Null => ConcreteDataType::null_datatype(), @@ -107,9 +110,11 @@ impl LogicalTypeId { } LogicalTypeId::TimestampNanosecond => ConcreteDataType::timestamp_nanosecond_datatype(), LogicalTypeId::List => { - ConcreteDataType::list_datatype(ConcreteDataType::null_datatype()) + ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::null_datatype())) + } + LogicalTypeId::Struct => { + ConcreteDataType::struct_datatype(StructType::new(Arc::new(vec![]))) } - LogicalTypeId::Struct => ConcreteDataType::struct_datatype(vec![].into()), LogicalTypeId::Dictionary => ConcreteDataType::dictionary_datatype( ConcreteDataType::null_datatype(), ConcreteDataType::null_datatype(), diff --git a/src/datatypes/src/types/list_type.rs b/src/datatypes/src/types/list_type.rs index acdb443c7e..c5256de31b 100644 --- a/src/datatypes/src/types/list_type.rs +++ b/src/datatypes/src/types/list_type.rs @@ -26,22 +26,19 @@ use crate::vectors::{ListVectorBuilder, MutableVector}; #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)] pub struct ListType { /// The type of List's item. - // Use Box to avoid recursive dependency, as enum ConcreteDataType depends on ListType. - item_type: Box, + item_type: Arc, } impl Default for ListType { fn default() -> Self { - ListType::new(ConcreteDataType::null_datatype()) + ListType::new(Arc::new(ConcreteDataType::null_datatype())) } } impl ListType { /// Create a new `ListType` whose item's data type is `item_type`. - pub fn new(item_type: ConcreteDataType) -> Self { - ListType { - item_type: Box::new(item_type), - } + pub fn new(item_type: Arc) -> Self { + ListType { item_type } } /// Returns the item data type. @@ -61,7 +58,7 @@ impl DataType for ListType { } fn default_value(&self) -> Value { - Value::List(ListValue::new(vec![], *self.item_type.clone())) + Value::List(ListValue::new(vec![], self.item_type.clone())) } fn as_arrow_type(&self) -> ArrowDataType { @@ -75,7 +72,7 @@ impl DataType for ListType { fn create_mutable_vector(&self, capacity: usize) -> Box { Box::new(ListVectorBuilder::with_type_capacity( - *self.item_type.clone(), + self.item_type.clone(), capacity, )) } @@ -95,11 +92,14 @@ mod tests { #[test] fn test_list_type() { - let t = ListType::new(ConcreteDataType::boolean_datatype()); + let t = ListType::new(Arc::new(ConcreteDataType::boolean_datatype())); assert_eq!("List", t.name()); assert_eq!(LogicalTypeId::List, t.logical_type_id()); assert_eq!( - Value::List(ListValue::new(vec![], ConcreteDataType::boolean_datatype())), + Value::List(ListValue::new( + vec![], + Arc::new(ConcreteDataType::boolean_datatype()) + )), t.default_value() ); assert_eq!( diff --git a/src/datatypes/src/types/struct_type.rs b/src/datatypes/src/types/struct_type.rs index e854783b32..5e3156498f 100644 --- a/src/datatypes/src/types/struct_type.rs +++ b/src/datatypes/src/types/struct_type.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::sync::Arc; + use arrow::datatypes::{DataType as ArrowDataType, Field}; use arrow_schema::Fields; use serde::{Deserialize, Serialize}; @@ -22,7 +24,7 @@ use crate::vectors::StructVectorBuilder; #[derive(Clone, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)] pub struct StructType { - fields: Vec, + fields: Arc>, } impl TryFrom<&Fields> for StructType { @@ -38,13 +40,9 @@ impl TryFrom<&Fields> for StructType { )) }) .collect::, Self::Error>>()?; - Ok(StructType { fields }) - } -} - -impl From> for StructType { - fn from(fields: Vec) -> Self { - StructType { fields } + Ok(StructType { + fields: Arc::new(fields), + }) } } @@ -87,16 +85,14 @@ impl DataType for StructType { } impl StructType { - pub fn new(fields: Vec) -> Self { - StructType { fields } + pub fn new(fields: Arc>) -> Self { + StructType { + fields: fields.clone(), + } } - pub fn fields(&self) -> &[StructField] { - &self.fields - } - - pub fn take_fields(self) -> Vec { - self.fields + pub fn fields(&self) -> Arc> { + self.fields.clone() } pub fn as_arrow_fields(&self) -> Fields { diff --git a/src/datatypes/src/value.rs b/src/datatypes/src/value.rs index 9427baeedc..11a974463c 100644 --- a/src/datatypes/src/value.rs +++ b/src/datatypes/src/value.rs @@ -911,11 +911,14 @@ impl TryFrom for serde_json::Value { Value::Struct(v) => { let (items, struct_type) = v.into_parts(); let map = struct_type - .take_fields() - .into_iter() + .fields() + .iter() .zip(items.into_iter()) .map(|(field, value)| { - Ok((field.take_name(), serde_json::Value::try_from(value)?)) + Ok(( + field.name().to_string(), + serde_json::Value::try_from(value)?, + )) }) .collect::>>()?; serde_json::Value::Object(map) @@ -934,13 +937,13 @@ pub struct ListValue { items: Vec, /// Inner values datatype, to distinguish empty lists of different datatypes. /// Restricted by DataFusion, cannot use null datatype for empty list. - datatype: ConcreteDataType, + datatype: Arc, } impl Eq for ListValue {} impl ListValue { - pub fn new(items: Vec, datatype: ConcreteDataType) -> Self { + pub fn new(items: Vec, datatype: Arc) -> Self { Self { items, datatype } } @@ -952,12 +955,13 @@ impl ListValue { self.items } - pub fn into_parts(self) -> (Vec, ConcreteDataType) { + pub fn into_parts(self) -> (Vec, Arc) { (self.items, self.datatype) } - pub fn datatype(&self) -> &ConcreteDataType { - &self.datatype + /// List value's inner type data type + pub fn datatype(&self) -> Arc { + self.datatype.clone() } pub fn len(&self) -> usize { @@ -988,12 +992,13 @@ impl ListValue { .first() .map(|x| x.as_value_ref().data_size() * self.items.len()) .unwrap_or(0) + + std::mem::size_of::>() } } impl Default for ListValue { fn default() -> ListValue { - ListValue::new(vec![], ConcreteDataType::null_datatype()) + ListValue::new(vec![], Arc::new(ConcreteDataType::null_datatype())) } } @@ -1066,7 +1071,8 @@ impl StructValue { self.items .iter() .map(|x| x.as_value_ref().data_size()) - .sum() + .sum::() + + std::mem::size_of::() } fn try_to_scalar_value(&self, output_type: &StructType) -> Result { @@ -1089,7 +1095,7 @@ impl StructValue { impl Default for StructValue { fn default() -> StructValue { - StructValue::try_new(vec![], StructType::new(vec![])).unwrap() + StructValue::try_new(vec![], StructType::new(Arc::new(vec![]))).unwrap() } } @@ -1136,7 +1142,7 @@ impl TryFrom for Value { .flatten() .map(|x| x.try_into()) .collect::>>()?; - Value::List(ListValue::new(items, datatype)) + Value::List(ListValue::new(items, Arc::new(datatype))) } ScalarValue::Date32(d) => d.map(|x| Value::Date(Date::new(x))).unwrap_or(Value::Null), ScalarValue::TimestampSecond(t, _) => t @@ -1547,7 +1553,7 @@ pub enum ListValueRef<'a> { }, RefList { val: Vec>, - item_datatype: ConcreteDataType, + item_datatype: Arc, }, } @@ -1564,9 +1570,9 @@ impl ListValueRef<'_> { } } /// Returns the inner element's data type. - fn datatype(&self) -> ConcreteDataType { + fn datatype(&self) -> Arc { match self { - ListValueRef::Indexed { vector, .. } => vector.data_type(), + ListValueRef::Indexed { vector, .. } => vector.item_type(), ListValueRef::Ref { val } => val.datatype().clone(), ListValueRef::RefList { item_datatype, .. } => item_datatype.clone(), } @@ -1707,12 +1713,18 @@ impl ValueRef<'_> { ValueRef::List(v) => match v { ListValueRef::Indexed { vector, .. } => vector.memory_size() / vector.len(), ListValueRef::Ref { val } => val.estimated_size(), - ListValueRef::RefList { val, .. } => val.iter().map(|v| v.data_size()).sum(), + ListValueRef::RefList { val, .. } => { + val.iter().map(|v| v.data_size()).sum::() + + std::mem::size_of::>() + } }, ValueRef::Struct(val) => match val { StructValueRef::Indexed { vector, .. } => vector.memory_size() / vector.len(), StructValueRef::Ref(val) => val.estimated_size(), - StructValueRef::RefList { val, .. } => val.iter().map(|v| v.data_size()).sum(), + StructValueRef::RefList { val, .. } => { + val.iter().map(|v| v.data_size()).sum::() + + std::mem::size_of::() + } }, ValueRef::Json(v) => v.data_size(), } @@ -1730,7 +1742,7 @@ pub(crate) mod tests { use crate::vectors::ListVectorBuilder; pub(crate) fn build_struct_type() -> StructType { - StructType::new(vec![ + StructType::new(Arc::new(vec![ StructField::new("id".to_string(), ConcreteDataType::int32_datatype(), false), StructField::new( "name".to_string(), @@ -1745,10 +1757,10 @@ pub(crate) mod tests { ), StructField::new( "awards".to_string(), - ConcreteDataType::list_datatype(ConcreteDataType::boolean_datatype()), + ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::boolean_datatype())), true, ), - ]) + ])) } pub(crate) fn build_struct_value() -> StructValue { @@ -1778,12 +1790,12 @@ pub(crate) mod tests { } pub fn build_list_type() -> ConcreteDataType { - ConcreteDataType::list_datatype(ConcreteDataType::boolean_datatype()) + ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::boolean_datatype())) } pub(crate) fn build_list_value() -> ListValue { let items = vec![Value::Boolean(true), Value::Boolean(false)]; - ListValue::new(items, ConcreteDataType::boolean_datatype()) + ListValue::new(items, Arc::new(ConcreteDataType::boolean_datatype())) } pub(crate) fn build_scalar_list_value() -> ScalarValue { @@ -1909,7 +1921,10 @@ pub(crate) mod tests { build_scalar_list_value().try_into().unwrap() ); assert_eq!( - Value::List(ListValue::new(vec![], ConcreteDataType::uint32_datatype())), + Value::List(ListValue::new( + vec![], + Arc::new(ConcreteDataType::uint32_datatype()) + )), ScalarValue::List(ScalarValue::new_list(&[], &ArrowDataType::UInt32, true)) .try_into() .unwrap() @@ -2176,15 +2191,13 @@ pub(crate) mod tests { &ConcreteDataType::binary_datatype(), &Value::Binary(Bytes::from(b"world".as_slice())), ); + let item_type = Arc::new(ConcreteDataType::int32_datatype()); check_type_and_value( - &ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()), - &Value::List(ListValue::new( - vec![Value::Int32(10)], - ConcreteDataType::int32_datatype(), - )), + &ConcreteDataType::list_datatype(item_type.clone()), + &Value::List(ListValue::new(vec![Value::Int32(10)], item_type.clone())), ); check_type_and_value( - &ConcreteDataType::list_datatype(ConcreteDataType::null_datatype()), + &ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::null_datatype())), &Value::List(ListValue::default()), ); check_type_and_value( @@ -2244,11 +2257,12 @@ pub(crate) mod tests { &Value::Decimal128(Decimal128::new(1, 38, 10)), ); + let item_type = Arc::new(ConcreteDataType::boolean_datatype()); check_type_and_value( - &ConcreteDataType::list_datatype(ConcreteDataType::boolean_datatype()), + &ConcreteDataType::list_datatype(item_type.clone()), &Value::List(ListValue::new( vec![Value::Boolean(true)], - ConcreteDataType::boolean_datatype(), + item_type.clone(), )), ); @@ -2377,7 +2391,7 @@ pub(crate) mod tests { json_value, to_json(Value::List(ListValue { items: vec![Value::Int32(123)], - datatype: ConcreteDataType::int32_datatype(), + datatype: Arc::new(ConcreteDataType::int32_datatype()), })) ); @@ -2387,7 +2401,7 @@ pub(crate) mod tests { Value::String("tomcat".into()), Value::Boolean(true), ], - StructType::new(vec![ + StructType::new(Arc::new(vec![ StructField::new("num".to_string(), ConcreteDataType::int64_datatype(), true), StructField::new( "name".to_string(), @@ -2399,7 +2413,7 @@ pub(crate) mod tests { ConcreteDataType::boolean_datatype(), true, ), - ]), + ])), ) .unwrap(); assert_eq!( @@ -2422,7 +2436,7 @@ pub(crate) mod tests { assert_eq!( serde_json::Value::try_from(Value::Json(Box::new(Value::List(ListValue::new( vec![Value::Int32(1), Value::Int32(2), Value::Int32(3),], - ConcreteDataType::int32_datatype() + Arc::new(ConcreteDataType::int32_datatype()) ))))) .unwrap(), serde_json::json!([1, 2, 3]) @@ -2617,7 +2631,7 @@ pub(crate) mod tests { assert_eq!( Value::List(ListValue::new( vec![], - ConcreteDataType::timestamp_second_datatype(), + Arc::new(ConcreteDataType::timestamp_second_datatype()), )) .to_string(), "TimestampSecond[]" @@ -2625,7 +2639,7 @@ pub(crate) mod tests { assert_eq!( Value::List(ListValue::new( vec![], - ConcreteDataType::timestamp_millisecond_datatype(), + Arc::new(ConcreteDataType::timestamp_millisecond_datatype()), )) .to_string(), "TimestampMillisecond[]" @@ -2633,7 +2647,7 @@ pub(crate) mod tests { assert_eq!( Value::List(ListValue::new( vec![], - ConcreteDataType::timestamp_microsecond_datatype(), + Arc::new(ConcreteDataType::timestamp_microsecond_datatype()), )) .to_string(), "TimestampMicrosecond[]" @@ -2641,7 +2655,7 @@ pub(crate) mod tests { assert_eq!( Value::List(ListValue::new( vec![], - ConcreteDataType::timestamp_nanosecond_datatype(), + Arc::new(ConcreteDataType::timestamp_nanosecond_datatype()), )) .to_string(), "TimestampNanosecond[]" @@ -2765,9 +2779,9 @@ pub(crate) mod tests { assert_eq!( build_scalar_list_value(), Value::List(build_list_value()) - .try_to_scalar_value(&ConcreteDataType::list_datatype( + .try_to_scalar_value(&ConcreteDataType::list_datatype(Arc::new( ConcreteDataType::boolean_datatype() - )) + ))) .unwrap() ); } @@ -2912,9 +2926,9 @@ pub(crate) mod tests { assert_eq!( ScalarValue::new_null_list(ArrowDataType::Boolean, true, 1), Value::Null - .try_to_scalar_value(&ConcreteDataType::list_datatype( - ConcreteDataType::boolean_datatype(), - )) + .try_to_scalar_value(&ConcreteDataType::list_datatype(Arc::new( + ConcreteDataType::boolean_datatype() + ))) .unwrap() ); @@ -2929,11 +2943,10 @@ pub(crate) mod tests { #[test] fn test_list_value_to_scalar_value() { let items = vec![Value::Int32(-1), Value::Null]; - let list = Value::List(ListValue::new(items, ConcreteDataType::int32_datatype())); + let item_type = Arc::new(ConcreteDataType::int32_datatype()); + let list = Value::List(ListValue::new(items, item_type.clone())); let df_list = list - .try_to_scalar_value(&ConcreteDataType::list_datatype( - ConcreteDataType::int32_datatype(), - )) + .try_to_scalar_value(&ConcreteDataType::list_datatype(item_type.clone())) .unwrap(); assert!(matches!(df_list, ScalarValue::List(_))); match df_list { @@ -3092,10 +3105,10 @@ pub(crate) mod tests { Value::String("hello world".into()), Value::String("greptimedb".into()), ], - datatype: ConcreteDataType::string_datatype(), + datatype: Arc::new(ConcreteDataType::string_datatype()), }, }), - 22, + 30, ); let data = vec![ @@ -3103,12 +3116,12 @@ pub(crate) mod tests { None, Some(vec![Some(4), None, Some(6)]), ]; - let mut builder = - ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 8); + let item_type = Arc::new(ConcreteDataType::int32_datatype()); + let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 8); for vec_opt in &data { if let Some(vec) = vec_opt { let values = vec.iter().map(|v| Value::from(*v)).collect(); - let list_value = ListValue::new(values, ConcreteDataType::int32_datatype()); + let list_value = ListValue::new(values, item_type.clone()); builder.push(Some(ListValueRef::Ref { val: &list_value })); } else { @@ -3142,14 +3155,14 @@ pub(crate) mod tests { check_value_ref_size_eq( &ValueRef::Struct(StructValueRef::Ref(&build_struct_value())), - 15, + 31, ); check_value_ref_size_eq( &ValueRef::Json(Box::new(ValueRef::Struct(StructValueRef::Ref( &build_struct_value(), )))), - 15, + 31, ); } diff --git a/src/datatypes/src/vectors.rs b/src/datatypes/src/vectors.rs index b80287fec5..e61b2ca35e 100644 --- a/src/datatypes/src/vectors.rs +++ b/src/datatypes/src/vectors.rs @@ -424,12 +424,12 @@ pub mod tests { #[test] #[should_panic(expected = "Must use ListVectorBuilder::with_type_capacity()")] fn test_mutable_vector_list_data_type() { + let item_type = Arc::new(ConcreteDataType::int32_datatype()); // List type - let builder = - ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 1024); + let builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1024); assert_eq!( builder.data_type(), - ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()) + ConcreteDataType::list_datatype(item_type) ); // Panic with_capacity diff --git a/src/datatypes/src/vectors/helper.rs b/src/datatypes/src/vectors/helper.rs index a9eafce154..05021d7338 100644 --- a/src/datatypes/src/vectors/helper.rs +++ b/src/datatypes/src/vectors/helper.rs @@ -168,7 +168,7 @@ impl Helper { ConstantVector::new(Arc::new(BinaryVector::from(vec![v])), length) } ScalarValue::List(array) => { - let item_type = ConcreteDataType::try_from(&array.value_type())?; + let item_type = Arc::new(ConcreteDataType::try_from(&array.value_type())?); let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1); let values = ScalarValue::convert_array_to_scalar_vec(array.as_ref()) .context(ConvertArrowArrayToScalarsSnafu)? @@ -560,7 +560,7 @@ mod tests { )); let vector = Helper::try_from_scalar_value(value, 3).unwrap(); assert_eq!( - ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()), + ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype())), vector.data_type() ); assert_eq!(3, vector.len()); diff --git a/src/datatypes/src/vectors/list.rs b/src/datatypes/src/vectors/list.rs index feda1beeea..5b6f55a602 100644 --- a/src/datatypes/src/vectors/list.rs +++ b/src/datatypes/src/vectors/list.rs @@ -35,7 +35,7 @@ use crate::vectors::{self, Helper, MutableVector, Validity, Vector, VectorRef}; pub struct ListVector { array: ListArray, /// The datatype of the items in the list. - item_type: ConcreteDataType, + item_type: Arc, } impl ListVector { @@ -50,7 +50,7 @@ impl ListVector { &self.array } - pub(crate) fn item_type(&self) -> ConcreteDataType { + pub(crate) fn item_type(&self) -> Arc { self.item_type.clone() } } @@ -145,10 +145,10 @@ impl Serializable for ListVector { impl From for ListVector { fn from(array: ListArray) -> Self { - let item_type = ConcreteDataType::from_arrow_type(match array.data_type() { + let item_type = Arc::new(ConcreteDataType::from_arrow_type(match array.data_type() { ArrowDataType::List(field) => field.data_type(), other => panic!("Try to create ListVector from an arrow array with type {other:?}"), - }); + })); Self { array, item_type } } } @@ -217,7 +217,7 @@ impl ScalarVector for ListVector { // See https://github.com/apache/arrow-rs/blob/94565bca99b5d9932a3e9a8e094aaf4e4384b1e5/arrow-array/src/builder/generic_list_builder.rs /// [ListVector] builder. pub struct ListVectorBuilder { - item_type: ConcreteDataType, + item_type: Arc, offsets_builder: Int32BufferBuilder, null_buffer_builder: NullBufferBuilder, values_builder: Box, @@ -226,7 +226,10 @@ pub struct ListVectorBuilder { impl ListVectorBuilder { /// Creates a new [`ListVectorBuilder`]. `item_type` is the data type of the list item, `capacity` /// is the number of items to pre-allocate space for in this builder. - pub fn with_type_capacity(item_type: ConcreteDataType, capacity: usize) -> ListVectorBuilder { + pub fn with_type_capacity( + item_type: Arc, + capacity: usize, + ) -> ListVectorBuilder { let mut offsets_builder = Int32BufferBuilder::new(capacity + 1); offsets_builder.append(0); // The actual required capacity might be greater than the capacity of the `ListVector` @@ -496,12 +499,12 @@ pub mod tests { use crate::vectors::Int32Vector; pub fn new_list_vector(data: &[Option>>]) -> ListVector { - let mut builder = - ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 8); + let item_type = Arc::new(ConcreteDataType::int32_datatype()); + let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 8); for vec_opt in data { if let Some(vec) = vec_opt { let values = vec.iter().map(|v| Value::from(*v)).collect(); - let list_value = ListValue::new(values, ConcreteDataType::int32_datatype()); + let list_value = ListValue::new(values, item_type.clone()); builder.push(Some(ListValueRef::Ref { val: &list_value })); } else { @@ -537,10 +540,11 @@ pub mod tests { Some(vec![Some(4), None, Some(6)]), ]; + let item_type = Arc::new(ConcreteDataType::int32_datatype()); let list_vector = new_list_vector(&data); assert_eq!( - ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype())), + ConcreteDataType::List(ListType::new(item_type.clone())), list_vector.data_type() ); assert_eq!("ListVector", list_vector.vector_type_name()); @@ -581,7 +585,7 @@ pub mod tests { assert_eq!( Value::List(ListValue::new( vec![Value::Int32(1), Value::Int32(2), Value::Int32(3)], - ConcreteDataType::int32_datatype() + item_type.clone() )), list_vector.get(0) ); @@ -600,7 +604,7 @@ pub mod tests { assert_eq!( Value::List(ListValue::new( vec![Value::Int32(4), Value::Null, Value::Int32(6)], - ConcreteDataType::int32_datatype() + item_type.clone() )), list_vector.get(2) ); @@ -636,10 +640,11 @@ pub mod tests { Some(vec![Some(4), None, Some(6)]), ]; + let item_type = Arc::new(ConcreteDataType::int32_datatype()); let list_vector = new_list_vector(&data); assert_eq!( - ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype())), + ConcreteDataType::List(ListType::new(item_type.clone())), list_vector.data_type() ); let mut iter = list_vector.values_iter(); @@ -672,12 +677,12 @@ pub mod tests { #[test] fn test_list_vector_builder() { - let mut builder = - ListType::new(ConcreteDataType::int32_datatype()).create_mutable_vector(3); + let item_type = Arc::new(ConcreteDataType::int32_datatype()); + let mut builder = ListType::new(item_type.clone()).create_mutable_vector(3); builder.push_value_ref(&ValueRef::List(ListValueRef::Ref { val: &ListValue::new( vec![Value::Int32(4), Value::Null, Value::Int32(6)], - ConcreteDataType::int32_datatype(), + item_type.clone(), ), })); assert!(builder.try_push_value_ref(&ValueRef::Int32(123)).is_err()); @@ -706,13 +711,13 @@ pub mod tests { #[test] fn test_list_vector_for_scalar() { - let mut builder = - ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 2); + let item_type = Arc::new(ConcreteDataType::int32_datatype()); + let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 2); builder.push(None); builder.push(Some(ListValueRef::Ref { val: &ListValue::new( vec![Value::Int32(4), Value::Null, Value::Int32(6)], - ConcreteDataType::int32_datatype(), + item_type.clone(), ), })); let vector = builder.finish(); @@ -757,13 +762,13 @@ pub mod tests { #[test] fn test_list_vector_builder_finish_cloned() { - let mut builder = - ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 2); + let item_type = Arc::new(ConcreteDataType::int32_datatype()); + let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 2); builder.push(None); builder.push(Some(ListValueRef::Ref { val: &ListValue::new( vec![Value::Int32(4), Value::Null, Value::Int32(6)], - ConcreteDataType::int32_datatype(), + item_type.clone(), ), })); let vector = builder.finish_cloned(); diff --git a/src/datatypes/src/vectors/struct_vector.rs b/src/datatypes/src/vectors/struct_vector.rs index 26fa5ad844..e4f0fe5b2a 100644 --- a/src/datatypes/src/vectors/struct_vector.rs +++ b/src/datatypes/src/vectors/struct_vector.rs @@ -493,20 +493,21 @@ mod tests { let struct_type = ConcreteDataType::struct_datatype(build_struct_type()); let struct_value = build_struct_value(); // level 2: list - let list_type = ConcreteDataType::list_datatype(struct_type.clone()); + let struct_type_ref = Arc::new(struct_type); + let list_type = ConcreteDataType::list_datatype(struct_type_ref.clone()); let list_value = ListValue::new( vec![ Value::Struct(struct_value.clone()), Value::Struct(struct_value.clone()), ], - struct_type.clone(), + struct_type_ref.clone(), ); // level 3: struct - let root_type = StructType::new(vec![StructField::new( + let root_type = StructType::new(Arc::new(vec![StructField::new( "items".to_string(), list_type, false, - )]); + )])); let root_value = StructValue::new(vec![Value::List(list_value)], root_type.clone()); let mut builder = StructVectorBuilder::with_type_and_capacity(root_type.clone(), 20); diff --git a/src/flow/src/compute/render/reduce.rs b/src/flow/src/compute/render/reduce.rs index 935c29763d..e8b69af128 100644 --- a/src/flow/src/compute/render/reduce.rs +++ b/src/flow/src/compute/render/reduce.rs @@ -1146,7 +1146,7 @@ fn from_accums_to_offsetted_accum(new_accums: Vec>) -> Vec { }) .map(Value::from) .collect::>(); - let first = ListValue::new(offset, ConcreteDataType::uint64_datatype()); + let first = ListValue::new(offset, Arc::new(ConcreteDataType::uint64_datatype())); let first = Value::List(first); // construct new_accums diff --git a/src/servers/src/postgres/types.rs b/src/servers/src/postgres/types.rs index e78980f1d8..9c32ee2fdd 100644 --- a/src/servers/src/postgres/types.rs +++ b/src/servers/src/postgres/types.rs @@ -19,6 +19,7 @@ mod interval; use std::collections::HashMap; use std::ops::Deref; +use std::sync::Arc; use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime}; use common_time::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth}; @@ -90,7 +91,7 @@ fn encode_array( value_list: ListValue, builder: &mut DataRowEncoder, ) -> PgWireResult<()> { - match &value_list.datatype() { + match value_list.datatype().as_ref() { ConcreteDataType::Boolean(_) => { let array = value_list .items() @@ -551,21 +552,21 @@ pub(super) fn type_pg_to_gt(origin: &Type) -> Result { &Type::TIME => Ok(ConcreteDataType::timestamp_datatype( common_time::timestamp::TimeUnit::Microsecond, )), - &Type::CHAR_ARRAY => Ok(ConcreteDataType::list_datatype( + &Type::CHAR_ARRAY => Ok(ConcreteDataType::list_datatype(Arc::new( ConcreteDataType::int8_datatype(), - )), - &Type::INT2_ARRAY => Ok(ConcreteDataType::list_datatype( + ))), + &Type::INT2_ARRAY => Ok(ConcreteDataType::list_datatype(Arc::new( ConcreteDataType::int16_datatype(), - )), - &Type::INT4_ARRAY => Ok(ConcreteDataType::list_datatype( + ))), + &Type::INT4_ARRAY => Ok(ConcreteDataType::list_datatype(Arc::new( ConcreteDataType::int32_datatype(), - )), - &Type::INT8_ARRAY => Ok(ConcreteDataType::list_datatype( + ))), + &Type::INT8_ARRAY => Ok(ConcreteDataType::list_datatype(Arc::new( ConcreteDataType::int64_datatype(), - )), - &Type::VARCHAR_ARRAY => Ok(ConcreteDataType::list_datatype( + ))), + &Type::VARCHAR_ARRAY => Ok(ConcreteDataType::list_datatype(Arc::new( ConcreteDataType::string_datatype(), - )), + ))), _ => server_error::InternalSnafu { err_msg: format!("unimplemented datatype {origin:?}"), } @@ -1345,10 +1346,12 @@ mod test { ConcreteDataType::interval_datatype(IntervalUnit::YearMonth), ConcreteDataType::interval_datatype(IntervalUnit::DayTime), ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano), - ConcreteDataType::list_datatype(ConcreteDataType::int64_datatype()), - ConcreteDataType::list_datatype(ConcreteDataType::float64_datatype()), - ConcreteDataType::list_datatype(ConcreteDataType::string_datatype()), - ConcreteDataType::list_datatype(ConcreteDataType::timestamp_second_datatype()), + ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int64_datatype())), + ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::float64_datatype())), + ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::string_datatype())), + ConcreteDataType::list_datatype( + Arc::new(ConcreteDataType::timestamp_second_datatype()), + ), ]; let values = vec![ Value::Null, @@ -1381,19 +1384,19 @@ mod test { Value::IntervalMonthDayNano(IntervalMonthDayNano::new(1, 1, 10)), Value::List(ListValue::new( vec![Value::Int64(1i64)], - ConcreteDataType::int64_datatype(), + Arc::new(ConcreteDataType::int64_datatype()), )), Value::List(ListValue::new( vec![Value::Float64(1.0f64.into())], - ConcreteDataType::float64_datatype(), + Arc::new(ConcreteDataType::float64_datatype()), )), Value::List(ListValue::new( vec![Value::String("tom".into())], - ConcreteDataType::string_datatype(), + Arc::new(ConcreteDataType::string_datatype()), )), Value::List(ListValue::new( vec![Value::Timestamp(Timestamp::new(1i64, TimeUnit::Second))], - ConcreteDataType::timestamp_second_datatype(), + Arc::new(ConcreteDataType::timestamp_second_datatype()), )), ]; let query_context = QueryContextBuilder::default()