fix: list inner type for json and valueref, refactor type to ref for struct/list (#7113)

* refactor: use arc for struct type

* fix: inner type of list value and ref
This commit is contained in:
Ning Sun
2025-10-21 20:46:18 +08:00
committed by GitHub
parent 2e7b3951fb
commit bfa00df9f2
14 changed files with 338 additions and 392 deletions

View File

@@ -13,6 +13,7 @@
// limitations under the License.
use std::collections::HashSet;
use std::sync::Arc;
use common_decimal::Decimal128;
use common_decimal::decimal128::{DECIMAL128_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION};
@@ -185,7 +186,7 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
datatype: d.datatype(),
datatype_ext: d.datatype_extension.clone().map(|d| *d),
};
ConcreteDataType::list_datatype(item_type.into())
ConcreteDataType::list_datatype(Arc::new(item_type.into()))
} else {
// invalid state: type extension not found
ConcreteDataType::null_datatype()
@@ -208,7 +209,7 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
StructField::new(f.name.clone(), field_type.into(), true)
})
.collect::<Vec<_>>();
ConcreteDataType::struct_datatype(StructType::from(fields))
ConcreteDataType::struct_datatype(StructType::new(Arc::new(fields)))
} else {
// invalid state: type extension not found
ConcreteDataType::null_datatype()
@@ -445,7 +446,7 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
ColumnDataType::Struct => {
if let Some(struct_type) = datatype.as_struct() {
let mut fields = Vec::with_capacity(struct_type.fields().len());
for field in struct_type.fields() {
for field in struct_type.fields().iter() {
let field_type =
ColumnDataTypeWrapper::try_from(field.data_type().clone())?;
let proto_field = crate::v1::StructField {
@@ -755,7 +756,7 @@ pub fn pb_value_to_value_ref<'a>(
let list_value = ListValueRef::RefList {
val: items,
item_datatype: item_type.clone(),
item_datatype: Arc::new(item_type.clone()),
};
ValueRef::List(list_value)
}
@@ -794,7 +795,7 @@ pub fn pb_value_to_value_ref<'a>(
let struct_value_ref = StructValueRef::RefList {
val: items,
fields: StructType::new(struct_fields),
fields: StructType::new(Arc::new(struct_fields)),
};
ValueRef::Struct(struct_value_ref)
}
@@ -1351,10 +1352,10 @@ mod tests {
ColumnDataTypeWrapper::vector_datatype(3).into()
);
assert_eq!(
ConcreteDataType::list_datatype(ConcreteDataType::string_datatype()),
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::string_datatype())),
ColumnDataTypeWrapper::list_datatype(ColumnDataTypeWrapper::string_datatype()).into()
);
let struct_type = StructType::new(vec![
let struct_type = StructType::new(Arc::new(vec![
StructField::new("id".to_string(), ConcreteDataType::int64_datatype(), true),
StructField::new(
"name".to_string(),
@@ -1367,7 +1368,7 @@ mod tests {
ConcreteDataType::string_datatype(),
true,
),
]);
]));
assert_eq!(
ConcreteDataType::struct_datatype(struct_type.clone()),
ColumnDataTypeWrapper::struct_datatype(vec![
@@ -1534,7 +1535,7 @@ mod tests {
assert_eq!(
ColumnDataTypeWrapper::list_datatype(ColumnDataTypeWrapper::int16_datatype()),
ConcreteDataType::list_datatype(ConcreteDataType::int16_datatype())
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int16_datatype()))
.try_into()
.expect("Failed to create column datatype from List(ListType { item_type: Int16(Int16Type) })")
);
@@ -1547,16 +1548,16 @@ mod tests {
ColumnDataTypeWrapper::list_datatype(ColumnDataTypeWrapper::string_datatype())
)
]),
ConcreteDataType::struct_datatype(StructType::new(vec![
ConcreteDataType::struct_datatype(StructType::new(Arc::new(vec![
StructField::new("a".to_string(), ConcreteDataType::int64_datatype(), true),
StructField::new(
"a.a".to_string(),
ConcreteDataType::list_datatype(ConcreteDataType::string_datatype()), true
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::string_datatype())), true
)
])).try_into().expect("Failed to create column datatype from Struct(StructType { fields: [StructField { name: \"a\", data_type: Int64(Int64Type) }, StructField { name: \"a.a\", data_type: List(ListType { item_type: String(StringType) }) }] })")
]))).try_into().expect("Failed to create column datatype from Struct(StructType { fields: [StructField { name: \"a\", data_type: Int64(Int64Type) }, StructField { name: \"a.a\", data_type: List(ListType { item_type: String(StringType) }) }] })")
);
let struct_type = StructType::new(vec![
let struct_type = StructType::new(Arc::new(vec![
StructField::new("id".to_string(), ConcreteDataType::int64_datatype(), true),
StructField::new(
"name".to_string(),
@@ -1569,7 +1570,7 @@ mod tests {
ConcreteDataType::string_datatype(),
true,
),
]);
]));
assert_eq!(
ColumnDataTypeWrapper::new(
ColumnDataType::Json,
@@ -1736,7 +1737,7 @@ mod tests {
fn test_list_to_pb_value() {
let value = Value::List(ListValue::new(
vec![Value::Boolean(true)],
ConcreteDataType::boolean_datatype(),
Arc::new(ConcreteDataType::boolean_datatype()),
));
let pb_value = to_proto_value(value);
@@ -1756,14 +1757,14 @@ mod tests {
let value = Value::Struct(
StructValue::try_new(
items,
StructType::new(vec![
StructType::new(Arc::new(vec![
StructField::new(
"a.a".to_string(),
ConcreteDataType::boolean_datatype(),
true,
),
StructField::new("a.b".to_string(), ConcreteDataType::string_datatype(), true),
]),
])),
)
.unwrap(),
);

View File

@@ -456,9 +456,9 @@ impl TryFrom<&ArrowDataType> for ConcreteDataType {
}
ArrowDataType::Utf8 | ArrowDataType::Utf8View => Self::string_datatype(),
ArrowDataType::LargeUtf8 => Self::large_string_datatype(),
ArrowDataType::List(field) => Self::List(ListType::new(
ArrowDataType::List(field) => Self::List(ListType::new(Arc::new(
ConcreteDataType::from_arrow_type(field.data_type()),
)),
))),
ArrowDataType::Dictionary(key_type, value_type) => {
let key_type = ConcreteDataType::from_arrow_type(key_type);
let value_type = ConcreteDataType::from_arrow_type(value_type);
@@ -641,7 +641,7 @@ impl ConcreteDataType {
}
}
pub fn list_datatype(item_type: ConcreteDataType) -> ConcreteDataType {
pub fn list_datatype(item_type: Arc<ConcreteDataType>) -> ConcreteDataType {
ConcreteDataType::List(ListType::new(item_type))
}
@@ -794,7 +794,7 @@ mod tests {
ArrowDataType::Int32,
true,
)))),
ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype()))
ConcreteDataType::List(ListType::new(Arc::new(ConcreteDataType::int32_datatype())))
);
assert!(matches!(
ConcreteDataType::from_arrow_type(&ArrowDataType::Date32),
@@ -985,9 +985,10 @@ mod tests {
#[test]
fn test_as_list() {
let list_type = ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype());
let list_type =
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()));
assert_eq!(
ListType::new(ConcreteDataType::int32_datatype()),
ListType::new(Arc::new(ConcreteDataType::int32_datatype())),
*list_type.as_list().unwrap()
);
assert!(ConcreteDataType::int32_datatype().as_list().is_none());
@@ -1032,21 +1033,24 @@ mod tests {
);
// Nested types
assert_eq!(
ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()).to_string(),
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()))
.to_string(),
"List<Int32>"
);
assert_eq!(
ConcreteDataType::list_datatype(ConcreteDataType::Dictionary(DictionaryType::new(
ConcreteDataType::int32_datatype(),
ConcreteDataType::string_datatype()
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::Dictionary(
DictionaryType::new(
ConcreteDataType::int32_datatype(),
ConcreteDataType::string_datatype()
)
)))
.to_string(),
"List<Dictionary<Int32, String>>"
);
assert_eq!(
ConcreteDataType::list_datatype(ConcreteDataType::list_datatype(
ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype())
))
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::list_datatype(Arc::new(
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()))
))))
.to_string(),
"List<List<List<Int32>>>"
);

View File

@@ -20,6 +20,7 @@
//!
use std::collections::HashSet;
use std::sync::Arc;
use common_base::bytes::StringBytes;
use ordered_float::OrderedFloat;
@@ -28,7 +29,7 @@ use snafu::{ResultExt, ensure};
use crate::data_type::{ConcreteDataType, DataType};
use crate::error::{self, Error};
use crate::types::{ListType, StructField, StructType};
use crate::types::{StructField, StructType};
use crate::value::{ListValue, StructValue, Value};
/// The configuration of JSON encoding
@@ -146,11 +147,11 @@ pub fn encode_json_with_context<'a>(
let json_string = json.to_string();
let struct_value = StructValue::try_new(
vec![Value::String(json_string.into())],
StructType::new(vec![StructField::new(
StructType::new(Arc::new(vec![StructField::new(
JsonStructureSettings::RAW_FIELD.to_string(),
ConcreteDataType::string_datatype(),
true,
)]),
)])),
)?;
return Ok(Value::Struct(struct_value));
}
@@ -217,7 +218,7 @@ fn encode_json_object_with_context<'a>(
let mut struct_fields = Vec::with_capacity(total_json_keys);
// First, process fields from the provided schema in their original order
if let Some(fields) = fields {
for field in fields.fields() {
for field in fields.fields().iter() {
let field_name = field.name();
if let Some(value) = json_object.remove(field_name) {
@@ -252,7 +253,7 @@ fn encode_json_object_with_context<'a>(
));
}
let struct_type = StructType::new(struct_fields);
let struct_type = StructType::new(Arc::new(struct_fields));
StructValue::try_new(items, struct_type)
}
@@ -291,9 +292,8 @@ fn encode_json_array_with_context<'a>(
} else {
element_type.unwrap_or_else(ConcreteDataType::string_datatype)
};
let list_type = ListType::new(element_type);
Ok(ListValue::new(items, ConcreteDataType::List(list_type)))
Ok(ListValue::new(items, Arc::new(element_type)))
}
/// Helper function to encode a JSON value to a Value and determine its ConcreteDataType with context
@@ -369,7 +369,7 @@ fn encode_json_value_with_context<'a>(
Json::Array(arr) => {
let list_value = encode_json_array_with_context(arr, expected_type, context)?;
let data_type = list_value.datatype().clone();
Ok((Value::List(list_value), data_type))
Ok((Value::List(list_value), (*data_type).clone()))
}
Json::Object(obj) => {
let struct_value = encode_json_object_with_context(obj, None, context)?;
@@ -593,7 +593,7 @@ fn decode_struct_with_settings<'a>(
}
}
let struct_type = StructType::new(struct_fields);
let struct_type = StructType::new(Arc::new(struct_fields));
StructValue::try_new(items, struct_type)
}
@@ -651,8 +651,11 @@ fn decode_unstructured_raw_struct(struct_value: StructValue) -> Result<StructVal
return Ok(decoded_struct);
} else {
// If the decoded value is not a struct, wrap it in a struct
let struct_type =
StructType::new(vec![StructField::new("value".to_string(), data_type, true)]);
let struct_type = StructType::new(Arc::new(vec![StructField::new(
"value".to_string(),
data_type,
true,
)]));
return StructValue::try_new(vec![decoded_value], struct_type);
}
}
@@ -688,9 +691,11 @@ where
#[cfg(test)]
mod tests {
use serde_json::json;
use super::*;
use crate::types::ListType;
#[test]
fn test_encode_json_null() {
@@ -798,7 +803,8 @@ mod tests {
let struct_type = result.struct_type();
// Check that we have the expected fields
let field_names: Vec<&str> = struct_type.fields().iter().map(|f| f.name()).collect();
let fields = struct_type.fields();
let field_names: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(field_names.contains(&"name"));
assert!(field_names.contains(&"age"));
assert!(field_names.contains(&"active"));
@@ -855,12 +861,8 @@ mod tests {
.unwrap();
if let Value::Struct(person_struct) = &items[person_index] {
assert_eq!(person_struct.items().len(), 2);
let person_fields: Vec<&str> = person_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = person_struct.struct_type().fields();
let person_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(person_fields.contains(&"name"));
assert!(person_fields.contains(&"age"));
} else {
@@ -920,7 +922,7 @@ mod tests {
// In this case, it should be string since we can't find a common numeric type
assert_eq!(
list_value.datatype(),
&ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype()))
Arc::new(ConcreteDataType::int64_datatype())
);
} else {
panic!("Expected List value");
@@ -942,7 +944,7 @@ mod tests {
// Empty arrays default to string type
assert_eq!(
list_value.datatype(),
&ConcreteDataType::List(ListType::new(ConcreteDataType::string_datatype()))
Arc::new(ConcreteDataType::string_datatype())
);
} else {
panic!("Expected List value");
@@ -961,12 +963,8 @@ mod tests {
if let Value::Struct(struct_value) = result {
assert_eq!(struct_value.items().len(), 2);
let field_names: Vec<&str> = struct_value
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = struct_value.struct_type().fields();
let field_names: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(field_names.contains(&"name"));
assert!(field_names.contains(&"age"));
} else {
@@ -990,7 +988,7 @@ mod tests {
),
StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true),
];
let struct_type = StructType::new(fields);
let struct_type = StructType::new(Arc::new(fields));
let concrete_type = ConcreteDataType::Struct(struct_type);
let settings = JsonStructureSettings::Structured(None);
@@ -1044,7 +1042,7 @@ mod tests {
true,
),
];
let struct_type = StructType::new(fields);
let struct_type = StructType::new(Arc::new(fields));
let result = encode_json_object_with_context(
json.as_object().unwrap().clone(),
@@ -1086,7 +1084,7 @@ mod tests {
),
StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true),
];
let struct_type = StructType::new(fields);
let struct_type = StructType::new(Arc::new(fields));
let result = encode_json_object_with_context(
json.as_object().unwrap().clone(),
@@ -1127,7 +1125,7 @@ mod tests {
),
StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true),
];
let struct_type = StructType::new(fields);
let struct_type = StructType::new(Arc::new(fields));
let result = encode_json_object_with_context(
json.as_object().unwrap().clone(),
@@ -1170,7 +1168,8 @@ mod tests {
#[test]
fn test_encode_json_array_with_item_type() {
let json = json!([1, 2, 3]);
let list_type = ListType::new(ConcreteDataType::int8_datatype());
let item_type = Arc::new(ConcreteDataType::int8_datatype());
let list_type = ListType::new(item_type.clone());
let concrete_type = ConcreteDataType::List(list_type);
let settings = JsonStructureSettings::Structured(None);
let result = settings
@@ -1184,10 +1183,7 @@ mod tests {
assert_eq!(list_value.items()[0], Value::Int8(1));
assert_eq!(list_value.items()[1], Value::Int8(2));
assert_eq!(list_value.items()[2], Value::Int8(3));
assert_eq!(
list_value.datatype(),
&ConcreteDataType::List(ListType::new(ConcreteDataType::int8_datatype()))
);
assert_eq!(list_value.datatype(), item_type);
} else {
panic!("Expected List value");
}
@@ -1196,7 +1192,8 @@ mod tests {
#[test]
fn test_encode_json_array_empty_with_item_type() {
let json = json!([]);
let list_type = ListType::new(ConcreteDataType::string_datatype());
let item_type = Arc::new(ConcreteDataType::string_datatype());
let list_type = ListType::new(item_type.clone());
let concrete_type = ConcreteDataType::List(list_type);
let settings = JsonStructureSettings::Structured(None);
let result = settings
@@ -1207,10 +1204,7 @@ mod tests {
if let Value::List(list_value) = result {
assert_eq!(list_value.items().len(), 0);
assert_eq!(
list_value.datatype(),
&ConcreteDataType::List(ListType::new(ConcreteDataType::string_datatype()))
);
assert_eq!(list_value.datatype(), item_type);
} else {
panic!("Expected List value");
}
@@ -1257,7 +1251,7 @@ mod tests {
Value::Int64(25),
Value::Boolean(true),
],
StructType::new(vec![
StructType::new(Arc::new(vec![
StructField::new(
"name".to_string(),
ConcreteDataType::string_datatype(),
@@ -1269,7 +1263,7 @@ mod tests {
ConcreteDataType::boolean_datatype(),
true,
),
]),
])),
);
let result = settings.decode(Value::Struct(struct_value)).unwrap();
@@ -1287,7 +1281,7 @@ mod tests {
let list_value = ListValue::new(
vec![Value::Int64(1), Value::Int64(2), Value::Int64(3)],
ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype())),
Arc::new(ConcreteDataType::int64_datatype()),
);
let result = settings.decode(Value::List(list_value)).unwrap();
@@ -1301,28 +1295,29 @@ mod tests {
let inner_struct = StructValue::new(
vec![Value::String("Alice".into()), Value::Int64(25)],
StructType::new(vec![
StructType::new(Arc::new(vec![
StructField::new(
"name".to_string(),
ConcreteDataType::string_datatype(),
true,
),
StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true),
]),
])),
);
let score_list_item_type = Arc::new(ConcreteDataType::int64_datatype());
let outer_struct = StructValue::new(
vec![
Value::Struct(inner_struct),
Value::List(ListValue::new(
vec![Value::Int64(95), Value::Int64(87)],
ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype())),
score_list_item_type.clone(),
)),
],
StructType::new(vec![
StructType::new(Arc::new(vec![
StructField::new(
"user".to_string(),
ConcreteDataType::Struct(StructType::new(vec![
ConcreteDataType::Struct(StructType::new(Arc::new(vec![
StructField::new(
"name".to_string(),
ConcreteDataType::string_datatype(),
@@ -1333,15 +1328,15 @@ mod tests {
ConcreteDataType::int64_datatype(),
true,
),
])),
]))),
true,
),
StructField::new(
"scores".to_string(),
ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype())),
ConcreteDataType::List(ListType::new(score_list_item_type.clone())),
true,
),
]),
])),
);
let result = settings.decode(Value::Struct(outer_struct)).unwrap();
@@ -1374,11 +1369,11 @@ mod tests {
let json_str = r#"{"name": "Bob", "age": 30}"#;
let struct_value = StructValue::new(
vec![Value::String(json_str.into())],
StructType::new(vec![StructField::new(
StructType::new(Arc::new(vec![StructField::new(
JsonStructureSettings::RAW_FIELD.to_string(),
ConcreteDataType::string_datatype(),
true,
)]),
)])),
);
let value = Value::Struct(struct_value);
@@ -1404,7 +1399,7 @@ mod tests {
Value::String("Alice".into()),
Value::String(metadata_json.into()),
],
StructType::new(vec![
StructType::new(Arc::new(vec![
StructField::new(
"name".to_string(),
ConcreteDataType::string_datatype(),
@@ -1415,7 +1410,7 @@ mod tests {
ConcreteDataType::string_datatype(),
true,
),
]),
])),
);
let result = settings.decode(Value::Struct(struct_value)).unwrap();
@@ -1445,14 +1440,14 @@ mod tests {
Value::String("Bob".into()),
Value::Null, // missing age field
],
StructType::new(vec![
StructType::new(Arc::new(vec![
StructField::new(
"name".to_string(),
ConcreteDataType::string_datatype(),
true,
),
StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true),
]),
])),
);
let result = settings.decode(Value::Struct(struct_value)).unwrap();
@@ -1513,7 +1508,8 @@ mod tests {
#[test]
fn test_encode_json_array_with_list_type() {
let json = json!([1, 2, 3]);
let list_type = ListType::new(ConcreteDataType::int64_datatype());
let item_type = Arc::new(ConcreteDataType::int64_datatype());
let list_type = ListType::new(item_type.clone());
let concrete_type = ConcreteDataType::List(list_type);
let settings = JsonStructureSettings::Structured(None);
@@ -1528,10 +1524,7 @@ mod tests {
assert_eq!(list_value.items()[0], Value::Int64(1));
assert_eq!(list_value.items()[1], Value::Int64(2));
assert_eq!(list_value.items()[2], Value::Int64(3));
assert_eq!(
list_value.datatype(),
&ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype()))
);
assert_eq!(list_value.datatype(), item_type);
} else {
panic!("Expected List value");
}
@@ -1644,7 +1637,7 @@ mod tests {
}
// Test with encode_with_type (with type)
let struct_type = StructType::new(vec![
let struct_type = StructType::new(Arc::new(vec![
StructField::new(
"name".to_string(),
ConcreteDataType::string_datatype(),
@@ -1656,7 +1649,7 @@ mod tests {
ConcreteDataType::boolean_datatype(),
true,
),
]);
]));
let concrete_type = ConcreteDataType::Struct(struct_type);
let result2 = settings
@@ -1759,12 +1752,8 @@ mod tests {
.unwrap();
if let Value::Struct(user_struct) = &items[user_index] {
let user_items = user_struct.items();
let user_fields: Vec<&str> = user_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = user_struct.struct_type().fields();
let user_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
// name should be structured
let name_index = user_fields.iter().position(|&f| f == "name").unwrap();
@@ -1798,7 +1787,7 @@ mod tests {
Value::Int64(25),
Value::Boolean(true),
],
StructType::new(vec![
StructType::new(Arc::new(vec![
StructField::new(
"name".to_string(),
ConcreteDataType::string_datatype(),
@@ -1810,7 +1799,7 @@ mod tests {
ConcreteDataType::boolean_datatype(),
true,
),
]),
])),
);
let decoded_struct = settings.decode_struct(original_struct.clone()).unwrap();
@@ -1834,7 +1823,7 @@ mod tests {
Value::Int64(25),
Value::Boolean(true),
],
StructType::new(vec![
StructType::new(Arc::new(vec![
StructField::new(
"name".to_string(),
ConcreteDataType::string_datatype(),
@@ -1846,7 +1835,7 @@ mod tests {
ConcreteDataType::boolean_datatype(),
true,
),
]),
])),
);
let decoded_struct = settings.decode_struct(original_struct.clone()).unwrap();
@@ -1863,7 +1852,28 @@ mod tests {
unstructured_keys.insert("metadata".to_string());
let settings = JsonStructureSettings::PartialUnstructuredByKey {
fields: Some(StructType::new(vec![
fields: Some(StructType::new(Arc::new(vec![
StructField::new(
"name".to_string(),
ConcreteDataType::string_datatype(),
true,
),
StructField::new(
"metadata".to_string(),
ConcreteDataType::string_datatype(),
true,
),
]))),
unstructured_keys,
};
// Create a struct where metadata is stored as unstructured JSON string
let encoded_struct = StructValue::new(
vec![
Value::String("Alice".into()),
Value::String(r#"{"preferences":{"theme":"dark"},"history":[1,2,3]}"#.into()),
],
StructType::new(Arc::new(vec![
StructField::new(
"name".to_string(),
ConcreteDataType::string_datatype(),
@@ -1875,27 +1885,6 @@ mod tests {
true,
),
])),
unstructured_keys,
};
// Create a struct where metadata is stored as unstructured JSON string
let encoded_struct = StructValue::new(
vec![
Value::String("Alice".into()),
Value::String(r#"{"preferences":{"theme":"dark"},"history":[1,2,3]}"#.into()),
],
StructType::new(vec![
StructField::new(
"name".to_string(),
ConcreteDataType::string_datatype(),
true,
),
StructField::new(
"metadata".to_string(),
ConcreteDataType::string_datatype(),
true,
),
]),
);
let decoded_struct = settings.decode_struct(encoded_struct).unwrap();
@@ -1905,12 +1894,8 @@ mod tests {
// Verify metadata field is now properly structured
if let Value::Struct(metadata_struct) = &decoded_struct.items()[1] {
let metadata_fields: Vec<&str> = metadata_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = metadata_struct.struct_type().fields();
let metadata_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(metadata_fields.contains(&"preferences"));
assert!(metadata_fields.contains(&"history"));
@@ -1936,7 +1921,7 @@ mod tests {
Value::String("Alice".into()),
Value::String(r#"{"preferences":{"theme":"dark"},"history":[1,2,3]}"#.into()),
],
StructType::new(vec![
StructType::new(Arc::new(vec![
StructField::new(
"name".to_string(),
ConcreteDataType::string_datatype(),
@@ -1947,16 +1932,16 @@ mod tests {
ConcreteDataType::string_datatype(),
true,
),
]),
])),
);
let encoded_struct = StructValue::new(
vec![Value::Struct(user_struct)],
StructType::new(vec![StructField::new(
StructType::new(Arc::new(vec![StructField::new(
"user".to_string(),
ConcreteDataType::struct_datatype(StructType::new(vec![])),
ConcreteDataType::struct_datatype(StructType::new(Arc::new(vec![]))),
true,
)]),
)])),
);
let decoded_struct = settings.decode_struct(encoded_struct).unwrap();
@@ -1964,12 +1949,8 @@ mod tests {
// Verify the nested structure is properly decoded
if let Value::Struct(decoded_user) = &decoded_struct.items()[0] {
if let Value::Struct(metadata_struct) = &decoded_user.items()[1] {
let metadata_fields: Vec<&str> = metadata_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = metadata_struct.struct_type().fields();
let metadata_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(metadata_fields.contains(&"preferences"));
assert!(metadata_fields.contains(&"history"));
@@ -1987,12 +1968,8 @@ mod tests {
if let Value::Struct(preferences_struct) =
&metadata_struct.items()[preference_index]
{
let pref_fields: Vec<&str> = preferences_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = preferences_struct.struct_type().fields();
let pref_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(pref_fields.contains(&"theme"));
} else {
panic!("Expected preferences to be decoded as structured value");
@@ -2022,22 +1999,18 @@ mod tests {
vec![Value::String(
r#"{"name":"Alice","age":25,"active":true}"#.into(),
)],
StructType::new(vec![StructField::new(
StructType::new(Arc::new(vec![StructField::new(
"_raw".to_string(),
ConcreteDataType::string_datatype(),
true,
)]),
)])),
);
let decoded_struct = settings.decode_struct(encoded_struct).unwrap();
// With UnstructuredRaw, the entire struct should be reconstructed from _raw field
let decoded_fields: Vec<&str> = decoded_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = decoded_struct.struct_type().fields();
let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(decoded_fields.contains(&"name"));
assert!(decoded_fields.contains(&"age"));
@@ -2064,14 +2037,14 @@ mod tests {
// Create a struct that doesn't match the expected UnstructuredRaw format
let invalid_struct = StructValue::new(
vec![Value::String("Alice".into()), Value::Int64(25)],
StructType::new(vec![
StructType::new(Arc::new(vec![
StructField::new(
"name".to_string(),
ConcreteDataType::string_datatype(),
true,
),
StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true),
]),
])),
);
// Should fail with error since it doesn't match expected UnstructuredRaw format
@@ -2093,20 +2066,16 @@ mod tests {
// Test with a string primitive in _raw field
let string_struct = StructValue::new(
vec![Value::String("\"hello world\"".into())],
StructType::new(vec![StructField::new(
StructType::new(Arc::new(vec![StructField::new(
"_raw".to_string(),
ConcreteDataType::string_datatype(),
true,
)]),
)])),
);
let decoded_struct = settings.decode_struct(string_struct).unwrap();
let decoded_fields: Vec<&str> = decoded_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = decoded_struct.struct_type().fields();
let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(decoded_fields.contains(&"value"));
assert_eq!(
decoded_struct.items()[0],
@@ -2116,60 +2085,48 @@ mod tests {
// Test with a number primitive in _raw field
let number_struct = StructValue::new(
vec![Value::String("42".into())],
StructType::new(vec![StructField::new(
StructType::new(Arc::new(vec![StructField::new(
"_raw".to_string(),
ConcreteDataType::string_datatype(),
true,
)]),
)])),
);
let decoded_struct = settings.decode_struct(number_struct).unwrap();
let decoded_fields: Vec<&str> = decoded_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = decoded_struct.struct_type().fields();
let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(decoded_fields.contains(&"value"));
assert_eq!(decoded_struct.items()[0], Value::Int64(42));
// Test with a boolean primitive in _raw field
let bool_struct = StructValue::new(
vec![Value::String("true".into())],
StructType::new(vec![StructField::new(
StructType::new(Arc::new(vec![StructField::new(
"_raw".to_string(),
ConcreteDataType::string_datatype(),
true,
)]),
)])),
);
let decoded_struct = settings.decode_struct(bool_struct).unwrap();
let decoded_fields: Vec<&str> = decoded_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = decoded_struct.struct_type().fields();
let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(decoded_fields.contains(&"value"));
assert_eq!(decoded_struct.items()[0], Value::Boolean(true));
// Test with a null primitive in _raw field
let null_struct = StructValue::new(
vec![Value::String("null".into())],
StructType::new(vec![StructField::new(
StructType::new(Arc::new(vec![StructField::new(
"_raw".to_string(),
ConcreteDataType::string_datatype(),
true,
)]),
)])),
);
let decoded_struct = settings.decode_struct(null_struct).unwrap();
let decoded_fields: Vec<&str> = decoded_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = decoded_struct.struct_type().fields();
let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(decoded_fields.contains(&"value"));
assert_eq!(decoded_struct.items()[0], Value::Null);
}
@@ -2182,20 +2139,16 @@ mod tests {
// Test with an array in _raw field
let array_struct = StructValue::new(
vec![Value::String("[1, \"hello\", true, 3.15]".into())],
StructType::new(vec![StructField::new(
StructType::new(Arc::new(vec![StructField::new(
"_raw".to_string(),
ConcreteDataType::string_datatype(),
true,
)]),
)])),
);
let decoded_struct = settings.decode_struct(array_struct).unwrap();
let decoded_fields: Vec<&str> = decoded_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = decoded_struct.struct_type().fields();
let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(decoded_fields.contains(&"value"));
if let Value::List(list_value) = &decoded_struct.items()[0] {
@@ -2254,12 +2207,8 @@ mod tests {
// Verify encoding worked - metadata and user.profile.settings should be unstructured
if let Value::Struct(encoded_struct) = encoded_value {
let fields: Vec<&str> = encoded_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = encoded_struct.struct_type().fields();
let fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(fields.contains(&"name"));
assert!(fields.contains(&"age"));
@@ -2277,21 +2226,13 @@ mod tests {
// Check that user.profile.settings is unstructured
let user_index = fields.iter().position(|&f| f == "user").unwrap();
if let Value::Struct(user_struct) = &encoded_struct.items()[user_index] {
let user_fields: Vec<&str> = user_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = user_struct.struct_type().fields();
let user_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
let profile_index = user_fields.iter().position(|&f| f == "profile").unwrap();
if let Value::Struct(profile_struct) = &user_struct.items()[profile_index] {
let profile_fields: Vec<&str> = profile_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = profile_struct.struct_type().fields();
let profile_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
let settings_index = profile_fields
.iter()
@@ -2315,12 +2256,8 @@ mod tests {
let decoded_struct = settings.decode_struct(encoded_struct).unwrap();
// Verify the decoded struct has proper structure
let decoded_fields: Vec<&str> = decoded_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = decoded_struct.struct_type().fields();
let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(decoded_fields.contains(&"name"));
assert!(decoded_fields.contains(&"age"));
@@ -2333,12 +2270,8 @@ mod tests {
.position(|&f| f == "metadata")
.unwrap();
if let Value::Struct(metadata_struct) = &decoded_struct.items()[metadata_index] {
let metadata_fields: Vec<&str> = metadata_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = metadata_struct.struct_type().fields();
let metadata_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(metadata_fields.contains(&"tags"));
assert!(metadata_fields.contains(&"preferences"));
@@ -2349,12 +2282,8 @@ mod tests {
.position(|&f| f == "preferences")
.unwrap();
if let Value::Struct(prefs_struct) = &metadata_struct.items()[preferences_index] {
let prefs_fields: Vec<&str> = prefs_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = prefs_struct.struct_type().fields();
let prefs_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(prefs_fields.contains(&"theme"));
assert!(prefs_fields.contains(&"notifications"));
@@ -2368,21 +2297,13 @@ mod tests {
// Check that user.profile.settings is now properly structured
let user_index = decoded_fields.iter().position(|&f| f == "user").unwrap();
if let Value::Struct(user_struct) = &decoded_struct.items()[user_index] {
let user_fields: Vec<&str> = user_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = user_struct.struct_type().fields();
let user_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
let profile_index = user_fields.iter().position(|&f| f == "profile").unwrap();
if let Value::Struct(profile_struct) = &user_struct.items()[profile_index] {
let profile_fields: Vec<&str> = profile_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = profile_struct.struct_type().fields();
let profile_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
let settings_index = profile_fields
.iter()
@@ -2390,12 +2311,8 @@ mod tests {
.unwrap();
if let Value::Struct(settings_struct) = &profile_struct.items()[settings_index]
{
let settings_fields: Vec<&str> = settings_struct
.struct_type()
.fields()
.iter()
.map(|f| f.name())
.collect();
let fields = settings_struct.struct_type().fields();
let settings_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(settings_fields.contains(&"language"));
assert!(settings_fields.contains(&"timezone"));

View File

@@ -380,6 +380,8 @@ impl<'a> ScalarRef<'a> for StructValueRef<'a> {
#[cfg(test)]
mod tests {
use std::sync::Arc;
use super::*;
use crate::data_type::ConcreteDataType;
use crate::timestamp::TimestampSecond;
@@ -451,14 +453,13 @@ mod tests {
#[test]
fn test_list_value_scalar() {
let list_value =
ListValue::new(vec![Value::Int32(123)], ConcreteDataType::int32_datatype());
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let list_value = ListValue::new(vec![Value::Int32(123)], item_type.clone());
let list_ref = ListValueRef::Ref { val: &list_value };
assert_eq!(list_ref, list_value.as_scalar_ref());
assert_eq!(list_value, list_ref.to_owned_scalar());
let mut builder =
ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 1);
let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1);
builder.push(None);
builder.push(Some(list_value.as_scalar_ref()));
let vector = builder.finish();

View File

@@ -80,7 +80,10 @@ impl LogicalTypeId {
/// Panics if data type is not supported.
#[cfg(any(test, feature = "test"))]
pub fn data_type(&self) -> crate::data_type::ConcreteDataType {
use std::sync::Arc;
use crate::data_type::ConcreteDataType;
use crate::types::StructType;
match self {
LogicalTypeId::Null => ConcreteDataType::null_datatype(),
@@ -107,9 +110,11 @@ impl LogicalTypeId {
}
LogicalTypeId::TimestampNanosecond => ConcreteDataType::timestamp_nanosecond_datatype(),
LogicalTypeId::List => {
ConcreteDataType::list_datatype(ConcreteDataType::null_datatype())
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::null_datatype()))
}
LogicalTypeId::Struct => {
ConcreteDataType::struct_datatype(StructType::new(Arc::new(vec![])))
}
LogicalTypeId::Struct => ConcreteDataType::struct_datatype(vec![].into()),
LogicalTypeId::Dictionary => ConcreteDataType::dictionary_datatype(
ConcreteDataType::null_datatype(),
ConcreteDataType::null_datatype(),

View File

@@ -26,22 +26,19 @@ use crate::vectors::{ListVectorBuilder, MutableVector};
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct ListType {
/// The type of List's item.
// Use Box to avoid recursive dependency, as enum ConcreteDataType depends on ListType.
item_type: Box<ConcreteDataType>,
item_type: Arc<ConcreteDataType>,
}
impl Default for ListType {
fn default() -> Self {
ListType::new(ConcreteDataType::null_datatype())
ListType::new(Arc::new(ConcreteDataType::null_datatype()))
}
}
impl ListType {
/// Create a new `ListType` whose item's data type is `item_type`.
pub fn new(item_type: ConcreteDataType) -> Self {
ListType {
item_type: Box::new(item_type),
}
pub fn new(item_type: Arc<ConcreteDataType>) -> Self {
ListType { item_type }
}
/// Returns the item data type.
@@ -61,7 +58,7 @@ impl DataType for ListType {
}
fn default_value(&self) -> Value {
Value::List(ListValue::new(vec![], *self.item_type.clone()))
Value::List(ListValue::new(vec![], self.item_type.clone()))
}
fn as_arrow_type(&self) -> ArrowDataType {
@@ -75,7 +72,7 @@ impl DataType for ListType {
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
Box::new(ListVectorBuilder::with_type_capacity(
*self.item_type.clone(),
self.item_type.clone(),
capacity,
))
}
@@ -95,11 +92,14 @@ mod tests {
#[test]
fn test_list_type() {
let t = ListType::new(ConcreteDataType::boolean_datatype());
let t = ListType::new(Arc::new(ConcreteDataType::boolean_datatype()));
assert_eq!("List<Boolean>", t.name());
assert_eq!(LogicalTypeId::List, t.logical_type_id());
assert_eq!(
Value::List(ListValue::new(vec![], ConcreteDataType::boolean_datatype())),
Value::List(ListValue::new(
vec![],
Arc::new(ConcreteDataType::boolean_datatype())
)),
t.default_value()
);
assert_eq!(

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use arrow::datatypes::{DataType as ArrowDataType, Field};
use arrow_schema::Fields;
use serde::{Deserialize, Serialize};
@@ -22,7 +24,7 @@ use crate::vectors::StructVectorBuilder;
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct StructType {
fields: Vec<StructField>,
fields: Arc<Vec<StructField>>,
}
impl TryFrom<&Fields> for StructType {
@@ -38,13 +40,9 @@ impl TryFrom<&Fields> for StructType {
))
})
.collect::<Result<Vec<StructField>, Self::Error>>()?;
Ok(StructType { fields })
}
}
impl From<Vec<StructField>> for StructType {
fn from(fields: Vec<StructField>) -> Self {
StructType { fields }
Ok(StructType {
fields: Arc::new(fields),
})
}
}
@@ -87,16 +85,14 @@ impl DataType for StructType {
}
impl StructType {
pub fn new(fields: Vec<StructField>) -> Self {
StructType { fields }
pub fn new(fields: Arc<Vec<StructField>>) -> Self {
StructType {
fields: fields.clone(),
}
}
pub fn fields(&self) -> &[StructField] {
&self.fields
}
pub fn take_fields(self) -> Vec<StructField> {
self.fields
pub fn fields(&self) -> Arc<Vec<StructField>> {
self.fields.clone()
}
pub fn as_arrow_fields(&self) -> Fields {

View File

@@ -911,11 +911,14 @@ impl TryFrom<Value> for serde_json::Value {
Value::Struct(v) => {
let (items, struct_type) = v.into_parts();
let map = struct_type
.take_fields()
.into_iter()
.fields()
.iter()
.zip(items.into_iter())
.map(|(field, value)| {
Ok((field.take_name(), serde_json::Value::try_from(value)?))
Ok((
field.name().to_string(),
serde_json::Value::try_from(value)?,
))
})
.collect::<serde_json::Result<Map<String, serde_json::Value>>>()?;
serde_json::Value::Object(map)
@@ -934,13 +937,13 @@ pub struct ListValue {
items: Vec<Value>,
/// Inner values datatype, to distinguish empty lists of different datatypes.
/// Restricted by DataFusion, cannot use null datatype for empty list.
datatype: ConcreteDataType,
datatype: Arc<ConcreteDataType>,
}
impl Eq for ListValue {}
impl ListValue {
pub fn new(items: Vec<Value>, datatype: ConcreteDataType) -> Self {
pub fn new(items: Vec<Value>, datatype: Arc<ConcreteDataType>) -> Self {
Self { items, datatype }
}
@@ -952,12 +955,13 @@ impl ListValue {
self.items
}
pub fn into_parts(self) -> (Vec<Value>, ConcreteDataType) {
pub fn into_parts(self) -> (Vec<Value>, Arc<ConcreteDataType>) {
(self.items, self.datatype)
}
pub fn datatype(&self) -> &ConcreteDataType {
&self.datatype
/// List value's inner type data type
pub fn datatype(&self) -> Arc<ConcreteDataType> {
self.datatype.clone()
}
pub fn len(&self) -> usize {
@@ -988,12 +992,13 @@ impl ListValue {
.first()
.map(|x| x.as_value_ref().data_size() * self.items.len())
.unwrap_or(0)
+ std::mem::size_of::<Arc<ConcreteDataType>>()
}
}
impl Default for ListValue {
fn default() -> ListValue {
ListValue::new(vec![], ConcreteDataType::null_datatype())
ListValue::new(vec![], Arc::new(ConcreteDataType::null_datatype()))
}
}
@@ -1066,7 +1071,8 @@ impl StructValue {
self.items
.iter()
.map(|x| x.as_value_ref().data_size())
.sum()
.sum::<usize>()
+ std::mem::size_of::<StructType>()
}
fn try_to_scalar_value(&self, output_type: &StructType) -> Result<ScalarValue> {
@@ -1089,7 +1095,7 @@ impl StructValue {
impl Default for StructValue {
fn default() -> StructValue {
StructValue::try_new(vec![], StructType::new(vec![])).unwrap()
StructValue::try_new(vec![], StructType::new(Arc::new(vec![]))).unwrap()
}
}
@@ -1136,7 +1142,7 @@ impl TryFrom<ScalarValue> for Value {
.flatten()
.map(|x| x.try_into())
.collect::<Result<Vec<Value>>>()?;
Value::List(ListValue::new(items, datatype))
Value::List(ListValue::new(items, Arc::new(datatype)))
}
ScalarValue::Date32(d) => d.map(|x| Value::Date(Date::new(x))).unwrap_or(Value::Null),
ScalarValue::TimestampSecond(t, _) => t
@@ -1547,7 +1553,7 @@ pub enum ListValueRef<'a> {
},
RefList {
val: Vec<ValueRef<'a>>,
item_datatype: ConcreteDataType,
item_datatype: Arc<ConcreteDataType>,
},
}
@@ -1564,9 +1570,9 @@ impl ListValueRef<'_> {
}
}
/// Returns the inner element's data type.
fn datatype(&self) -> ConcreteDataType {
fn datatype(&self) -> Arc<ConcreteDataType> {
match self {
ListValueRef::Indexed { vector, .. } => vector.data_type(),
ListValueRef::Indexed { vector, .. } => vector.item_type(),
ListValueRef::Ref { val } => val.datatype().clone(),
ListValueRef::RefList { item_datatype, .. } => item_datatype.clone(),
}
@@ -1707,12 +1713,18 @@ impl ValueRef<'_> {
ValueRef::List(v) => match v {
ListValueRef::Indexed { vector, .. } => vector.memory_size() / vector.len(),
ListValueRef::Ref { val } => val.estimated_size(),
ListValueRef::RefList { val, .. } => val.iter().map(|v| v.data_size()).sum(),
ListValueRef::RefList { val, .. } => {
val.iter().map(|v| v.data_size()).sum::<usize>()
+ std::mem::size_of::<Arc<ConcreteDataType>>()
}
},
ValueRef::Struct(val) => match val {
StructValueRef::Indexed { vector, .. } => vector.memory_size() / vector.len(),
StructValueRef::Ref(val) => val.estimated_size(),
StructValueRef::RefList { val, .. } => val.iter().map(|v| v.data_size()).sum(),
StructValueRef::RefList { val, .. } => {
val.iter().map(|v| v.data_size()).sum::<usize>()
+ std::mem::size_of::<StructType>()
}
},
ValueRef::Json(v) => v.data_size(),
}
@@ -1730,7 +1742,7 @@ pub(crate) mod tests {
use crate::vectors::ListVectorBuilder;
pub(crate) fn build_struct_type() -> StructType {
StructType::new(vec![
StructType::new(Arc::new(vec![
StructField::new("id".to_string(), ConcreteDataType::int32_datatype(), false),
StructField::new(
"name".to_string(),
@@ -1745,10 +1757,10 @@ pub(crate) mod tests {
),
StructField::new(
"awards".to_string(),
ConcreteDataType::list_datatype(ConcreteDataType::boolean_datatype()),
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::boolean_datatype())),
true,
),
])
]))
}
pub(crate) fn build_struct_value() -> StructValue {
@@ -1778,12 +1790,12 @@ pub(crate) mod tests {
}
pub fn build_list_type() -> ConcreteDataType {
ConcreteDataType::list_datatype(ConcreteDataType::boolean_datatype())
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::boolean_datatype()))
}
pub(crate) fn build_list_value() -> ListValue {
let items = vec![Value::Boolean(true), Value::Boolean(false)];
ListValue::new(items, ConcreteDataType::boolean_datatype())
ListValue::new(items, Arc::new(ConcreteDataType::boolean_datatype()))
}
pub(crate) fn build_scalar_list_value() -> ScalarValue {
@@ -1909,7 +1921,10 @@ pub(crate) mod tests {
build_scalar_list_value().try_into().unwrap()
);
assert_eq!(
Value::List(ListValue::new(vec![], ConcreteDataType::uint32_datatype())),
Value::List(ListValue::new(
vec![],
Arc::new(ConcreteDataType::uint32_datatype())
)),
ScalarValue::List(ScalarValue::new_list(&[], &ArrowDataType::UInt32, true))
.try_into()
.unwrap()
@@ -2176,15 +2191,13 @@ pub(crate) mod tests {
&ConcreteDataType::binary_datatype(),
&Value::Binary(Bytes::from(b"world".as_slice())),
);
let item_type = Arc::new(ConcreteDataType::int32_datatype());
check_type_and_value(
&ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()),
&Value::List(ListValue::new(
vec![Value::Int32(10)],
ConcreteDataType::int32_datatype(),
)),
&ConcreteDataType::list_datatype(item_type.clone()),
&Value::List(ListValue::new(vec![Value::Int32(10)], item_type.clone())),
);
check_type_and_value(
&ConcreteDataType::list_datatype(ConcreteDataType::null_datatype()),
&ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::null_datatype())),
&Value::List(ListValue::default()),
);
check_type_and_value(
@@ -2244,11 +2257,12 @@ pub(crate) mod tests {
&Value::Decimal128(Decimal128::new(1, 38, 10)),
);
let item_type = Arc::new(ConcreteDataType::boolean_datatype());
check_type_and_value(
&ConcreteDataType::list_datatype(ConcreteDataType::boolean_datatype()),
&ConcreteDataType::list_datatype(item_type.clone()),
&Value::List(ListValue::new(
vec![Value::Boolean(true)],
ConcreteDataType::boolean_datatype(),
item_type.clone(),
)),
);
@@ -2377,7 +2391,7 @@ pub(crate) mod tests {
json_value,
to_json(Value::List(ListValue {
items: vec![Value::Int32(123)],
datatype: ConcreteDataType::int32_datatype(),
datatype: Arc::new(ConcreteDataType::int32_datatype()),
}))
);
@@ -2387,7 +2401,7 @@ pub(crate) mod tests {
Value::String("tomcat".into()),
Value::Boolean(true),
],
StructType::new(vec![
StructType::new(Arc::new(vec![
StructField::new("num".to_string(), ConcreteDataType::int64_datatype(), true),
StructField::new(
"name".to_string(),
@@ -2399,7 +2413,7 @@ pub(crate) mod tests {
ConcreteDataType::boolean_datatype(),
true,
),
]),
])),
)
.unwrap();
assert_eq!(
@@ -2422,7 +2436,7 @@ pub(crate) mod tests {
assert_eq!(
serde_json::Value::try_from(Value::Json(Box::new(Value::List(ListValue::new(
vec![Value::Int32(1), Value::Int32(2), Value::Int32(3),],
ConcreteDataType::int32_datatype()
Arc::new(ConcreteDataType::int32_datatype())
)))))
.unwrap(),
serde_json::json!([1, 2, 3])
@@ -2617,7 +2631,7 @@ pub(crate) mod tests {
assert_eq!(
Value::List(ListValue::new(
vec![],
ConcreteDataType::timestamp_second_datatype(),
Arc::new(ConcreteDataType::timestamp_second_datatype()),
))
.to_string(),
"TimestampSecond[]"
@@ -2625,7 +2639,7 @@ pub(crate) mod tests {
assert_eq!(
Value::List(ListValue::new(
vec![],
ConcreteDataType::timestamp_millisecond_datatype(),
Arc::new(ConcreteDataType::timestamp_millisecond_datatype()),
))
.to_string(),
"TimestampMillisecond[]"
@@ -2633,7 +2647,7 @@ pub(crate) mod tests {
assert_eq!(
Value::List(ListValue::new(
vec![],
ConcreteDataType::timestamp_microsecond_datatype(),
Arc::new(ConcreteDataType::timestamp_microsecond_datatype()),
))
.to_string(),
"TimestampMicrosecond[]"
@@ -2641,7 +2655,7 @@ pub(crate) mod tests {
assert_eq!(
Value::List(ListValue::new(
vec![],
ConcreteDataType::timestamp_nanosecond_datatype(),
Arc::new(ConcreteDataType::timestamp_nanosecond_datatype()),
))
.to_string(),
"TimestampNanosecond[]"
@@ -2765,9 +2779,9 @@ pub(crate) mod tests {
assert_eq!(
build_scalar_list_value(),
Value::List(build_list_value())
.try_to_scalar_value(&ConcreteDataType::list_datatype(
.try_to_scalar_value(&ConcreteDataType::list_datatype(Arc::new(
ConcreteDataType::boolean_datatype()
))
)))
.unwrap()
);
}
@@ -2912,9 +2926,9 @@ pub(crate) mod tests {
assert_eq!(
ScalarValue::new_null_list(ArrowDataType::Boolean, true, 1),
Value::Null
.try_to_scalar_value(&ConcreteDataType::list_datatype(
ConcreteDataType::boolean_datatype(),
))
.try_to_scalar_value(&ConcreteDataType::list_datatype(Arc::new(
ConcreteDataType::boolean_datatype()
)))
.unwrap()
);
@@ -2929,11 +2943,10 @@ pub(crate) mod tests {
#[test]
fn test_list_value_to_scalar_value() {
let items = vec![Value::Int32(-1), Value::Null];
let list = Value::List(ListValue::new(items, ConcreteDataType::int32_datatype()));
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let list = Value::List(ListValue::new(items, item_type.clone()));
let df_list = list
.try_to_scalar_value(&ConcreteDataType::list_datatype(
ConcreteDataType::int32_datatype(),
))
.try_to_scalar_value(&ConcreteDataType::list_datatype(item_type.clone()))
.unwrap();
assert!(matches!(df_list, ScalarValue::List(_)));
match df_list {
@@ -3092,10 +3105,10 @@ pub(crate) mod tests {
Value::String("hello world".into()),
Value::String("greptimedb".into()),
],
datatype: ConcreteDataType::string_datatype(),
datatype: Arc::new(ConcreteDataType::string_datatype()),
},
}),
22,
30,
);
let data = vec![
@@ -3103,12 +3116,12 @@ pub(crate) mod tests {
None,
Some(vec![Some(4), None, Some(6)]),
];
let mut builder =
ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 8);
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 8);
for vec_opt in &data {
if let Some(vec) = vec_opt {
let values = vec.iter().map(|v| Value::from(*v)).collect();
let list_value = ListValue::new(values, ConcreteDataType::int32_datatype());
let list_value = ListValue::new(values, item_type.clone());
builder.push(Some(ListValueRef::Ref { val: &list_value }));
} else {
@@ -3142,14 +3155,14 @@ pub(crate) mod tests {
check_value_ref_size_eq(
&ValueRef::Struct(StructValueRef::Ref(&build_struct_value())),
15,
31,
);
check_value_ref_size_eq(
&ValueRef::Json(Box::new(ValueRef::Struct(StructValueRef::Ref(
&build_struct_value(),
)))),
15,
31,
);
}

View File

@@ -424,12 +424,12 @@ pub mod tests {
#[test]
#[should_panic(expected = "Must use ListVectorBuilder::with_type_capacity()")]
fn test_mutable_vector_list_data_type() {
let item_type = Arc::new(ConcreteDataType::int32_datatype());
// List type
let builder =
ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 1024);
let builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1024);
assert_eq!(
builder.data_type(),
ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype())
ConcreteDataType::list_datatype(item_type)
);
// Panic with_capacity

View File

@@ -168,7 +168,7 @@ impl Helper {
ConstantVector::new(Arc::new(BinaryVector::from(vec![v])), length)
}
ScalarValue::List(array) => {
let item_type = ConcreteDataType::try_from(&array.value_type())?;
let item_type = Arc::new(ConcreteDataType::try_from(&array.value_type())?);
let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1);
let values = ScalarValue::convert_array_to_scalar_vec(array.as_ref())
.context(ConvertArrowArrayToScalarsSnafu)?
@@ -560,7 +560,7 @@ mod tests {
));
let vector = Helper::try_from_scalar_value(value, 3).unwrap();
assert_eq!(
ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()),
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype())),
vector.data_type()
);
assert_eq!(3, vector.len());

View File

@@ -35,7 +35,7 @@ use crate::vectors::{self, Helper, MutableVector, Validity, Vector, VectorRef};
pub struct ListVector {
array: ListArray,
/// The datatype of the items in the list.
item_type: ConcreteDataType,
item_type: Arc<ConcreteDataType>,
}
impl ListVector {
@@ -50,7 +50,7 @@ impl ListVector {
&self.array
}
pub(crate) fn item_type(&self) -> ConcreteDataType {
pub(crate) fn item_type(&self) -> Arc<ConcreteDataType> {
self.item_type.clone()
}
}
@@ -145,10 +145,10 @@ impl Serializable for ListVector {
impl From<ListArray> for ListVector {
fn from(array: ListArray) -> Self {
let item_type = ConcreteDataType::from_arrow_type(match array.data_type() {
let item_type = Arc::new(ConcreteDataType::from_arrow_type(match array.data_type() {
ArrowDataType::List(field) => field.data_type(),
other => panic!("Try to create ListVector from an arrow array with type {other:?}"),
});
}));
Self { array, item_type }
}
}
@@ -217,7 +217,7 @@ impl ScalarVector for ListVector {
// See https://github.com/apache/arrow-rs/blob/94565bca99b5d9932a3e9a8e094aaf4e4384b1e5/arrow-array/src/builder/generic_list_builder.rs
/// [ListVector] builder.
pub struct ListVectorBuilder {
item_type: ConcreteDataType,
item_type: Arc<ConcreteDataType>,
offsets_builder: Int32BufferBuilder,
null_buffer_builder: NullBufferBuilder,
values_builder: Box<dyn MutableVector>,
@@ -226,7 +226,10 @@ pub struct ListVectorBuilder {
impl ListVectorBuilder {
/// Creates a new [`ListVectorBuilder`]. `item_type` is the data type of the list item, `capacity`
/// is the number of items to pre-allocate space for in this builder.
pub fn with_type_capacity(item_type: ConcreteDataType, capacity: usize) -> ListVectorBuilder {
pub fn with_type_capacity(
item_type: Arc<ConcreteDataType>,
capacity: usize,
) -> ListVectorBuilder {
let mut offsets_builder = Int32BufferBuilder::new(capacity + 1);
offsets_builder.append(0);
// The actual required capacity might be greater than the capacity of the `ListVector`
@@ -496,12 +499,12 @@ pub mod tests {
use crate::vectors::Int32Vector;
pub fn new_list_vector(data: &[Option<Vec<Option<i32>>>]) -> ListVector {
let mut builder =
ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 8);
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 8);
for vec_opt in data {
if let Some(vec) = vec_opt {
let values = vec.iter().map(|v| Value::from(*v)).collect();
let list_value = ListValue::new(values, ConcreteDataType::int32_datatype());
let list_value = ListValue::new(values, item_type.clone());
builder.push(Some(ListValueRef::Ref { val: &list_value }));
} else {
@@ -537,10 +540,11 @@ pub mod tests {
Some(vec![Some(4), None, Some(6)]),
];
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let list_vector = new_list_vector(&data);
assert_eq!(
ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype())),
ConcreteDataType::List(ListType::new(item_type.clone())),
list_vector.data_type()
);
assert_eq!("ListVector", list_vector.vector_type_name());
@@ -581,7 +585,7 @@ pub mod tests {
assert_eq!(
Value::List(ListValue::new(
vec![Value::Int32(1), Value::Int32(2), Value::Int32(3)],
ConcreteDataType::int32_datatype()
item_type.clone()
)),
list_vector.get(0)
);
@@ -600,7 +604,7 @@ pub mod tests {
assert_eq!(
Value::List(ListValue::new(
vec![Value::Int32(4), Value::Null, Value::Int32(6)],
ConcreteDataType::int32_datatype()
item_type.clone()
)),
list_vector.get(2)
);
@@ -636,10 +640,11 @@ pub mod tests {
Some(vec![Some(4), None, Some(6)]),
];
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let list_vector = new_list_vector(&data);
assert_eq!(
ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype())),
ConcreteDataType::List(ListType::new(item_type.clone())),
list_vector.data_type()
);
let mut iter = list_vector.values_iter();
@@ -672,12 +677,12 @@ pub mod tests {
#[test]
fn test_list_vector_builder() {
let mut builder =
ListType::new(ConcreteDataType::int32_datatype()).create_mutable_vector(3);
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let mut builder = ListType::new(item_type.clone()).create_mutable_vector(3);
builder.push_value_ref(&ValueRef::List(ListValueRef::Ref {
val: &ListValue::new(
vec![Value::Int32(4), Value::Null, Value::Int32(6)],
ConcreteDataType::int32_datatype(),
item_type.clone(),
),
}));
assert!(builder.try_push_value_ref(&ValueRef::Int32(123)).is_err());
@@ -706,13 +711,13 @@ pub mod tests {
#[test]
fn test_list_vector_for_scalar() {
let mut builder =
ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 2);
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 2);
builder.push(None);
builder.push(Some(ListValueRef::Ref {
val: &ListValue::new(
vec![Value::Int32(4), Value::Null, Value::Int32(6)],
ConcreteDataType::int32_datatype(),
item_type.clone(),
),
}));
let vector = builder.finish();
@@ -757,13 +762,13 @@ pub mod tests {
#[test]
fn test_list_vector_builder_finish_cloned() {
let mut builder =
ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 2);
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 2);
builder.push(None);
builder.push(Some(ListValueRef::Ref {
val: &ListValue::new(
vec![Value::Int32(4), Value::Null, Value::Int32(6)],
ConcreteDataType::int32_datatype(),
item_type.clone(),
),
}));
let vector = builder.finish_cloned();

View File

@@ -493,20 +493,21 @@ mod tests {
let struct_type = ConcreteDataType::struct_datatype(build_struct_type());
let struct_value = build_struct_value();
// level 2: list
let list_type = ConcreteDataType::list_datatype(struct_type.clone());
let struct_type_ref = Arc::new(struct_type);
let list_type = ConcreteDataType::list_datatype(struct_type_ref.clone());
let list_value = ListValue::new(
vec![
Value::Struct(struct_value.clone()),
Value::Struct(struct_value.clone()),
],
struct_type.clone(),
struct_type_ref.clone(),
);
// level 3: struct
let root_type = StructType::new(vec![StructField::new(
let root_type = StructType::new(Arc::new(vec![StructField::new(
"items".to_string(),
list_type,
false,
)]);
)]));
let root_value = StructValue::new(vec![Value::List(list_value)], root_type.clone());
let mut builder = StructVectorBuilder::with_type_and_capacity(root_type.clone(), 20);

View File

@@ -1146,7 +1146,7 @@ fn from_accums_to_offsetted_accum(new_accums: Vec<Vec<Value>>) -> Vec<Value> {
})
.map(Value::from)
.collect::<Vec<_>>();
let first = ListValue::new(offset, ConcreteDataType::uint64_datatype());
let first = ListValue::new(offset, Arc::new(ConcreteDataType::uint64_datatype()));
let first = Value::List(first);
// construct new_accums

View File

@@ -19,6 +19,7 @@ mod interval;
use std::collections::HashMap;
use std::ops::Deref;
use std::sync::Arc;
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime};
use common_time::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth};
@@ -90,7 +91,7 @@ fn encode_array(
value_list: ListValue,
builder: &mut DataRowEncoder,
) -> PgWireResult<()> {
match &value_list.datatype() {
match value_list.datatype().as_ref() {
ConcreteDataType::Boolean(_) => {
let array = value_list
.items()
@@ -551,21 +552,21 @@ pub(super) fn type_pg_to_gt(origin: &Type) -> Result<ConcreteDataType> {
&Type::TIME => Ok(ConcreteDataType::timestamp_datatype(
common_time::timestamp::TimeUnit::Microsecond,
)),
&Type::CHAR_ARRAY => Ok(ConcreteDataType::list_datatype(
&Type::CHAR_ARRAY => Ok(ConcreteDataType::list_datatype(Arc::new(
ConcreteDataType::int8_datatype(),
)),
&Type::INT2_ARRAY => Ok(ConcreteDataType::list_datatype(
))),
&Type::INT2_ARRAY => Ok(ConcreteDataType::list_datatype(Arc::new(
ConcreteDataType::int16_datatype(),
)),
&Type::INT4_ARRAY => Ok(ConcreteDataType::list_datatype(
))),
&Type::INT4_ARRAY => Ok(ConcreteDataType::list_datatype(Arc::new(
ConcreteDataType::int32_datatype(),
)),
&Type::INT8_ARRAY => Ok(ConcreteDataType::list_datatype(
))),
&Type::INT8_ARRAY => Ok(ConcreteDataType::list_datatype(Arc::new(
ConcreteDataType::int64_datatype(),
)),
&Type::VARCHAR_ARRAY => Ok(ConcreteDataType::list_datatype(
))),
&Type::VARCHAR_ARRAY => Ok(ConcreteDataType::list_datatype(Arc::new(
ConcreteDataType::string_datatype(),
)),
))),
_ => server_error::InternalSnafu {
err_msg: format!("unimplemented datatype {origin:?}"),
}
@@ -1345,10 +1346,12 @@ mod test {
ConcreteDataType::interval_datatype(IntervalUnit::YearMonth),
ConcreteDataType::interval_datatype(IntervalUnit::DayTime),
ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano),
ConcreteDataType::list_datatype(ConcreteDataType::int64_datatype()),
ConcreteDataType::list_datatype(ConcreteDataType::float64_datatype()),
ConcreteDataType::list_datatype(ConcreteDataType::string_datatype()),
ConcreteDataType::list_datatype(ConcreteDataType::timestamp_second_datatype()),
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int64_datatype())),
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::float64_datatype())),
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::string_datatype())),
ConcreteDataType::list_datatype(
Arc::new(ConcreteDataType::timestamp_second_datatype()),
),
];
let values = vec![
Value::Null,
@@ -1381,19 +1384,19 @@ mod test {
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(1, 1, 10)),
Value::List(ListValue::new(
vec![Value::Int64(1i64)],
ConcreteDataType::int64_datatype(),
Arc::new(ConcreteDataType::int64_datatype()),
)),
Value::List(ListValue::new(
vec![Value::Float64(1.0f64.into())],
ConcreteDataType::float64_datatype(),
Arc::new(ConcreteDataType::float64_datatype()),
)),
Value::List(ListValue::new(
vec![Value::String("tom".into())],
ConcreteDataType::string_datatype(),
Arc::new(ConcreteDataType::string_datatype()),
)),
Value::List(ListValue::new(
vec![Value::Timestamp(Timestamp::new(1i64, TimeUnit::Second))],
ConcreteDataType::timestamp_second_datatype(),
Arc::new(ConcreteDataType::timestamp_second_datatype()),
)),
];
let query_context = QueryContextBuilder::default()