diff --git a/src/datatypes/src/data_type.rs b/src/datatypes/src/data_type.rs index bb84e5a30b..4f5e8ab531 100644 --- a/src/datatypes/src/data_type.rs +++ b/src/datatypes/src/data_type.rs @@ -277,6 +277,10 @@ impl ConcreteDataType { matches!(self, ConcreteDataType::Null(NullType)) } + pub(crate) fn is_struct(&self) -> bool { + matches!(self, ConcreteDataType::Struct(_)) + } + /// Try to cast the type as a [`ListType`]. pub fn as_list(&self) -> Option<&ListType> { match self { diff --git a/src/datatypes/src/lib.rs b/src/datatypes/src/lib.rs index 6b20080380..56be63b229 100644 --- a/src/datatypes/src/lib.rs +++ b/src/datatypes/src/lib.rs @@ -13,6 +13,7 @@ // limitations under the License. #![feature(assert_matches)] +#![feature(box_patterns)] pub mod arrow_array; pub mod data_type; diff --git a/src/datatypes/src/types/json_type.rs b/src/datatypes/src/types/json_type.rs index 141db03728..660ddfe2c4 100644 --- a/src/datatypes/src/types/json_type.rs +++ b/src/datatypes/src/types/json_type.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use std::str::FromStr; use std::sync::Arc; @@ -31,9 +31,12 @@ use crate::scalars::ScalarVectorBuilder; use crate::type_id::LogicalTypeId; use crate::types::{ListType, StructField, StructType}; use crate::value::Value; +use crate::vectors::json::builder::JsonVectorBuilder; use crate::vectors::{BinaryVectorBuilder, MutableVector}; pub const JSON_TYPE_NAME: &str = "Json"; +const JSON_PLAIN_FIELD_NAME: &str = "__plain__"; +const JSON_PLAIN_FIELD_METADATA_KEY: &str = "is_plain_json"; #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default)] pub enum JsonFormat { @@ -54,28 +57,46 @@ impl JsonType { Self { format } } - // TODO(LFC): remove "allow unused" - #[allow(unused)] + pub(crate) fn empty() -> Self { + Self { + format: JsonFormat::Native(Box::new(ConcreteDataType::null_datatype())), + } + } + /// Make json type a struct type, by: /// - if the json is an object, its entries are mapped to struct fields, obviously; /// - if not, the json is one of bool, number, string or array, make it a special field called - /// "__plain" in a struct with only that field. + /// [JSON_PLAIN_FIELD_NAME] with metadata [JSON_PLAIN_FIELD_METADATA_KEY] = `"true"` in a + /// struct with only that field. pub(crate) fn as_struct_type(&self) -> StructType { match &self.format { JsonFormat::Jsonb => StructType::default(), JsonFormat::Native(inner) => match inner.as_ref() { ConcreteDataType::Struct(t) => t.clone(), - x => StructType::new(Arc::new(vec![StructField::new( - "__plain".to_string(), - x.clone(), - true, - )])), + x => { + let mut field = + StructField::new(JSON_PLAIN_FIELD_NAME.to_string(), x.clone(), true); + field.insert_metadata(JSON_PLAIN_FIELD_METADATA_KEY, true); + StructType::new(Arc::new(vec![field])) + } }, } } - // TODO(LFC): remove "allow unused" - #[allow(unused)] + /// Check if this json type is the special "plain" one. + /// See [JsonType::as_struct_type]. + pub(crate) fn is_plain_json(&self) -> bool { + let JsonFormat::Native(box ConcreteDataType::Struct(t)) = &self.format else { + return true; + }; + let fields = t.fields(); + let Some((single, [])) = fields.split_first() else { + return false; + }; + single.name() == JSON_PLAIN_FIELD_NAME + && single.metadata(JSON_PLAIN_FIELD_METADATA_KEY) == Some("true") + } + /// Try to merge this json type with others, error on datatype conflict. pub(crate) fn merge(&mut self, other: &JsonType) -> Result<()> { match (&self.format, &other.format) { @@ -91,6 +112,47 @@ impl JsonType { .fail(), } } + + pub(crate) fn is_mergeable(&self, other: &JsonType) -> bool { + match (&self.format, &other.format) { + (JsonFormat::Jsonb, JsonFormat::Jsonb) => true, + (JsonFormat::Native(this), JsonFormat::Native(that)) => { + is_mergeable(this.as_ref(), that.as_ref()) + } + _ => false, + } + } +} + +fn is_mergeable(this: &ConcreteDataType, that: &ConcreteDataType) -> bool { + fn is_mergeable_struct(this: &StructType, that: &StructType) -> bool { + let this_fields = this.fields(); + let this_fields = this_fields + .iter() + .map(|x| (x.name(), x)) + .collect::>(); + + for that_field in that.fields().iter() { + if let Some(this_field) = this_fields.get(that_field.name()) + && !is_mergeable(this_field.data_type(), that_field.data_type()) + { + return false; + } + } + true + } + + match (this, that) { + (this, that) if this == that => true, + (ConcreteDataType::List(this), ConcreteDataType::List(that)) => { + is_mergeable(this.item_type(), that.item_type()) + } + (ConcreteDataType::Struct(this), ConcreteDataType::Struct(that)) => { + is_mergeable_struct(this, that) + } + (ConcreteDataType::Null(_), _) | (_, ConcreteDataType::Null(_)) => true, + _ => false, + } } fn merge(this: &ConcreteDataType, that: &ConcreteDataType) -> Result { @@ -166,7 +228,10 @@ impl DataType for JsonType { } fn create_mutable_vector(&self, capacity: usize) -> Box { - Box::new(BinaryVectorBuilder::with_capacity(capacity)) + match self.format { + JsonFormat::Jsonb => Box::new(BinaryVectorBuilder::with_capacity(capacity)), + JsonFormat::Native(_) => Box::new(JsonVectorBuilder::with_capacity(capacity)), + } } fn try_cast(&self, from: Value) -> Option { @@ -226,10 +291,12 @@ mod tests { let result = json_type.merge(other); match (result, expected) { (Ok(()), Ok(expected)) => { - assert_eq!(json_type.name(), expected) + assert_eq!(json_type.name(), expected); + assert!(json_type.is_mergeable(other)); } (Err(err), Err(expected)) => { - assert_eq!(err.to_string(), expected) + assert_eq!(err.to_string(), expected); + assert!(!json_type.is_mergeable(other)); } _ => unreachable!(), } diff --git a/src/datatypes/src/types/struct_type.rs b/src/datatypes/src/types/struct_type.rs index c082aeb9e6..90ea6ac9f5 100644 --- a/src/datatypes/src/types/struct_type.rs +++ b/src/datatypes/src/types/struct_type.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::BTreeMap; use std::sync::Arc; use arrow::datatypes::{DataType as ArrowDataType, Field}; @@ -46,6 +47,15 @@ impl TryFrom<&Fields> for StructType { } } +impl From<[StructField; N]> for StructType { + fn from(value: [StructField; N]) -> Self { + let value: Box<[StructField]> = Box::new(value); + Self { + fields: Arc::new(value.into_vec()), + } + } +} + impl DataType for StructType { fn name(&self) -> String { format!( @@ -108,6 +118,7 @@ pub struct StructField { name: String, data_type: ConcreteDataType, nullable: bool, + metadata: BTreeMap, } impl StructField { @@ -116,6 +127,7 @@ impl StructField { name, data_type, nullable, + metadata: BTreeMap::new(), } } @@ -135,11 +147,25 @@ impl StructField { self.nullable } + pub(crate) fn insert_metadata(&mut self, key: impl ToString, value: impl ToString) { + self.metadata.insert(key.to_string(), value.to_string()); + } + + pub(crate) fn metadata(&self, key: &str) -> Option<&str> { + self.metadata.get(key).map(String::as_str) + } + pub fn to_df_field(&self) -> Field { + let metadata = self + .metadata + .iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); Field::new( self.name.clone(), self.data_type.as_arrow_type(), self.nullable, ) + .with_metadata(metadata) } } diff --git a/src/datatypes/src/vectors.rs b/src/datatypes/src/vectors.rs index e61b2ca35e..5355b35ff4 100644 --- a/src/datatypes/src/vectors.rs +++ b/src/datatypes/src/vectors.rs @@ -35,6 +35,7 @@ mod duration; mod eq; mod helper; mod interval; +pub(crate) mod json; mod list; mod null; pub(crate) mod operations; diff --git a/src/datatypes/src/vectors/helper.rs b/src/datatypes/src/vectors/helper.rs index 024a01c6b1..1bc6951ce3 100644 --- a/src/datatypes/src/vectors/helper.rs +++ b/src/datatypes/src/vectors/helper.rs @@ -464,6 +464,14 @@ impl Helper { } } +#[cfg(test)] +pub(crate) fn pretty_print(vector: VectorRef) -> String { + let array = vector.to_arrow_array(); + arrow::util::pretty::pretty_format_columns(&vector.vector_type_name(), &[array]) + .map(|x| x.to_string()) + .unwrap_or_else(|e| e.to_string()) +} + #[cfg(test)] mod tests { use arrow::array::{ diff --git a/src/datatypes/src/vectors/json.rs b/src/datatypes/src/vectors/json.rs new file mode 100644 index 0000000000..83aa1dd2aa --- /dev/null +++ b/src/datatypes/src/vectors/json.rs @@ -0,0 +1,15 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub(crate) mod builder; diff --git a/src/datatypes/src/vectors/json/builder.rs b/src/datatypes/src/vectors/json/builder.rs new file mode 100644 index 0000000000..cb19a329ef --- /dev/null +++ b/src/datatypes/src/vectors/json/builder.rs @@ -0,0 +1,485 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::any::Any; +use std::collections::HashMap; + +use snafu::OptionExt; + +use crate::data_type::ConcreteDataType; +use crate::error::{Result, TryFromValueSnafu, UnsupportedOperationSnafu}; +use crate::prelude::{ValueRef, Vector, VectorRef}; +use crate::types::JsonType; +use crate::value::StructValueRef; +use crate::vectors::{MutableVector, StructVectorBuilder}; + +struct JsonStructsBuilder { + json_type: JsonType, + inner: StructVectorBuilder, +} + +impl JsonStructsBuilder { + fn new(json_type: JsonType, capacity: usize) -> Self { + let struct_type = json_type.as_struct_type(); + let inner = StructVectorBuilder::with_type_and_capacity(struct_type, capacity); + Self { json_type, inner } + } + + fn len(&self) -> usize { + self.inner.len() + } + + fn push(&mut self, value: &ValueRef) -> Result<()> { + if self.json_type.is_plain_json() { + let value = ValueRef::Struct(StructValueRef::RefList { + val: vec![value.clone()], + fields: self.json_type.as_struct_type(), + }); + self.inner.try_push_value_ref(&value) + } else { + self.inner.try_push_value_ref(value) + } + } + + /// Try to merge (and consume the data of) other json vector builder into this one. + /// Note that the other builder's json type must be able to be merged with this one's + /// (this one's json type has all the fields in other one's, and no datatypes conflict). + /// Normally this is guaranteed, as long as json values are pushed through [JsonVectorBuilder]. + fn try_merge(&mut self, other: &mut JsonStructsBuilder) -> Result<()> { + debug_assert!(self.json_type.is_mergeable(&other.json_type)); + + fn helper(this: &mut StructVectorBuilder, that: &mut StructVectorBuilder) -> Result<()> { + let that_len = that.len(); + if let Some(x) = that.mut_null_buffer().finish() { + this.mut_null_buffer().append_buffer(&x) + } else { + this.mut_null_buffer().append_n_non_nulls(that_len); + } + + let that_fields = that.struct_type().fields(); + let mut that_builders = that_fields + .iter() + .zip(that.mut_value_builders().iter_mut()) + .map(|(field, builder)| (field.name(), builder)) + .collect::>(); + + for (field, this_builder) in this + .struct_type() + .fields() + .iter() + .zip(this.mut_value_builders().iter_mut()) + { + if let Some(that_builder) = that_builders.get_mut(field.name()) { + if field.data_type().is_struct() { + let this = this_builder + .as_mut_any() + .downcast_mut::() + // Safety: a struct datatype field must be corresponding to a struct vector builder. + .unwrap(); + + let that = that_builder + .as_mut_any() + .downcast_mut::() + // Safety: other builder with same field name must have same datatype, + // ensured because the two json types are mergeable. + .unwrap(); + helper(this, that)?; + } else { + let vector = that_builder.to_vector(); + this_builder.extend_slice_of(vector.as_ref(), 0, vector.len())?; + } + } else { + this_builder.push_nulls(that_len); + } + } + Ok(()) + } + helper(&mut self.inner, &mut other.inner) + } + + /// Same as [JsonStructsBuilder::try_merge], but does not consume the other builder's data. + fn try_merge_cloned(&mut self, other: &JsonStructsBuilder) -> Result<()> { + debug_assert!(self.json_type.is_mergeable(&other.json_type)); + + fn helper(this: &mut StructVectorBuilder, that: &StructVectorBuilder) -> Result<()> { + let that_len = that.len(); + if let Some(x) = that.null_buffer().finish_cloned() { + this.mut_null_buffer().append_buffer(&x) + } else { + this.mut_null_buffer().append_n_non_nulls(that_len); + } + + let that_fields = that.struct_type().fields(); + let that_builders = that_fields + .iter() + .zip(that.value_builders().iter()) + .map(|(field, builder)| (field.name(), builder)) + .collect::>(); + + for (field, this_builder) in this + .struct_type() + .fields() + .iter() + .zip(this.mut_value_builders().iter_mut()) + { + if let Some(that_builder) = that_builders.get(field.name()) { + if field.data_type().is_struct() { + let this = this_builder + .as_mut_any() + .downcast_mut::() + // Safety: a struct datatype field must be corresponding to a struct vector builder. + .unwrap(); + + let that = that_builder + .as_any() + .downcast_ref::() + // Safety: other builder with same field name must have same datatype, + // ensured because the two json types are mergeable. + .unwrap(); + helper(this, that)?; + } else { + let vector = that_builder.to_vector_cloned(); + this_builder.extend_slice_of(vector.as_ref(), 0, vector.len())?; + } + } else { + this_builder.push_nulls(that_len); + } + } + Ok(()) + } + helper(&mut self.inner, &other.inner) + } +} + +/// The vector builder for json type values. +/// +/// Json type are dynamic, to some degree (as long as they can be merged into each other). So are +/// json values. Json values are physically stored in struct vectors, which require the types of +/// struct values to be fixed inside a certain struct vector. So to resolve "dynamic" vs "fixed" +/// datatype problem, in this builder, each type of json value gets its own struct vector builder. +/// Once new json type value is pushing into this builder, it creates a new "child" builder for it. +/// +/// Given the "mixed" nature of the values stored in this builder, to produce the json vector, a +/// "merge" operation is performed. The "merge" is to iterate over all the "child" builders, and fill +/// nulls for missing json fields. The final vector's json type is fixed to be the "merge" of all +/// pushed json types. +pub(crate) struct JsonVectorBuilder { + merged_type: JsonType, + capacity: usize, + builders: Vec, +} + +impl JsonVectorBuilder { + pub(crate) fn with_capacity(capacity: usize) -> Self { + Self { + merged_type: JsonType::empty(), + capacity, + builders: vec![], + } + } + + fn try_create_new_builder(&mut self, json_type: &JsonType) -> Result<&mut JsonStructsBuilder> { + self.merged_type.merge(json_type)?; + + let builder = JsonStructsBuilder::new(json_type.clone(), self.capacity); + self.builders.push(builder); + + let len = self.builders.len(); + Ok(&mut self.builders[len - 1]) + } +} + +impl MutableVector for JsonVectorBuilder { + fn data_type(&self) -> ConcreteDataType { + ConcreteDataType::Json(self.merged_type.clone()) + } + + fn len(&self) -> usize { + self.builders.iter().map(|x| x.len()).sum() + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn as_mut_any(&mut self) -> &mut dyn Any { + self + } + + fn to_vector(&mut self) -> VectorRef { + // Fast path: + if self.builders.len() == 1 { + return self.builders[0].inner.to_vector(); + } + + let mut unified_jsons = JsonStructsBuilder::new(self.merged_type.clone(), self.capacity); + for builder in self.builders.iter_mut() { + unified_jsons + .try_merge(builder) + // Safety: the "unified_jsons" has the merged json type from all the builders, + // so it should merge them without errors. + .unwrap_or_else(|e| panic!("failed to merge json builders, error: {e}")); + } + unified_jsons.inner.to_vector() + } + + fn to_vector_cloned(&self) -> VectorRef { + // Fast path: + if self.builders.len() == 1 { + return self.builders[0].inner.to_vector_cloned(); + } + + let mut unified_jsons = JsonStructsBuilder::new(self.merged_type.clone(), self.capacity); + for builder in self.builders.iter() { + unified_jsons + .try_merge_cloned(builder) + // Safety: the "unified_jsons" has the merged json type from all the builders, + // so it should merge them without errors. + .unwrap_or_else(|e| panic!("failed to merge json builders, error: {e}")); + } + unified_jsons.inner.to_vector_cloned() + } + + fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> { + let data_type = value.data_type(); + let json_type = data_type.as_json().with_context(|| TryFromValueSnafu { + reason: format!("expected json value, got {value:?}"), + })?; + + let builder = match self.builders.last_mut() { + Some(last) => { + if &last.json_type != json_type { + self.try_create_new_builder(json_type)? + } else { + last + } + } + None => self.try_create_new_builder(json_type)?, + }; + + let ValueRef::Json(value) = value else { + // Safety: json datatype value must be the value of json. + unreachable!() + }; + builder.push(value) + } + + fn push_null(&mut self) { + let null_json_value = ValueRef::Json(Box::new(ValueRef::Null)); + self.try_push_value_ref(&null_json_value) + // Safety: learning from the method "try_push_value_ref", a null json value should be + // always able to push into any json vectors. + .unwrap_or_else(|e| { + panic!("failed to push null json value: {null_json_value:?}, error: {e}") + }); + } + + fn extend_slice_of(&mut self, _: &dyn Vector, _: usize, _: usize) -> Result<()> { + UnsupportedOperationSnafu { + op: "extend_slice_of", + vector_type: "JsonVector", + } + .fail() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::data_type::DataType; + use crate::json::JsonStructureSettings; + use crate::vectors::helper::pretty_print; + + fn push(json: &str, builder: &mut JsonVectorBuilder, expected: std::result::Result<(), &str>) { + let settings = JsonStructureSettings::Structured(None); + let json: serde_json::Value = serde_json::from_str(json).unwrap(); + let value = settings.encode(json).unwrap(); + + let value = value.as_value_ref(); + let result = builder.try_push_value_ref(&value); + match (result, expected) { + (Ok(()), Ok(())) => (), + (Err(e), Err(expected)) => assert_eq!(e.to_string(), expected), + _ => unreachable!(), + } + } + + #[test] + fn test_push_plain_jsons() -> Result<()> { + let jsons = vec!["1", "2", r#""s""#, "[true]"]; + let results = vec![ + Ok(()), + Ok(()), + Err( + "Failed to merge JSON datatype: datatypes have conflict, this: Int64, that: String", + ), + Err( + "Failed to merge JSON datatype: datatypes have conflict, this: Int64, that: List", + ), + ]; + let mut builder = JsonVectorBuilder::with_capacity(1); + for (json, result) in jsons.into_iter().zip(results.into_iter()) { + push(json, &mut builder, result); + } + let vector = builder.to_vector(); + let expected = r#" ++----------------+ +| StructVector | ++----------------+ +| {__plain__: 1} | +| {__plain__: 2} | ++----------------+"#; + assert_eq!(pretty_print(vector), expected.trim()); + Ok(()) + } + + #[test] + fn test_push_json_objects() -> Result<()> { + let jsons = vec![ + r#"{ + "s": "a", + "list": [1, 2, 3] + }"#, + r#"{ + "list": [4], + "s": "b" + }"#, + r#"{ + "s": "c", + "float": 0.9 + }"#, + r#"{ + "float": 0.8, + "s": "d" + }"#, + r#"{ + "float": 0.7, + "int": -1 + }"#, + r#"{ + "int": 0, + "float": 0.6 + }"#, + r#"{ + "int": 1, + "object": {"hello": "world", "timestamp": 1761523200000} + }"#, + r#"{ + "object": {"hello": "greptime", "timestamp": 1761523201000}, + "int": 2 + }"#, + r#"{ + "object": {"timestamp": 1761523202000}, + "nested": {"a": {"b": {"b": {"a": "abba"}}}} + }"#, + r#"{ + "nested": {"a": {"b": {"a": {"b": "abab"}}}}, + "object": {"timestamp": 1761523203000} + }"#, + ]; + let mut builder = JsonVectorBuilder::with_capacity(1); + for json in jsons { + push(json, &mut builder, Ok(())); + } + assert_eq!(builder.len(), 10); + + // test children builders: + assert_eq!(builder.builders.len(), 6); + let expect_types = [ + r#"Json, "s": String>>"#, + r#"Json>"#, + r#"Json>"#, + r#"Json>>"#, + r#"Json>>>, "object": Struct<"timestamp": Int64>>>"#, + r#"Json>>>, "object": Struct<"timestamp": Int64>>>"#, + ]; + let expect_vectors = [ + r#" ++-------------------------+ +| StructVector | ++-------------------------+ +| {list: [1, 2, 3], s: a} | +| {list: [4], s: b} | ++-------------------------+"#, + r#" ++--------------------+ +| StructVector | ++--------------------+ +| {float: 0.9, s: c} | +| {float: 0.8, s: d} | ++--------------------+"#, + r#" ++-----------------------+ +| StructVector | ++-----------------------+ +| {float: 0.7, int: -1} | +| {float: 0.6, int: 0} | ++-----------------------+"#, + r#" ++---------------------------------------------------------------+ +| StructVector | ++---------------------------------------------------------------+ +| {int: 1, object: {hello: world, timestamp: 1761523200000}} | +| {int: 2, object: {hello: greptime, timestamp: 1761523201000}} | ++---------------------------------------------------------------+"#, + r#" ++------------------------------------------------------------------------+ +| StructVector | ++------------------------------------------------------------------------+ +| {nested: {a: {b: {b: {a: abba}}}}, object: {timestamp: 1761523202000}} | ++------------------------------------------------------------------------+"#, + r#" ++------------------------------------------------------------------------+ +| StructVector | ++------------------------------------------------------------------------+ +| {nested: {a: {b: {a: {b: abab}}}}, object: {timestamp: 1761523203000}} | ++------------------------------------------------------------------------+"#, + ]; + for (builder, (expect_type, expect_vector)) in builder + .builders + .iter() + .zip(expect_types.into_iter().zip(expect_vectors.into_iter())) + { + assert_eq!(builder.json_type.name(), expect_type); + let vector = builder.inner.to_vector_cloned(); + assert_eq!(pretty_print(vector), expect_vector.trim()); + } + + // test final merged json type: + let expected = r#"Json, "nested": Struct<"a": Struct<"b": Struct<"a": Struct<"b": String>, "b": Struct<"a": String>>>>, "object": Struct<"hello": String, "timestamp": Int64>, "s": String>>"#; + assert_eq!(builder.data_type().to_string(), expected); + + // test final produced vector: + let expected = r#" ++-------------------------------------------------------------------------------------------------------------------+ +| StructVector | ++-------------------------------------------------------------------------------------------------------------------+ +| {float: , int: , list: [1, 2, 3], nested: , object: , s: a} | +| {float: , int: , list: [4], nested: , object: , s: b} | +| {float: 0.9, int: , list: , nested: , object: , s: c} | +| {float: 0.8, int: , list: , nested: , object: , s: d} | +| {float: 0.7, int: -1, list: , nested: , object: , s: } | +| {float: 0.6, int: 0, list: , nested: , object: , s: } | +| {float: , int: 1, list: , nested: , object: {hello: world, timestamp: 1761523200000}, s: } | +| {float: , int: 2, list: , nested: , object: {hello: greptime, timestamp: 1761523201000}, s: } | +| {float: , int: , list: , nested: {a: {b: {a: , b: {a: abba}}}}, object: {hello: , timestamp: 1761523202000}, s: } | +| {float: , int: , list: , nested: {a: {b: {a: {b: abab}, b: }}}, object: {hello: , timestamp: 1761523203000}, s: } | ++-------------------------------------------------------------------------------------------------------------------+"#; + let vector = builder.to_vector_cloned(); + assert_eq!(pretty_print(vector), expected.trim()); + let vector = builder.to_vector(); + assert_eq!(pretty_print(vector), expected.trim()); + Ok(()) + } +} diff --git a/src/datatypes/src/vectors/struct_vector.rs b/src/datatypes/src/vectors/struct_vector.rs index e4f0fe5b2a..d9490a63bb 100644 --- a/src/datatypes/src/vectors/struct_vector.rs +++ b/src/datatypes/src/vectors/struct_vector.rs @@ -323,6 +323,26 @@ impl StructVectorBuilder { } self.null_buffer.append_null(); } + + pub(crate) fn struct_type(&self) -> &StructType { + &self.fields + } + + pub(crate) fn value_builders(&self) -> &[Box] { + &self.value_builders + } + + pub(crate) fn mut_value_builders(&mut self) -> &mut [Box] { + &mut self.value_builders + } + + pub(crate) fn null_buffer(&self) -> &NullBufferBuilder { + &self.null_buffer + } + + pub(crate) fn mut_null_buffer(&mut self) -> &mut NullBufferBuilder { + &mut self.null_buffer + } } impl MutableVector for StructVectorBuilder {