mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-07 13:52:59 +00:00
feat: json vector builder (#7151)
* resolve PR comments Signed-off-by: luofucong <luofc@foxmail.com> Update src/datatypes/src/vectors/json/builder.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> feat: json vector builder Signed-off-by: luofucong <luofc@foxmail.com> * resolve PR comments Signed-off-by: luofucong <luofc@foxmail.com> --------- Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
@@ -277,6 +277,10 @@ impl ConcreteDataType {
|
||||
matches!(self, ConcreteDataType::Null(NullType))
|
||||
}
|
||||
|
||||
pub(crate) fn is_struct(&self) -> bool {
|
||||
matches!(self, ConcreteDataType::Struct(_))
|
||||
}
|
||||
|
||||
/// Try to cast the type as a [`ListType`].
|
||||
pub fn as_list(&self) -> Option<&ListType> {
|
||||
match self {
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
#![feature(assert_matches)]
|
||||
#![feature(box_patterns)]
|
||||
|
||||
pub mod arrow_array;
|
||||
pub mod data_type;
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -31,9 +31,12 @@ use crate::scalars::ScalarVectorBuilder;
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::types::{ListType, StructField, StructType};
|
||||
use crate::value::Value;
|
||||
use crate::vectors::json::builder::JsonVectorBuilder;
|
||||
use crate::vectors::{BinaryVectorBuilder, MutableVector};
|
||||
|
||||
/// Type name string of the json datatype.
pub const JSON_TYPE_NAME: &str = "Json";
/// Name of the single struct field that holds a non-object ("plain") json value.
const JSON_PLAIN_FIELD_NAME: &str = "__plain__";
/// Metadata key set (to `"true"`) on the plain field to mark its struct as a plain json.
const JSON_PLAIN_FIELD_METADATA_KEY: &str = "is_plain_json";
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default)]
|
||||
pub enum JsonFormat {
|
||||
@@ -54,28 +57,46 @@ impl JsonType {
|
||||
Self { format }
|
||||
}
|
||||
|
||||
// TODO(LFC): remove "allow unused"
|
||||
#[allow(unused)]
|
||||
pub(crate) fn empty() -> Self {
|
||||
Self {
|
||||
format: JsonFormat::Native(Box::new(ConcreteDataType::null_datatype())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Make json type a struct type, by:
|
||||
/// - if the json is an object, its entries are mapped to struct fields, obviously;
|
||||
/// - if not, the json is one of bool, number, string or array, make it a special field called
|
||||
/// "__plain" in a struct with only that field.
|
||||
/// [JSON_PLAIN_FIELD_NAME] with metadata [JSON_PLAIN_FIELD_METADATA_KEY] = `"true"` in a
|
||||
/// struct with only that field.
|
||||
pub(crate) fn as_struct_type(&self) -> StructType {
|
||||
match &self.format {
|
||||
JsonFormat::Jsonb => StructType::default(),
|
||||
JsonFormat::Native(inner) => match inner.as_ref() {
|
||||
ConcreteDataType::Struct(t) => t.clone(),
|
||||
x => StructType::new(Arc::new(vec![StructField::new(
|
||||
"__plain".to_string(),
|
||||
x.clone(),
|
||||
true,
|
||||
)])),
|
||||
x => {
|
||||
let mut field =
|
||||
StructField::new(JSON_PLAIN_FIELD_NAME.to_string(), x.clone(), true);
|
||||
field.insert_metadata(JSON_PLAIN_FIELD_METADATA_KEY, true);
|
||||
StructType::new(Arc::new(vec![field]))
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(LFC): remove "allow unused"
|
||||
#[allow(unused)]
|
||||
/// Check if this json type is the special "plain" one.
|
||||
/// See [JsonType::as_struct_type].
|
||||
pub(crate) fn is_plain_json(&self) -> bool {
|
||||
let JsonFormat::Native(box ConcreteDataType::Struct(t)) = &self.format else {
|
||||
return true;
|
||||
};
|
||||
let fields = t.fields();
|
||||
let Some((single, [])) = fields.split_first() else {
|
||||
return false;
|
||||
};
|
||||
single.name() == JSON_PLAIN_FIELD_NAME
|
||||
&& single.metadata(JSON_PLAIN_FIELD_METADATA_KEY) == Some("true")
|
||||
}
|
||||
|
||||
/// Try to merge this json type with others, error on datatype conflict.
|
||||
pub(crate) fn merge(&mut self, other: &JsonType) -> Result<()> {
|
||||
match (&self.format, &other.format) {
|
||||
@@ -91,6 +112,47 @@ impl JsonType {
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_mergeable(&self, other: &JsonType) -> bool {
|
||||
match (&self.format, &other.format) {
|
||||
(JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
|
||||
(JsonFormat::Native(this), JsonFormat::Native(that)) => {
|
||||
is_mergeable(this.as_ref(), that.as_ref())
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_mergeable(this: &ConcreteDataType, that: &ConcreteDataType) -> bool {
|
||||
fn is_mergeable_struct(this: &StructType, that: &StructType) -> bool {
|
||||
let this_fields = this.fields();
|
||||
let this_fields = this_fields
|
||||
.iter()
|
||||
.map(|x| (x.name(), x))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
for that_field in that.fields().iter() {
|
||||
if let Some(this_field) = this_fields.get(that_field.name())
|
||||
&& !is_mergeable(this_field.data_type(), that_field.data_type())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
match (this, that) {
|
||||
(this, that) if this == that => true,
|
||||
(ConcreteDataType::List(this), ConcreteDataType::List(that)) => {
|
||||
is_mergeable(this.item_type(), that.item_type())
|
||||
}
|
||||
(ConcreteDataType::Struct(this), ConcreteDataType::Struct(that)) => {
|
||||
is_mergeable_struct(this, that)
|
||||
}
|
||||
(ConcreteDataType::Null(_), _) | (_, ConcreteDataType::Null(_)) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn merge(this: &ConcreteDataType, that: &ConcreteDataType) -> Result<ConcreteDataType> {
|
||||
@@ -166,7 +228,10 @@ impl DataType for JsonType {
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(BinaryVectorBuilder::with_capacity(capacity))
|
||||
match self.format {
|
||||
JsonFormat::Jsonb => Box::new(BinaryVectorBuilder::with_capacity(capacity)),
|
||||
JsonFormat::Native(_) => Box::new(JsonVectorBuilder::with_capacity(capacity)),
|
||||
}
|
||||
}
|
||||
|
||||
fn try_cast(&self, from: Value) -> Option<Value> {
|
||||
@@ -226,10 +291,12 @@ mod tests {
|
||||
let result = json_type.merge(other);
|
||||
match (result, expected) {
|
||||
(Ok(()), Ok(expected)) => {
|
||||
assert_eq!(json_type.name(), expected)
|
||||
assert_eq!(json_type.name(), expected);
|
||||
assert!(json_type.is_mergeable(other));
|
||||
}
|
||||
(Err(err), Err(expected)) => {
|
||||
assert_eq!(err.to_string(), expected)
|
||||
assert_eq!(err.to_string(), expected);
|
||||
assert!(!json_type.is_mergeable(other));
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::{DataType as ArrowDataType, Field};
|
||||
@@ -46,6 +47,15 @@ impl TryFrom<&Fields> for StructType {
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> From<[StructField; N]> for StructType {
|
||||
fn from(value: [StructField; N]) -> Self {
|
||||
let value: Box<[StructField]> = Box::new(value);
|
||||
Self {
|
||||
fields: Arc::new(value.into_vec()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DataType for StructType {
|
||||
fn name(&self) -> String {
|
||||
format!(
|
||||
@@ -108,6 +118,7 @@ pub struct StructField {
|
||||
name: String,
|
||||
data_type: ConcreteDataType,
|
||||
nullable: bool,
|
||||
metadata: BTreeMap<String, String>,
|
||||
}
|
||||
|
||||
impl StructField {
|
||||
@@ -116,6 +127,7 @@ impl StructField {
|
||||
name,
|
||||
data_type,
|
||||
nullable,
|
||||
metadata: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -135,11 +147,25 @@ impl StructField {
|
||||
self.nullable
|
||||
}
|
||||
|
||||
pub(crate) fn insert_metadata(&mut self, key: impl ToString, value: impl ToString) {
|
||||
self.metadata.insert(key.to_string(), value.to_string());
|
||||
}
|
||||
|
||||
pub(crate) fn metadata(&self, key: &str) -> Option<&str> {
|
||||
self.metadata.get(key).map(String::as_str)
|
||||
}
|
||||
|
||||
pub fn to_df_field(&self) -> Field {
|
||||
let metadata = self
|
||||
.metadata
|
||||
.iter()
|
||||
.map(|(k, v)| (k.clone(), v.clone()))
|
||||
.collect();
|
||||
Field::new(
|
||||
self.name.clone(),
|
||||
self.data_type.as_arrow_type(),
|
||||
self.nullable,
|
||||
)
|
||||
.with_metadata(metadata)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,6 +35,7 @@ mod duration;
|
||||
mod eq;
|
||||
mod helper;
|
||||
mod interval;
|
||||
pub(crate) mod json;
|
||||
mod list;
|
||||
mod null;
|
||||
pub(crate) mod operations;
|
||||
|
||||
@@ -464,6 +464,14 @@ impl Helper {
|
||||
}
|
||||
}
|
||||
|
||||
/// Test helper: renders the vector as a single-column arrow table, titled with the vector's
/// type name. If formatting fails, the error's message is returned instead.
#[cfg(test)]
pub(crate) fn pretty_print(vector: VectorRef) -> String {
    let column_name = vector.vector_type_name();
    let columns = [vector.to_arrow_array()];
    match arrow::util::pretty::pretty_format_columns(&column_name, &columns) {
        Ok(table) => table.to_string(),
        Err(e) => e.to_string(),
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::array::{
|
||||
|
||||
15
src/datatypes/src/vectors/json.rs
Normal file
15
src/datatypes/src/vectors/json.rs
Normal file
@@ -0,0 +1,15 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub(crate) mod builder;
|
||||
485
src/datatypes/src/vectors/json/builder.rs
Normal file
485
src/datatypes/src/vectors/json/builder.rs
Normal file
@@ -0,0 +1,485 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{Result, TryFromValueSnafu, UnsupportedOperationSnafu};
|
||||
use crate::prelude::{ValueRef, Vector, VectorRef};
|
||||
use crate::types::JsonType;
|
||||
use crate::value::StructValueRef;
|
||||
use crate::vectors::{MutableVector, StructVectorBuilder};
|
||||
|
||||
struct JsonStructsBuilder {
|
||||
json_type: JsonType,
|
||||
inner: StructVectorBuilder,
|
||||
}
|
||||
|
||||
impl JsonStructsBuilder {
|
||||
fn new(json_type: JsonType, capacity: usize) -> Self {
|
||||
let struct_type = json_type.as_struct_type();
|
||||
let inner = StructVectorBuilder::with_type_and_capacity(struct_type, capacity);
|
||||
Self { json_type, inner }
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.inner.len()
|
||||
}
|
||||
|
||||
fn push(&mut self, value: &ValueRef) -> Result<()> {
|
||||
if self.json_type.is_plain_json() {
|
||||
let value = ValueRef::Struct(StructValueRef::RefList {
|
||||
val: vec![value.clone()],
|
||||
fields: self.json_type.as_struct_type(),
|
||||
});
|
||||
self.inner.try_push_value_ref(&value)
|
||||
} else {
|
||||
self.inner.try_push_value_ref(value)
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to merge (and consume the data of) other json vector builder into this one.
|
||||
/// Note that the other builder's json type must be able to be merged with this one's
|
||||
/// (this one's json type has all the fields in other one's, and no datatypes conflict).
|
||||
/// Normally this is guaranteed, as long as json values are pushed through [JsonVectorBuilder].
|
||||
fn try_merge(&mut self, other: &mut JsonStructsBuilder) -> Result<()> {
|
||||
debug_assert!(self.json_type.is_mergeable(&other.json_type));
|
||||
|
||||
fn helper(this: &mut StructVectorBuilder, that: &mut StructVectorBuilder) -> Result<()> {
|
||||
let that_len = that.len();
|
||||
if let Some(x) = that.mut_null_buffer().finish() {
|
||||
this.mut_null_buffer().append_buffer(&x)
|
||||
} else {
|
||||
this.mut_null_buffer().append_n_non_nulls(that_len);
|
||||
}
|
||||
|
||||
let that_fields = that.struct_type().fields();
|
||||
let mut that_builders = that_fields
|
||||
.iter()
|
||||
.zip(that.mut_value_builders().iter_mut())
|
||||
.map(|(field, builder)| (field.name(), builder))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
for (field, this_builder) in this
|
||||
.struct_type()
|
||||
.fields()
|
||||
.iter()
|
||||
.zip(this.mut_value_builders().iter_mut())
|
||||
{
|
||||
if let Some(that_builder) = that_builders.get_mut(field.name()) {
|
||||
if field.data_type().is_struct() {
|
||||
let this = this_builder
|
||||
.as_mut_any()
|
||||
.downcast_mut::<StructVectorBuilder>()
|
||||
// Safety: a struct datatype field must be corresponding to a struct vector builder.
|
||||
.unwrap();
|
||||
|
||||
let that = that_builder
|
||||
.as_mut_any()
|
||||
.downcast_mut::<StructVectorBuilder>()
|
||||
// Safety: other builder with same field name must have same datatype,
|
||||
// ensured because the two json types are mergeable.
|
||||
.unwrap();
|
||||
helper(this, that)?;
|
||||
} else {
|
||||
let vector = that_builder.to_vector();
|
||||
this_builder.extend_slice_of(vector.as_ref(), 0, vector.len())?;
|
||||
}
|
||||
} else {
|
||||
this_builder.push_nulls(that_len);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
helper(&mut self.inner, &mut other.inner)
|
||||
}
|
||||
|
||||
/// Same as [JsonStructsBuilder::try_merge], but does not consume the other builder's data.
|
||||
fn try_merge_cloned(&mut self, other: &JsonStructsBuilder) -> Result<()> {
|
||||
debug_assert!(self.json_type.is_mergeable(&other.json_type));
|
||||
|
||||
fn helper(this: &mut StructVectorBuilder, that: &StructVectorBuilder) -> Result<()> {
|
||||
let that_len = that.len();
|
||||
if let Some(x) = that.null_buffer().finish_cloned() {
|
||||
this.mut_null_buffer().append_buffer(&x)
|
||||
} else {
|
||||
this.mut_null_buffer().append_n_non_nulls(that_len);
|
||||
}
|
||||
|
||||
let that_fields = that.struct_type().fields();
|
||||
let that_builders = that_fields
|
||||
.iter()
|
||||
.zip(that.value_builders().iter())
|
||||
.map(|(field, builder)| (field.name(), builder))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
for (field, this_builder) in this
|
||||
.struct_type()
|
||||
.fields()
|
||||
.iter()
|
||||
.zip(this.mut_value_builders().iter_mut())
|
||||
{
|
||||
if let Some(that_builder) = that_builders.get(field.name()) {
|
||||
if field.data_type().is_struct() {
|
||||
let this = this_builder
|
||||
.as_mut_any()
|
||||
.downcast_mut::<StructVectorBuilder>()
|
||||
// Safety: a struct datatype field must be corresponding to a struct vector builder.
|
||||
.unwrap();
|
||||
|
||||
let that = that_builder
|
||||
.as_any()
|
||||
.downcast_ref::<StructVectorBuilder>()
|
||||
// Safety: other builder with same field name must have same datatype,
|
||||
// ensured because the two json types are mergeable.
|
||||
.unwrap();
|
||||
helper(this, that)?;
|
||||
} else {
|
||||
let vector = that_builder.to_vector_cloned();
|
||||
this_builder.extend_slice_of(vector.as_ref(), 0, vector.len())?;
|
||||
}
|
||||
} else {
|
||||
this_builder.push_nulls(that_len);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
helper(&mut self.inner, &other.inner)
|
||||
}
|
||||
}
|
||||
|
||||
/// The vector builder for json type values.
|
||||
///
|
||||
/// Json type are dynamic, to some degree (as long as they can be merged into each other). So are
|
||||
/// json values. Json values are physically stored in struct vectors, which require the types of
|
||||
/// struct values to be fixed inside a certain struct vector. So to resolve "dynamic" vs "fixed"
|
||||
/// datatype problem, in this builder, each type of json value gets its own struct vector builder.
|
||||
/// Once new json type value is pushing into this builder, it creates a new "child" builder for it.
|
||||
///
|
||||
/// Given the "mixed" nature of the values stored in this builder, to produce the json vector, a
|
||||
/// "merge" operation is performed. The "merge" is to iterate over all the "child" builders, and fill
|
||||
/// nulls for missing json fields. The final vector's json type is fixed to be the "merge" of all
|
||||
/// pushed json types.
|
||||
pub(crate) struct JsonVectorBuilder {
|
||||
merged_type: JsonType,
|
||||
capacity: usize,
|
||||
builders: Vec<JsonStructsBuilder>,
|
||||
}
|
||||
|
||||
impl JsonVectorBuilder {
|
||||
pub(crate) fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
merged_type: JsonType::empty(),
|
||||
capacity,
|
||||
builders: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
fn try_create_new_builder(&mut self, json_type: &JsonType) -> Result<&mut JsonStructsBuilder> {
|
||||
self.merged_type.merge(json_type)?;
|
||||
|
||||
let builder = JsonStructsBuilder::new(json_type.clone(), self.capacity);
|
||||
self.builders.push(builder);
|
||||
|
||||
let len = self.builders.len();
|
||||
Ok(&mut self.builders[len - 1])
|
||||
}
|
||||
}
|
||||
|
||||
impl MutableVector for JsonVectorBuilder {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::Json(self.merged_type.clone())
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.builders.iter().map(|x| x.len()).sum()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
// Fast path:
|
||||
if self.builders.len() == 1 {
|
||||
return self.builders[0].inner.to_vector();
|
||||
}
|
||||
|
||||
let mut unified_jsons = JsonStructsBuilder::new(self.merged_type.clone(), self.capacity);
|
||||
for builder in self.builders.iter_mut() {
|
||||
unified_jsons
|
||||
.try_merge(builder)
|
||||
// Safety: the "unified_jsons" has the merged json type from all the builders,
|
||||
// so it should merge them without errors.
|
||||
.unwrap_or_else(|e| panic!("failed to merge json builders, error: {e}"));
|
||||
}
|
||||
unified_jsons.inner.to_vector()
|
||||
}
|
||||
|
||||
fn to_vector_cloned(&self) -> VectorRef {
|
||||
// Fast path:
|
||||
if self.builders.len() == 1 {
|
||||
return self.builders[0].inner.to_vector_cloned();
|
||||
}
|
||||
|
||||
let mut unified_jsons = JsonStructsBuilder::new(self.merged_type.clone(), self.capacity);
|
||||
for builder in self.builders.iter() {
|
||||
unified_jsons
|
||||
.try_merge_cloned(builder)
|
||||
// Safety: the "unified_jsons" has the merged json type from all the builders,
|
||||
// so it should merge them without errors.
|
||||
.unwrap_or_else(|e| panic!("failed to merge json builders, error: {e}"));
|
||||
}
|
||||
unified_jsons.inner.to_vector_cloned()
|
||||
}
|
||||
|
||||
fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> {
|
||||
let data_type = value.data_type();
|
||||
let json_type = data_type.as_json().with_context(|| TryFromValueSnafu {
|
||||
reason: format!("expected json value, got {value:?}"),
|
||||
})?;
|
||||
|
||||
let builder = match self.builders.last_mut() {
|
||||
Some(last) => {
|
||||
if &last.json_type != json_type {
|
||||
self.try_create_new_builder(json_type)?
|
||||
} else {
|
||||
last
|
||||
}
|
||||
}
|
||||
None => self.try_create_new_builder(json_type)?,
|
||||
};
|
||||
|
||||
let ValueRef::Json(value) = value else {
|
||||
// Safety: json datatype value must be the value of json.
|
||||
unreachable!()
|
||||
};
|
||||
builder.push(value)
|
||||
}
|
||||
|
||||
fn push_null(&mut self) {
|
||||
let null_json_value = ValueRef::Json(Box::new(ValueRef::Null));
|
||||
self.try_push_value_ref(&null_json_value)
|
||||
// Safety: learning from the method "try_push_value_ref", a null json value should be
|
||||
// always able to push into any json vectors.
|
||||
.unwrap_or_else(|e| {
|
||||
panic!("failed to push null json value: {null_json_value:?}, error: {e}")
|
||||
});
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, _: &dyn Vector, _: usize, _: usize) -> Result<()> {
|
||||
UnsupportedOperationSnafu {
|
||||
op: "extend_slice_of",
|
||||
vector_type: "JsonVector",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data_type::DataType;
    use crate::json::JsonStructureSettings;
    use crate::vectors::helper::pretty_print;

    /// Parses `json`, encodes it as a structured json value and pushes it into `builder`,
    /// asserting that the push succeeds or fails with the `expected` error message.
    fn push(json: &str, builder: &mut JsonVectorBuilder, expected: std::result::Result<(), &str>) {
        let settings = JsonStructureSettings::Structured(None);
        let json: serde_json::Value = serde_json::from_str(json).unwrap();
        let value = settings.encode(json).unwrap();

        let value = value.as_value_ref();
        let result = builder.try_push_value_ref(&value);
        match (result, expected) {
            (Ok(()), Ok(())) => (),
            (Err(e), Err(expected)) => assert_eq!(e.to_string(), expected),
            _ => unreachable!(),
        }
    }

    /// Builds the single-column table expected from `pretty_print` for a struct vector: a
    /// "StructVector" header followed by `rows`, all left-aligned and padded to the widest cell.
    /// Building it from the cell contents keeps the expectations independent of hand-written
    /// column padding.
    fn expected_table(rows: &[&str]) -> String {
        let header = "StructVector";
        let width = rows
            .iter()
            .map(|row| row.len())
            .fold(header.len(), usize::max);
        let border = format!("+{}+", "-".repeat(width + 2));

        let mut lines = vec![
            border.clone(),
            format!("| {header:<width$} |"),
            border.clone(),
        ];
        lines.extend(rows.iter().map(|row| format!("| {row:<width$} |")));
        lines.push(border);
        lines.join("\n")
    }

    #[test]
    fn test_push_plain_jsons() -> Result<()> {
        let jsons = vec!["1", "2", r#""s""#, "[true]"];
        let results = vec![
            Ok(()),
            Ok(()),
            Err(
                "Failed to merge JSON datatype: datatypes have conflict, this: Int64, that: String",
            ),
            Err(
                "Failed to merge JSON datatype: datatypes have conflict, this: Int64, that: List<Boolean>",
            ),
        ];
        let mut builder = JsonVectorBuilder::with_capacity(1);
        for (json, result) in jsons.into_iter().zip(results.into_iter()) {
            push(json, &mut builder, result);
        }
        let vector = builder.to_vector();
        let expected = expected_table(&["{__plain__: 1}", "{__plain__: 2}"]);
        assert_eq!(pretty_print(vector), expected);
        Ok(())
    }

    #[test]
    fn test_push_json_objects() -> Result<()> {
        let jsons = vec![
            r#"{
                "s": "a",
                "list": [1, 2, 3]
            }"#,
            r#"{
                "list": [4],
                "s": "b"
            }"#,
            r#"{
                "s": "c",
                "float": 0.9
            }"#,
            r#"{
                "float": 0.8,
                "s": "d"
            }"#,
            r#"{
                "float": 0.7,
                "int": -1
            }"#,
            r#"{
                "int": 0,
                "float": 0.6
            }"#,
            r#"{
                "int": 1,
                "object": {"hello": "world", "timestamp": 1761523200000}
            }"#,
            r#"{
                "object": {"hello": "greptime", "timestamp": 1761523201000},
                "int": 2
            }"#,
            r#"{
                "object": {"timestamp": 1761523202000},
                "nested": {"a": {"b": {"b": {"a": "abba"}}}}
            }"#,
            r#"{
                "nested": {"a": {"b": {"a": {"b": "abab"}}}},
                "object": {"timestamp": 1761523203000}
            }"#,
        ];
        let mut builder = JsonVectorBuilder::with_capacity(1);
        for json in jsons {
            push(json, &mut builder, Ok(()));
        }
        assert_eq!(builder.len(), 10);

        // test children builders:
        assert_eq!(builder.builders.len(), 6);
        let expect_types = [
            r#"Json<Struct<"list": List<Int64>, "s": String>>"#,
            r#"Json<Struct<"float": Float64, "s": String>>"#,
            r#"Json<Struct<"float": Float64, "int": Int64>>"#,
            r#"Json<Struct<"int": Int64, "object": Struct<"hello": String, "timestamp": Int64>>>"#,
            r#"Json<Struct<"nested": Struct<"a": Struct<"b": Struct<"b": Struct<"a": String>>>>, "object": Struct<"timestamp": Int64>>>"#,
            r#"Json<Struct<"nested": Struct<"a": Struct<"b": Struct<"a": Struct<"b": String>>>>, "object": Struct<"timestamp": Int64>>>"#,
        ];
        let expect_vectors: [&[&str]; 6] = [
            &["{list: [1, 2, 3], s: a}", "{list: [4], s: b}"],
            &["{float: 0.9, s: c}", "{float: 0.8, s: d}"],
            &["{float: 0.7, int: -1}", "{float: 0.6, int: 0}"],
            &[
                "{int: 1, object: {hello: world, timestamp: 1761523200000}}",
                "{int: 2, object: {hello: greptime, timestamp: 1761523201000}}",
            ],
            &["{nested: {a: {b: {b: {a: abba}}}}, object: {timestamp: 1761523202000}}"],
            &["{nested: {a: {b: {a: {b: abab}}}}, object: {timestamp: 1761523203000}}"],
        ];
        for (builder, (expect_type, expect_rows)) in builder
            .builders
            .iter()
            .zip(expect_types.into_iter().zip(expect_vectors.into_iter()))
        {
            assert_eq!(builder.json_type.name(), expect_type);
            let vector = builder.inner.to_vector_cloned();
            assert_eq!(pretty_print(vector), expected_table(expect_rows));
        }

        // test final merged json type:
        let expected = r#"Json<Struct<"float": Float64, "int": Int64, "list": List<Int64>, "nested": Struct<"a": Struct<"b": Struct<"a": Struct<"b": String>, "b": Struct<"a": String>>>>, "object": Struct<"hello": String, "timestamp": Int64>, "s": String>>"#;
        assert_eq!(builder.data_type().to_string(), expected);

        // test final produced vector:
        let expected = expected_table(&[
            "{float: , int: , list: [1, 2, 3], nested: , object: , s: a}",
            "{float: , int: , list: [4], nested: , object: , s: b}",
            "{float: 0.9, int: , list: , nested: , object: , s: c}",
            "{float: 0.8, int: , list: , nested: , object: , s: d}",
            "{float: 0.7, int: -1, list: , nested: , object: , s: }",
            "{float: 0.6, int: 0, list: , nested: , object: , s: }",
            "{float: , int: 1, list: , nested: , object: {hello: world, timestamp: 1761523200000}, s: }",
            "{float: , int: 2, list: , nested: , object: {hello: greptime, timestamp: 1761523201000}, s: }",
            "{float: , int: , list: , nested: {a: {b: {a: , b: {a: abba}}}}, object: {hello: , timestamp: 1761523202000}, s: }",
            "{float: , int: , list: , nested: {a: {b: {a: {b: abab}, b: }}}, object: {hello: , timestamp: 1761523203000}, s: }",
        ]);
        let vector = builder.to_vector_cloned();
        assert_eq!(pretty_print(vector), expected);
        let vector = builder.to_vector();
        assert_eq!(pretty_print(vector), expected);
        Ok(())
    }
}
|
||||
@@ -323,6 +323,26 @@ impl StructVectorBuilder {
|
||||
}
|
||||
self.null_buffer.append_null();
|
||||
}
|
||||
|
||||
/// The struct type of the values this builder builds.
pub(crate) fn struct_type(&self) -> &StructType {
    &self.fields
}
|
||||
|
||||
/// Shared access to this builder's value builders.
pub(crate) fn value_builders(&self) -> &[Box<dyn MutableVector>] {
    &self.value_builders
}
|
||||
|
||||
/// Mutable access to this builder's value builders.
pub(crate) fn mut_value_builders(&mut self) -> &mut [Box<dyn MutableVector>] {
    &mut self.value_builders
}
|
||||
|
||||
/// Shared access to this builder's null buffer builder.
pub(crate) fn null_buffer(&self) -> &NullBufferBuilder {
    &self.null_buffer
}
|
||||
|
||||
/// Mutable access to this builder's null buffer builder.
pub(crate) fn mut_null_buffer(&mut self) -> &mut NullBufferBuilder {
    &mut self.null_buffer
}
|
||||
}
|
||||
|
||||
impl MutableVector for StructVectorBuilder {
|
||||
|
||||
Reference in New Issue
Block a user