mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-06-01 12:50:40 +00:00
test: add jsonbench tests (#8165)
Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
@@ -59,7 +59,10 @@ impl FunctionRewrite for JsonGetRewriter {
|
||||
// json_get(column, path, <data_type>)
|
||||
// )
|
||||
fn inject_type_from_cast_expr(cast: Cast) -> Result<Transformed<Expr>> {
|
||||
let Cast { expr, data_type } = cast;
|
||||
let Cast {
|
||||
expr,
|
||||
mut data_type,
|
||||
} = cast;
|
||||
|
||||
let mut json_get = match *expr {
|
||||
Expr::ScalarFunction(f)
|
||||
@@ -75,6 +78,9 @@ fn inject_type_from_cast_expr(cast: Cast) -> Result<Transformed<Expr>> {
|
||||
}
|
||||
};
|
||||
|
||||
if data_type.is_string() {
|
||||
data_type = DataType::Utf8View;
|
||||
}
|
||||
let with_type = ScalarValue::try_new_null(&data_type).map(|x| Expr::Literal(x, None))?;
|
||||
json_get.args.push(with_type);
|
||||
Ok(Transformed::yes(Expr::ScalarFunction(json_get)))
|
||||
|
||||
@@ -128,7 +128,7 @@ impl JsonNativeType {
|
||||
JsonNumberType::I64 => ArrowDataType::Int64,
|
||||
JsonNumberType::F64 => ArrowDataType::Float64,
|
||||
},
|
||||
JsonNativeType::String => ArrowDataType::Utf8,
|
||||
JsonNativeType::String => ArrowDataType::Utf8View,
|
||||
JsonNativeType::Array(array) => {
|
||||
ArrowDataType::List(Arc::new(Field::new("item", array.as_arrow_type(), true)))
|
||||
}
|
||||
|
||||
@@ -17,16 +17,24 @@ use std::sync::Arc;
|
||||
|
||||
use arrow::compute;
|
||||
use arrow::util::display::{ArrayFormatter, FormatOptions};
|
||||
use arrow_array::builder::{
|
||||
ArrayBuilder, BooleanBuilder, Float64Builder, Int64Builder, NullBuilder, StringViewBuilder,
|
||||
make_builder,
|
||||
};
|
||||
use arrow_array::cast::AsArray;
|
||||
use arrow_array::types::{Float64Type, Int64Type, UInt64Type};
|
||||
use arrow_array::{Array, ArrayRef, GenericListArray, ListArray, StructArray, new_null_array};
|
||||
use arrow_schema::{DataType, FieldRef};
|
||||
use common_telemetry::debug;
|
||||
use serde_json::Value;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::arrow_array::{StringArray, binary_array_value, string_array_value};
|
||||
use crate::arrow_array::{
|
||||
MutableBinaryArray, StringViewArray, binary_array_value, string_array_value,
|
||||
};
|
||||
use crate::error::{
|
||||
AlignJsonArraySnafu, ArrowComputeSnafu, DeserializeSnafu, InvalidJsonSnafu, Result,
|
||||
AlignJsonArraySnafu, ArrowComputeSnafu, CastTypeSnafu, DeserializeSnafu, InvalidJsonSnafu,
|
||||
Result, SerializeSnafu,
|
||||
};
|
||||
|
||||
pub struct JsonArray<'a> {
|
||||
@@ -101,6 +109,12 @@ impl JsonArray<'_> {
|
||||
return Ok(self.inner.clone());
|
||||
}
|
||||
|
||||
debug!(
|
||||
"Try aligning JSON array {} to data type {}",
|
||||
self.inner.data_type(),
|
||||
expect
|
||||
);
|
||||
|
||||
let struct_array = self.inner.as_struct_opt().context(AlignJsonArraySnafu {
|
||||
reason: "expect struct array",
|
||||
})?;
|
||||
@@ -178,11 +192,23 @@ impl JsonArray<'_> {
|
||||
}
|
||||
|
||||
fn try_cast(&self, to_type: &DataType) -> Result<ArrayRef> {
|
||||
if compute::can_cast_types(self.inner.data_type(), to_type) {
|
||||
let from_type = self.inner.data_type();
|
||||
if from_type == to_type {
|
||||
return Ok(self.inner.clone());
|
||||
}
|
||||
|
||||
if from_type.is_binary() && !to_type.is_binary() {
|
||||
return self.decode_variant(to_type);
|
||||
}
|
||||
|
||||
if !from_type.is_binary() && to_type.is_binary() {
|
||||
return self.encode_variant();
|
||||
}
|
||||
|
||||
if compute::can_cast_types(from_type, to_type) {
|
||||
return compute::cast(&self.inner, to_type).context(ArrowComputeSnafu);
|
||||
}
|
||||
|
||||
// TODO(LFC): Cast according to `to_type` instead of formatting to String here.
|
||||
let formatter = ArrayFormatter::try_new(&self.inner, &FormatOptions::default())
|
||||
.context(ArrowComputeSnafu)?;
|
||||
let values = (0..self.inner.len())
|
||||
@@ -192,7 +218,91 @@ impl JsonArray<'_> {
|
||||
.then(|| formatter.value(i).to_string())
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Ok(Arc::new(StringArray::from(values)))
|
||||
Ok(Arc::new(StringViewArray::from(values)))
|
||||
}
|
||||
|
||||
fn encode_variant(&self) -> Result<ArrayRef> {
|
||||
let len = self.inner.len();
|
||||
let mut encoded = Vec::with_capacity(len);
|
||||
let mut total_bytes = 0;
|
||||
|
||||
for i in 0..len {
|
||||
let value = self.try_get_value(i)?;
|
||||
if value.is_null() {
|
||||
encoded.push(None);
|
||||
} else {
|
||||
let bytes = serde_json::to_vec(&value).context(SerializeSnafu)?;
|
||||
total_bytes += bytes.len();
|
||||
encoded.push(Some(bytes));
|
||||
}
|
||||
}
|
||||
|
||||
let mut builder = MutableBinaryArray::with_capacity(len, total_bytes);
|
||||
for value in encoded {
|
||||
builder.append_option(value);
|
||||
}
|
||||
Ok(Arc::new(builder.finish()))
|
||||
}
|
||||
|
||||
fn decode_variant(&self, to_type: &DataType) -> Result<ArrayRef> {
|
||||
fn downcast_builder<'a, T: ArrayBuilder>(
|
||||
builder: &'a mut dyn ArrayBuilder,
|
||||
to_type: &DataType,
|
||||
) -> Result<&'a mut T> {
|
||||
builder
|
||||
.as_any_mut()
|
||||
.downcast_mut::<T>()
|
||||
.with_context(|| CastTypeSnafu {
|
||||
msg: format!("Expect ArrayBuilder is of type {to_type}"),
|
||||
})
|
||||
}
|
||||
|
||||
let mut builder = make_builder(to_type, self.inner.len());
|
||||
if to_type.is_null() {
|
||||
downcast_builder::<NullBuilder>(builder.as_mut(), to_type)?
|
||||
.append_nulls(self.inner.len());
|
||||
} else {
|
||||
match to_type {
|
||||
DataType::Boolean => {
|
||||
let b = downcast_builder::<BooleanBuilder>(builder.as_mut(), to_type)?;
|
||||
for i in 0..self.inner.len() {
|
||||
b.append_option(self.try_get_value(i)?.as_bool());
|
||||
}
|
||||
}
|
||||
DataType::Int64 => {
|
||||
let b = downcast_builder::<Int64Builder>(builder.as_mut(), to_type)?;
|
||||
for i in 0..self.inner.len() {
|
||||
b.append_option(self.try_get_value(i)?.as_i64());
|
||||
}
|
||||
}
|
||||
DataType::Float64 => {
|
||||
let b = downcast_builder::<Float64Builder>(builder.as_mut(), to_type)?;
|
||||
for i in 0..self.inner.len() {
|
||||
b.append_option(self.try_get_value(i)?.as_f64());
|
||||
}
|
||||
}
|
||||
DataType::Utf8View => {
|
||||
let b = downcast_builder::<StringViewBuilder>(builder.as_mut(), to_type)?;
|
||||
for i in 0..self.inner.len() {
|
||||
let v = self.try_get_value(i)?;
|
||||
if v.is_null() {
|
||||
b.append_null();
|
||||
} else if let Some(s) = v.as_str() {
|
||||
b.append_value(s);
|
||||
} else {
|
||||
b.append_value(v.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return CastTypeSnafu {
|
||||
msg: format!("Cannot cast JSON value to {to_type}"),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(builder.finish())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -231,7 +341,9 @@ impl<'a> From<&'a ArrayRef> for JsonArray<'a> {
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use arrow_array::types::Int64Type;
|
||||
use arrow_array::{BinaryArray, BooleanArray, Float64Array, Int32Array, Int64Array, ListArray};
|
||||
use arrow_array::{
|
||||
BinaryArray, BooleanArray, Float64Array, Int32Array, Int64Array, ListArray, StringArray,
|
||||
};
|
||||
use arrow_schema::{Field, Fields};
|
||||
use serde_json::json;
|
||||
|
||||
|
||||
@@ -33,6 +33,7 @@ use datafusion_expr::Expr;
|
||||
use datafusion_expr::utils::expr_to_columns;
|
||||
use datatypes::schema::ext::ArrowSchemaExt;
|
||||
use futures::StreamExt;
|
||||
use itertools::Itertools;
|
||||
use partition::expr::PartitionExpr;
|
||||
use smallvec::SmallVec;
|
||||
use snafu::ResultExt;
|
||||
@@ -436,7 +437,16 @@ impl ScanRegion {
|
||||
.schema
|
||||
.arrow_schema()
|
||||
.has_json_extension_field()
|
||||
.then_some(&self.request.json_type_hint);
|
||||
.then_some(&self.request.json_type_hint)
|
||||
.inspect(|json_type_hint| {
|
||||
debug!(
|
||||
"Concretized JSON type: {{{}}}",
|
||||
json_type_hint
|
||||
.iter()
|
||||
.map(|(k, v)| format!("{}: {}", k, v))
|
||||
.join(", ")
|
||||
);
|
||||
});
|
||||
let mapper = FlatProjectionMapper::new_with_read_columns(
|
||||
&self.version.metadata,
|
||||
projection,
|
||||
|
||||
@@ -115,11 +115,14 @@ fn extract_untyped_json_get(expr: &mut Expr) -> Option<&mut ScalarFunction> {
|
||||
}
|
||||
}
|
||||
|
||||
fn push_json_get_type_arg(mut expr: Expr, data_type: DataType) -> Result<Either<Expr, Expr>> {
|
||||
fn push_json_get_type_arg(mut expr: Expr, mut data_type: DataType) -> Result<Either<Expr, Expr>> {
|
||||
let Some(json_get) = extract_untyped_json_get(&mut expr) else {
|
||||
return Ok(Either::Left(expr));
|
||||
};
|
||||
|
||||
if data_type.is_string() {
|
||||
data_type = DataType::Utf8View;
|
||||
}
|
||||
let with_type = ScalarValue::try_new_null(&data_type).map(|x| Expr::Literal(x, None))?;
|
||||
json_get.args.push(with_type);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user