mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-28 02:40:38 +00:00
feat: simple read write new json type values (#7175)
feat: basic json read and write Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
@@ -15,7 +15,6 @@
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::compute::cast as arrow_array_cast;
|
||||
use arrow::datatypes::{
|
||||
DataType as ArrowDataType, IntervalUnit as ArrowIntervalUnit, TimeUnit as ArrowTimeUnit,
|
||||
};
|
||||
@@ -368,8 +367,10 @@ impl ConcreteDataType {
|
||||
|
||||
/// Checks if the data type can cast to another data type.
|
||||
pub fn can_arrow_type_cast_to(&self, to_type: &ConcreteDataType) -> bool {
|
||||
let array = arrow_array::new_empty_array(&self.as_arrow_type());
|
||||
arrow_array_cast(array.as_ref(), &to_type.as_arrow_type()).is_ok()
|
||||
match (self, to_type) {
|
||||
(ConcreteDataType::Json(this), ConcreteDataType::Json(that)) => that.is_include(this),
|
||||
_ => arrow::compute::can_cast_types(&self.as_arrow_type(), &to_type.as_arrow_type()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to cast data type as a [`DurationType`].
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_schema::extension::ExtensionType;
|
||||
use arrow_schema::{ArrowError, DataType};
|
||||
use arrow_schema::{ArrowError, DataType, FieldRef};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::json::JsonStructureSettings;
|
||||
@@ -102,3 +102,8 @@ impl ExtensionType for JsonExtensionType {
|
||||
Ok(json)
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if this field is to be treated as json extension type.
|
||||
pub fn is_json_extension_type(field: &FieldRef) -> bool {
|
||||
field.extension_type_name() == Some(JsonExtensionType::NAME)
|
||||
}
|
||||
|
||||
@@ -260,7 +260,7 @@ impl JsonValue {
|
||||
ConcreteDataType::Json(self.json_type().clone())
|
||||
}
|
||||
|
||||
pub(crate) fn json_type(&self) -> &JsonType {
|
||||
pub fn json_type(&self) -> &JsonType {
|
||||
self.json_type.get_or_init(|| self.json_variant.json_type())
|
||||
}
|
||||
|
||||
@@ -268,6 +268,14 @@ impl JsonValue {
|
||||
matches!(self.json_variant, JsonVariant::Null)
|
||||
}
|
||||
|
||||
/// Check if this JSON value is an empty object.
|
||||
pub fn is_empty_object(&self) -> bool {
|
||||
match &self.json_variant {
|
||||
JsonVariant::Object(object) => object.is_empty(),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_i64(&self) -> Option<i64> {
|
||||
match self.json_variant {
|
||||
JsonVariant::Number(n) => n.as_i64(),
|
||||
|
||||
@@ -273,8 +273,9 @@ fn collect_fields(column_schemas: &[ColumnSchema]) -> Result<FieldsAndIndices> {
|
||||
_ => None,
|
||||
};
|
||||
if let Some(extype) = extype {
|
||||
let metadata = HashMap::from([(TYPE_KEY.to_string(), extype.to_string())]);
|
||||
field = field.with_metadata(metadata);
|
||||
field
|
||||
.metadata_mut()
|
||||
.insert(TYPE_KEY.to_string(), extype.to_string());
|
||||
}
|
||||
fields.push(field);
|
||||
ensure!(
|
||||
|
||||
@@ -20,7 +20,7 @@ mod decimal_type;
|
||||
mod dictionary_type;
|
||||
mod duration_type;
|
||||
mod interval_type;
|
||||
pub(crate) mod json_type;
|
||||
pub mod json_type;
|
||||
mod list_type;
|
||||
mod null_type;
|
||||
mod primitive_type;
|
||||
|
||||
@@ -18,7 +18,6 @@ use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use arrow_schema::Fields;
|
||||
use common_base::bytes::Bytes;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::ResultExt;
|
||||
@@ -36,7 +35,7 @@ use crate::vectors::json::builder::JsonVectorBuilder;
|
||||
use crate::vectors::{BinaryVectorBuilder, MutableVector};
|
||||
|
||||
pub const JSON_TYPE_NAME: &str = "Json";
|
||||
const JSON_PLAIN_FIELD_NAME: &str = "__plain__";
|
||||
const JSON_PLAIN_FIELD_NAME: &str = "__json_plain__";
|
||||
const JSON_PLAIN_FIELD_METADATA_KEY: &str = "is_plain_json";
|
||||
|
||||
pub type JsonObjectType = BTreeMap<String, JsonNativeType>;
|
||||
@@ -59,6 +58,10 @@ pub enum JsonNativeType {
|
||||
}
|
||||
|
||||
impl JsonNativeType {
|
||||
pub fn is_null(&self) -> bool {
|
||||
matches!(self, JsonNativeType::Null)
|
||||
}
|
||||
|
||||
pub fn u64() -> Self {
|
||||
Self::Number(JsonNumberType::U64)
|
||||
}
|
||||
@@ -187,7 +190,7 @@ impl JsonType {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn empty() -> Self {
|
||||
pub fn null() -> Self {
|
||||
Self {
|
||||
format: JsonFormat::Native(Box::new(JsonNativeType::Null)),
|
||||
}
|
||||
@@ -208,7 +211,7 @@ impl JsonType {
|
||||
}
|
||||
|
||||
/// Try to merge this json type with others, error on datatype conflict.
|
||||
pub(crate) fn merge(&mut self, other: &JsonType) -> Result<()> {
|
||||
pub fn merge(&mut self, other: &JsonType) -> Result<()> {
|
||||
match (&self.format, &other.format) {
|
||||
(JsonFormat::Jsonb, JsonFormat::Jsonb) => Ok(()),
|
||||
(JsonFormat::Native(this), JsonFormat::Native(that)) => {
|
||||
@@ -223,7 +226,8 @@ impl JsonType {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_mergeable(&self, other: &JsonType) -> bool {
|
||||
/// Check if it can merge with `other` json type.
|
||||
pub fn is_mergeable(&self, other: &JsonType) -> bool {
|
||||
match (&self.format, &other.format) {
|
||||
(JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
|
||||
(JsonFormat::Native(this), JsonFormat::Native(that)) => {
|
||||
@@ -232,6 +236,43 @@ impl JsonType {
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if it includes all fields in `other` json type.
|
||||
pub fn is_include(&self, other: &JsonType) -> bool {
|
||||
match (&self.format, &other.format) {
|
||||
(JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
|
||||
(JsonFormat::Native(this), JsonFormat::Native(that)) => {
|
||||
is_include(this.as_ref(), that.as_ref())
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_include(this: &JsonNativeType, that: &JsonNativeType) -> bool {
|
||||
fn is_include_object(this: &JsonObjectType, that: &JsonObjectType) -> bool {
|
||||
for (type_name, that_type) in that {
|
||||
let Some(this_type) = this.get(type_name) else {
|
||||
return false;
|
||||
};
|
||||
if !is_include(this_type, that_type) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
match (this, that) {
|
||||
(this, that) if this == that => true,
|
||||
(JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
|
||||
is_include(this.as_ref(), that.as_ref())
|
||||
}
|
||||
(JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
|
||||
is_include_object(this, that)
|
||||
}
|
||||
(_, JsonNativeType::Null) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// A special struct type for denoting "plain"(not object) json value. It has only one field, with
|
||||
@@ -317,14 +358,14 @@ impl DataType for JsonType {
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
match self.format {
|
||||
JsonFormat::Jsonb => ArrowDataType::Binary,
|
||||
JsonFormat::Native(_) => ArrowDataType::Struct(Fields::empty()),
|
||||
JsonFormat::Native(_) => self.as_struct_type().as_arrow_type(),
|
||||
}
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
match self.format {
|
||||
match &self.format {
|
||||
JsonFormat::Jsonb => Box::new(BinaryVectorBuilder::with_capacity(capacity)),
|
||||
JsonFormat::Native(_) => Box::new(JsonVectorBuilder::with_capacity(capacity)),
|
||||
JsonFormat::Native(x) => Box::new(JsonVectorBuilder::new(*x.clone(), capacity)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -336,6 +377,12 @@ impl DataType for JsonType {
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for JsonType {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.name())
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a json type value to string
|
||||
pub fn jsonb_to_string(val: &[u8]) -> Result<String> {
|
||||
match jsonb::from_slice(val) {
|
||||
@@ -366,6 +413,204 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::json::JsonStructureSettings;
|
||||
|
||||
#[test]
fn test_json_type_include() {
    /// Asserts that `is_include(this, that)` yields `expected`.
    fn check(this: &JsonNativeType, that: &JsonNativeType, expected: bool) {
        assert_eq!(is_include(this, that), expected);
    }

    /// Builds an object type from `(field name, field type)` pairs.
    fn object<const N: usize>(fields: [(&str, JsonNativeType); N]) -> JsonNativeType {
        JsonNativeType::Object(
            fields
                .into_iter()
                .map(|(name, field_type)| (name.to_string(), field_type))
                .collect(),
        )
    }

    /// Builds an array type with the given item type.
    fn array(item: JsonNativeType) -> JsonNativeType {
        JsonNativeType::Array(Box::new(item))
    }

    /// Builds `{"nested": {"a": <a>}, "bar": i64}`.
    fn nested_and_bar(a: JsonNativeType) -> JsonNativeType {
        object([
            ("nested", object([("a", a)])),
            ("bar", JsonNativeType::i64()),
        ])
    }

    /// Builds `{"b": <b>}`.
    fn b_of(b: JsonNativeType) -> JsonNativeType {
        object([("b", b)])
    }

    /// Builds `{"c": <c>}`.
    fn c_of(c: JsonNativeType) -> JsonNativeType {
        object([("c", c)])
    }

    // Null on the left only includes Null.
    check(&JsonNativeType::Null, &JsonNativeType::Null, true);
    check(&JsonNativeType::Null, &JsonNativeType::Bool, false);

    // Bool.
    check(&JsonNativeType::Bool, &JsonNativeType::Null, true);
    check(&JsonNativeType::Bool, &JsonNativeType::Bool, true);
    check(&JsonNativeType::Bool, &JsonNativeType::u64(), false);

    // Number.
    check(&JsonNativeType::u64(), &JsonNativeType::Null, true);
    check(&JsonNativeType::u64(), &JsonNativeType::u64(), true);
    check(&JsonNativeType::u64(), &JsonNativeType::String, false);

    // String.
    check(&JsonNativeType::String, &JsonNativeType::Null, true);
    check(&JsonNativeType::String, &JsonNativeType::String, true);
    check(
        &JsonNativeType::String,
        &array(JsonNativeType::f64()),
        false,
    );

    // Array.
    let f64_array = array(JsonNativeType::f64());
    check(&f64_array, &JsonNativeType::Null, true);
    check(&f64_array, &array(JsonNativeType::Null), true);
    check(&f64_array, &array(JsonNativeType::f64()), true);
    check(&f64_array, &JsonNativeType::String, false);
    check(&f64_array, &object([]), false);

    // Flat object.
    let simple_json_object = object([("foo", JsonNativeType::String)]);
    check(&simple_json_object, &JsonNativeType::Null, true);
    check(&simple_json_object, &simple_json_object, true);
    check(&simple_json_object, &JsonNativeType::i64(), false);
    check(
        &simple_json_object,
        &object([("bar", JsonNativeType::i64())]),
        false,
    );

    // Deeply nested object: {"nested": {"a": {"b": {"c": String}}}, "bar": i64}.
    let complex_json_object = nested_and_bar(b_of(c_of(JsonNativeType::String)));
    check(&complex_json_object, &JsonNativeType::Null, true);
    check(&complex_json_object, &JsonNativeType::String, false);
    check(&complex_json_object, &complex_json_object, true);
    check(
        &complex_json_object,
        &object([("bar", JsonNativeType::i64())]),
        true,
    );
    check(
        &complex_json_object,
        &nested_and_bar(JsonNativeType::Null),
        true,
    );
    check(
        &complex_json_object,
        &nested_and_bar(JsonNativeType::String),
        false,
    );
    check(
        &complex_json_object,
        &nested_and_bar(b_of(JsonNativeType::String)),
        false,
    );
    check(
        &complex_json_object,
        &nested_and_bar(b_of(c_of(JsonNativeType::Null))),
        true,
    );
    check(
        &complex_json_object,
        &nested_and_bar(b_of(c_of(JsonNativeType::Bool))),
        false,
    );
    check(
        &complex_json_object,
        &object([(
            "nested",
            object([("a", b_of(c_of(JsonNativeType::String)))]),
        )]),
        true,
    );
}
|
||||
|
||||
#[test]
|
||||
fn test_merge_json_type() -> Result<()> {
|
||||
fn test(
|
||||
|
||||
@@ -20,6 +20,7 @@ use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{Result, TryFromValueSnafu, UnsupportedOperationSnafu};
|
||||
use crate::json::value::JsonValueRef;
|
||||
use crate::prelude::{ValueRef, Vector, VectorRef};
|
||||
use crate::types::json_type::JsonNativeType;
|
||||
use crate::types::{JsonType, json_type};
|
||||
use crate::value::StructValueRef;
|
||||
use crate::vectors::{MutableVector, StructVectorBuilder};
|
||||
@@ -181,9 +182,9 @@ pub(crate) struct JsonVectorBuilder {
|
||||
}
|
||||
|
||||
impl JsonVectorBuilder {
|
||||
pub(crate) fn with_capacity(capacity: usize) -> Self {
|
||||
pub(crate) fn new(json_type: JsonNativeType, capacity: usize) -> Self {
|
||||
Self {
|
||||
merged_type: JsonType::empty(),
|
||||
merged_type: JsonType::new_native(json_type),
|
||||
capacity,
|
||||
builders: vec![],
|
||||
}
|
||||
@@ -326,18 +327,18 @@ mod tests {
|
||||
"Failed to merge JSON datatype: datatypes have conflict, this: Number(I64), that: Array[Bool]",
|
||||
),
|
||||
];
|
||||
let mut builder = JsonVectorBuilder::with_capacity(1);
|
||||
let mut builder = JsonVectorBuilder::new(JsonNativeType::Null, 1);
|
||||
for (json, result) in jsons.into_iter().zip(results.into_iter()) {
|
||||
push(json, &mut builder, result);
|
||||
}
|
||||
let vector = builder.to_vector();
|
||||
let expected = r#"
|
||||
+----------------+
|
||||
| StructVector |
|
||||
+----------------+
|
||||
| {__plain__: 1} |
|
||||
| {__plain__: 2} |
|
||||
+----------------+"#;
|
||||
+---------------------+
|
||||
| StructVector |
|
||||
+---------------------+
|
||||
| {__json_plain__: 1} |
|
||||
| {__json_plain__: 2} |
|
||||
+---------------------+"#;
|
||||
assert_eq!(pretty_print(vector), expected.trim());
|
||||
Ok(())
|
||||
}
|
||||
@@ -386,7 +387,7 @@ mod tests {
|
||||
"object": {"timestamp": 1761523203000}
|
||||
}"#,
|
||||
];
|
||||
let mut builder = JsonVectorBuilder::with_capacity(1);
|
||||
let mut builder = JsonVectorBuilder::new(JsonNativeType::Null, 1);
|
||||
for json in jsons {
|
||||
push(json, &mut builder, Ok(()));
|
||||
}
|
||||
|
||||
@@ -379,10 +379,8 @@ impl MutableVector for StructVectorBuilder {
|
||||
},
|
||||
StructValueRef::Ref(val) => self.push_struct_value(val)?,
|
||||
StructValueRef::RefList { val, fields } => {
|
||||
let struct_value = StructValue::try_new(
|
||||
val.iter().map(|v| Value::from(v.clone())).collect(),
|
||||
fields.clone(),
|
||||
)?;
|
||||
let struct_value =
|
||||
StructValue::try_new(val.into_iter().map(Value::from).collect(), fields)?;
|
||||
self.push_struct_value(&struct_value)?;
|
||||
}
|
||||
}
|
||||
@@ -429,12 +427,17 @@ impl ScalarVectorBuilder for StructVectorBuilder {
|
||||
.value_builders
|
||||
.iter_mut()
|
||||
.map(|b| b.to_vector().to_arrow_array())
|
||||
.collect();
|
||||
let struct_array = StructArray::new(
|
||||
self.fields.as_arrow_fields(),
|
||||
arrays,
|
||||
self.null_buffer.finish(),
|
||||
);
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let struct_array = if arrays.is_empty() {
|
||||
StructArray::new_empty_fields(self.len(), self.null_buffer.finish())
|
||||
} else {
|
||||
StructArray::new(
|
||||
self.fields.as_arrow_fields(),
|
||||
arrays,
|
||||
self.null_buffer.finish(),
|
||||
)
|
||||
};
|
||||
|
||||
StructVector::try_new(self.fields.clone(), struct_array).unwrap()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user