refactor: create JsonValue for json value (#7214)

* refactor: create `JsonValue` for json value

Signed-off-by: luofucong <luofc@foxmail.com>

* resolve PR comments

Signed-off-by: luofucong <luofc@foxmail.com>

* update proto

Signed-off-by: luofucong <luofc@foxmail.com>

---------

Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
LFC
2025-11-17 16:21:17 +08:00
committed by GitHub
parent 32168e8ca8
commit cdf9d18c36
14 changed files with 1283 additions and 307 deletions

2
Cargo.lock generated
View File

@@ -5348,7 +5348,7 @@ dependencies = [
[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=14b9dc40bdc8288742b0cefc7bb024303b7429ef#14b9dc40bdc8288742b0cefc7bb024303b7429ef"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=fd699b21991e358201ee81ead4d319545c5df2ad#fd699b21991e358201ee81ead4d319545c5df2ad"
dependencies = [
"prost 0.13.5",
"prost-types 0.13.5",

View File

@@ -148,7 +148,7 @@ etcd-client = { git = "https://github.com/GreptimeTeam/etcd-client", rev = "f62d
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "14b9dc40bdc8288742b0cefc7bb024303b7429ef" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "fd699b21991e358201ee81ead4d319545c5df2ad" }
hex = "0.4"
http = "1"
humantime = "2.1"

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::collections::{BTreeMap, HashSet};
use std::sync::Arc;
use common_decimal::Decimal128;
@@ -20,6 +20,7 @@ use common_decimal::decimal128::{DECIMAL128_DEFAULT_SCALE, DECIMAL128_MAX_PRECIS
use common_time::time::Time;
use common_time::timestamp::TimeUnit;
use common_time::{Date, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp};
use datatypes::json::value::{JsonNumber, JsonValue, JsonValueRef, JsonVariant};
use datatypes::prelude::{ConcreteDataType, ValueRef};
use datatypes::types::{
IntervalType, JsonFormat, StructField, StructType, TimeType, TimestampType,
@@ -34,9 +35,9 @@ use greptime_proto::v1::greptime_request::Request;
use greptime_proto::v1::query_request::Query;
use greptime_proto::v1::value::ValueData;
use greptime_proto::v1::{
self, ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, JsonNativeTypeExtension,
JsonTypeExtension, ListTypeExtension, QueryRequest, Row, SemanticType, StructTypeExtension,
VectorTypeExtension,
self, ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, JsonList,
JsonNativeTypeExtension, JsonObject, JsonTypeExtension, ListTypeExtension, QueryRequest, Row,
SemanticType, StructTypeExtension, VectorTypeExtension, json_value,
};
use paste::paste;
use snafu::prelude::*;
@@ -801,21 +802,8 @@ pub fn pb_value_to_value_ref<'a>(
}
ValueData::JsonValue(inner_value) => {
let json_datatype_ext = datatype_ext
.as_ref()
.and_then(|ext| {
if let Some(TypeExt::JsonNativeType(l)) = &ext.type_ext {
Some(l)
} else {
None
}
})
.expect("json value must contain datatype ext");
ValueRef::Json(Box::new(pb_value_to_value_ref(
inner_value,
json_datatype_ext.datatype_extension.as_deref(),
)))
let value = decode_json_value(inner_value);
ValueRef::Json(Box::new(value))
}
}
}
@@ -938,12 +926,72 @@ pub fn to_proto_value(value: Value) -> v1::Value {
})),
},
Value::Json(v) => v1::Value {
value_data: Some(ValueData::JsonValue(Box::new(to_proto_value(*v)))),
value_data: Some(ValueData::JsonValue(encode_json_value(*v))),
},
Value::Duration(_) => v1::Value { value_data: None },
}
}
fn encode_json_value(value: JsonValue) -> v1::JsonValue {
fn helper(json: JsonVariant) -> v1::JsonValue {
let value = match json {
JsonVariant::Null => None,
JsonVariant::Bool(x) => Some(json_value::Value::Boolean(x)),
JsonVariant::Number(x) => Some(match x {
JsonNumber::PosInt(i) => json_value::Value::Uint(i),
JsonNumber::NegInt(i) => json_value::Value::Int(i),
JsonNumber::Float(f) => json_value::Value::Float(f.0),
}),
JsonVariant::String(x) => Some(json_value::Value::Str(x)),
JsonVariant::Array(x) => Some(json_value::Value::Array(JsonList {
items: x.into_iter().map(helper).collect::<Vec<_>>(),
})),
JsonVariant::Object(x) => {
let entries = x
.into_iter()
.map(|(key, v)| v1::json_object::Entry {
key,
value: Some(helper(v)),
})
.collect::<Vec<_>>();
Some(json_value::Value::Object(JsonObject { entries }))
}
};
v1::JsonValue { value }
}
helper(value.into_variant())
}
fn decode_json_value(value: &v1::JsonValue) -> JsonValueRef<'_> {
let Some(value) = &value.value else {
return JsonValueRef::null();
};
match value {
json_value::Value::Boolean(x) => (*x).into(),
json_value::Value::Int(x) => (*x).into(),
json_value::Value::Uint(x) => (*x).into(),
json_value::Value::Float(x) => (*x).into(),
json_value::Value::Str(x) => (x.as_str()).into(),
json_value::Value::Array(array) => array
.items
.iter()
.map(|x| decode_json_value(x).into_variant())
.collect::<Vec<_>>()
.into(),
json_value::Value::Object(x) => x
.entries
.iter()
.filter_map(|entry| {
entry
.value
.as_ref()
.map(|v| (entry.key.as_str(), decode_json_value(v).into_variant()))
})
.collect::<BTreeMap<_, _>>()
.into(),
}
}
fn convert_list_to_pb_values(list_value: ListValue) -> Vec<v1::Value> {
list_value
.take_items()
@@ -1065,9 +1113,7 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
.collect();
Some(ValueData::StructValue(v1::StructValue { items }))
}
Value::Json(inner_value) => Some(ValueData::JsonValue(Box::new(value_to_grpc_value(
*inner_value,
)))),
Value::Json(v) => Some(ValueData::JsonValue(encode_json_value(*v))),
Value::Duration(_) => unreachable!(),
},
}
@@ -1778,4 +1824,199 @@ mod tests {
_ => panic!("Unexpected value type"),
}
}
#[test]
fn test_encode_decode_json_value() {
let json = JsonValue::null();
let proto = encode_json_value(json.clone());
assert!(proto.value.is_none());
let value = decode_json_value(&proto);
assert_eq!(json.as_ref(), value);
let json: JsonValue = true.into();
let proto = encode_json_value(json.clone());
assert_eq!(proto.value, Some(json_value::Value::Boolean(true)));
let value = decode_json_value(&proto);
assert_eq!(json.as_ref(), value);
let json: JsonValue = (-1i64).into();
let proto = encode_json_value(json.clone());
assert_eq!(proto.value, Some(json_value::Value::Int(-1)));
let value = decode_json_value(&proto);
assert_eq!(json.as_ref(), value);
let json: JsonValue = 1u64.into();
let proto = encode_json_value(json.clone());
assert_eq!(proto.value, Some(json_value::Value::Uint(1)));
let value = decode_json_value(&proto);
assert_eq!(json.as_ref(), value);
let json: JsonValue = 1.0f64.into();
let proto = encode_json_value(json.clone());
assert_eq!(proto.value, Some(json_value::Value::Float(1.0)));
let value = decode_json_value(&proto);
assert_eq!(json.as_ref(), value);
let json: JsonValue = "s".into();
let proto = encode_json_value(json.clone());
assert_eq!(proto.value, Some(json_value::Value::Str("s".to_string())));
let value = decode_json_value(&proto);
assert_eq!(json.as_ref(), value);
let json: JsonValue = [1i64, 2, 3].into();
let proto = encode_json_value(json.clone());
assert_eq!(
proto.value,
Some(json_value::Value::Array(JsonList {
items: vec![
v1::JsonValue {
value: Some(json_value::Value::Int(1))
},
v1::JsonValue {
value: Some(json_value::Value::Int(2))
},
v1::JsonValue {
value: Some(json_value::Value::Int(3))
}
]
}))
);
let value = decode_json_value(&proto);
assert_eq!(json.as_ref(), value);
let json: JsonValue = [(); 0].into();
let proto = encode_json_value(json.clone());
assert_eq!(
proto.value,
Some(json_value::Value::Array(JsonList { items: vec![] }))
);
let value = decode_json_value(&proto);
assert_eq!(json.as_ref(), value);
let json: JsonValue = [("k3", 3i64), ("k2", 2i64), ("k1", 1i64)].into();
let proto = encode_json_value(json.clone());
assert_eq!(
proto.value,
Some(json_value::Value::Object(JsonObject {
entries: vec![
v1::json_object::Entry {
key: "k1".to_string(),
value: Some(v1::JsonValue {
value: Some(json_value::Value::Int(1))
}),
},
v1::json_object::Entry {
key: "k2".to_string(),
value: Some(v1::JsonValue {
value: Some(json_value::Value::Int(2))
}),
},
v1::json_object::Entry {
key: "k3".to_string(),
value: Some(v1::JsonValue {
value: Some(json_value::Value::Int(3))
}),
},
]
}))
);
let value = decode_json_value(&proto);
assert_eq!(json.as_ref(), value);
let json: JsonValue = [("null", ()); 0].into();
let proto = encode_json_value(json.clone());
assert_eq!(
proto.value,
Some(json_value::Value::Object(JsonObject { entries: vec![] }))
);
let value = decode_json_value(&proto);
assert_eq!(json.as_ref(), value);
let json: JsonValue = [
("null", JsonVariant::from(())),
("bool", false.into()),
("list", ["hello", "world"].into()),
(
"object",
[
("positive_i", JsonVariant::from(42u64)),
("negative_i", (-42i64).into()),
("nested", [("what", "blah")].into()),
]
.into(),
),
]
.into();
let proto = encode_json_value(json.clone());
assert_eq!(
proto.value,
Some(json_value::Value::Object(JsonObject {
entries: vec![
v1::json_object::Entry {
key: "bool".to_string(),
value: Some(v1::JsonValue {
value: Some(json_value::Value::Boolean(false))
}),
},
v1::json_object::Entry {
key: "list".to_string(),
value: Some(v1::JsonValue {
value: Some(json_value::Value::Array(JsonList {
items: vec![
v1::JsonValue {
value: Some(json_value::Value::Str("hello".to_string()))
},
v1::JsonValue {
value: Some(json_value::Value::Str("world".to_string()))
},
]
}))
}),
},
v1::json_object::Entry {
key: "null".to_string(),
value: Some(v1::JsonValue { value: None }),
},
v1::json_object::Entry {
key: "object".to_string(),
value: Some(v1::JsonValue {
value: Some(json_value::Value::Object(JsonObject {
entries: vec![
v1::json_object::Entry {
key: "negative_i".to_string(),
value: Some(v1::JsonValue {
value: Some(json_value::Value::Int(-42))
}),
},
v1::json_object::Entry {
key: "nested".to_string(),
value: Some(v1::JsonValue {
value: Some(json_value::Value::Object(JsonObject {
entries: vec![v1::json_object::Entry {
key: "what".to_string(),
value: Some(v1::JsonValue {
value: Some(json_value::Value::Str(
"blah".to_string()
))
}),
},]
}))
}),
},
v1::json_object::Entry {
key: "positive_i".to_string(),
value: Some(v1::JsonValue {
value: Some(json_value::Value::Uint(42))
}),
},
]
}))
}),
},
]
}))
);
let value = decode_json_value(&proto);
assert_eq!(json.as_ref(), value);
}
}

View File

@@ -211,8 +211,7 @@ pub fn sql_value_to_value(
| Value::Duration(_)
| Value::IntervalYearMonth(_)
| Value::IntervalDayTime(_)
| Value::IntervalMonthDayNano(_)
| Value::Json(_) => match unary_op {
| Value::IntervalMonthDayNano(_) => match unary_op {
UnaryOperator::Plus => {}
UnaryOperator::Minus => {
value = value
@@ -222,7 +221,11 @@ pub fn sql_value_to_value(
_ => return InvalidUnaryOpSnafu { unary_op, value }.fail(),
},
Value::String(_) | Value::Binary(_) | Value::List(_) | Value::Struct(_) => {
Value::String(_)
| Value::Binary(_)
| Value::List(_)
| Value::Struct(_)
| Value::Json(_) => {
return InvalidUnaryOpSnafu { unary_op, value }.fail();
}
}

View File

@@ -189,7 +189,7 @@ pub enum Error {
location: Location,
},
#[snafu(display("Invalid JSON text: {}", value))]
#[snafu(display("Invalid JSON: {}", value))]
InvalidJson {
value: String,
#[snafu(implicit)]

View File

@@ -19,18 +19,19 @@
//! The struct will carry all the fields of the Json object. We will not flatten any json object in this implementation.
//!
use std::collections::HashSet;
pub mod value;
use std::collections::{BTreeMap, HashSet};
use std::sync::Arc;
use common_base::bytes::StringBytes;
use ordered_float::OrderedFloat;
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value as Json};
use snafu::{ResultExt, ensure};
use crate::data_type::{ConcreteDataType, DataType};
use crate::error::{self, Error};
use crate::types::{ListType, StructField, StructType};
use crate::json::value::{JsonValue, JsonVariant};
use crate::types::{StructField, StructType};
use crate::value::{ListValue, StructValue, Value};
/// The configuration of JSON encoding
@@ -148,24 +149,16 @@ pub fn encode_json_with_context<'a>(
json: Json,
data_type: Option<&ConcreteDataType>,
context: &JsonContext<'a>,
) -> Result<Value, Error> {
) -> Result<JsonValue, Error> {
// Check if the entire encoding should be unstructured
if matches!(context.settings, JsonStructureSettings::UnstructuredRaw) {
let json_string = json.to_string();
let struct_value = StructValue::try_new(
vec![Value::String(json_string.into())],
StructType::new(Arc::new(vec![StructField::new(
JsonStructureSettings::RAW_FIELD.to_string(),
ConcreteDataType::string_datatype(),
true,
)])),
)?;
return Ok(Value::Struct(struct_value));
return Ok([(JsonStructureSettings::RAW_FIELD, json_string)].into());
}
// Check if current key should be treated as unstructured
if context.is_unstructured_key() {
return Ok(Value::String(json.to_string().into()));
return Ok(json.to_string().into());
}
match json {
@@ -178,8 +171,7 @@ pub fn encode_json_with_context<'a>(
);
let data_type = data_type.and_then(|x| x.as_struct());
let struct_value = encode_json_object_with_context(json_object, data_type, context)?;
Ok(Value::Struct(struct_value))
encode_json_object_with_context(json_object, data_type, context)
}
Json::Array(json_array) => {
let item_type = if let Some(ConcreteDataType::List(list_type)) = data_type {
@@ -187,14 +179,13 @@ pub fn encode_json_with_context<'a>(
} else {
None
};
let list_value = encode_json_array_with_context(json_array, item_type, context)?;
Ok(Value::List(list_value))
encode_json_array_with_context(json_array, item_type, context)
}
_ => {
// For non-collection types, verify type compatibility
if let Some(expected_type) = data_type {
let (value, actual_type) =
encode_json_value_with_context(json, Some(expected_type), context)?;
let value = encode_json_value_with_context(json, Some(expected_type), context)?;
let actual_type = value.json_type().native_type();
if &actual_type == expected_type {
Ok(value)
} else {
@@ -208,8 +199,7 @@ pub fn encode_json_with_context<'a>(
.build())
}
} else {
let (value, _) = encode_json_value_with_context(json, None, context)?;
Ok(value)
encode_json_value_with_context(json, None, context)
}
}
}
@@ -219,10 +209,8 @@ fn encode_json_object_with_context<'a>(
mut json_object: Map<String, Json>,
fields: Option<&StructType>,
context: &JsonContext<'a>,
) -> Result<StructValue, Error> {
let total_json_keys = json_object.len();
let mut items = Vec::with_capacity(total_json_keys);
let mut struct_fields = Vec::with_capacity(total_json_keys);
) -> Result<JsonValue, Error> {
let mut object = BTreeMap::new();
// First, process fields from the provided schema in their original order
if let Some(fields) = fields {
for field in fields.fields().iter() {
@@ -230,18 +218,12 @@ fn encode_json_object_with_context<'a>(
if let Some(value) = json_object.remove(field_name) {
let field_context = context.with_key(field_name);
let (value, data_type) =
let value =
encode_json_value_with_context(value, Some(field.data_type()), &field_context)?;
items.push(value);
struct_fields.push(StructField::new(
field_name.to_string(),
data_type,
true, // JSON fields are always nullable
));
object.insert(field_name.to_string(), value.into_variant());
} else {
// Field exists in schema but not in JSON - add null value
items.push(Value::Null);
struct_fields.push(field.clone());
object.insert(field_name.to_string(), ().into());
}
}
}
@@ -250,57 +232,49 @@ fn encode_json_object_with_context<'a>(
for (key, value) in json_object {
let field_context = context.with_key(&key);
let (value, data_type) = encode_json_value_with_context(value, None, &field_context)?;
items.push(value);
let value = encode_json_value_with_context(value, None, &field_context)?;
struct_fields.push(StructField::new(
key.clone(),
data_type,
true, // JSON fields are always nullable
));
object.insert(key, value.into_variant());
}
let struct_type = StructType::new(Arc::new(struct_fields));
StructValue::try_new(items, struct_type)
Ok(JsonValue::new(JsonVariant::Object(object)))
}
fn encode_json_array_with_context<'a>(
json_array: Vec<Json>,
item_type: Option<&ConcreteDataType>,
context: &JsonContext<'a>,
) -> Result<ListValue, Error> {
) -> Result<JsonValue, Error> {
let json_array_len = json_array.len();
let mut items = Vec::with_capacity(json_array_len);
let mut element_type = None;
let mut element_type = item_type.cloned();
for (index, value) in json_array.into_iter().enumerate() {
let array_context = context.with_key(&index.to_string());
let (item_value, item_type) =
encode_json_value_with_context(value, item_type, &array_context)?;
items.push(item_value);
let item_value =
encode_json_value_with_context(value, element_type.as_ref(), &array_context)?;
let item_type = item_value.json_type().native_type();
items.push(item_value.into_variant());
// Determine the common type for the list
if let Some(current_type) = &element_type {
// For now, we'll use the first non-null type we encounter
// In a more sophisticated implementation, we might want to find a common supertype
if *current_type == ConcreteDataType::null_datatype()
&& item_type != ConcreteDataType::null_datatype()
{
element_type = Some(item_type);
}
// It's valid for json array to have different types of items, for example,
// ["a string", 1]. However, the `JsonValue` will be converted to Arrow list array,
// which requires all items have exactly same type. So we forbid the different types
// case here. Besides, it's not common for items in a json array to differ. So I think
// we are good here.
ensure!(
item_type == *current_type,
error::InvalidJsonSnafu {
value: "all items in json array must have the same type"
}
);
} else {
element_type = Some(item_type);
}
}
// Use provided item_type if available, otherwise determine from elements
let element_type = if let Some(item_type) = item_type {
item_type.clone()
} else {
element_type.unwrap_or_else(ConcreteDataType::string_datatype)
};
Ok(ListValue::new(items, Arc::new(element_type)))
Ok(JsonValue::new(JsonVariant::Array(items)))
}
/// Helper function to encode a JSON value to a Value and determine its ConcreteDataType with context
@@ -308,81 +282,61 @@ fn encode_json_value_with_context<'a>(
json: Json,
expected_type: Option<&ConcreteDataType>,
context: &JsonContext<'a>,
) -> Result<(Value, ConcreteDataType), Error> {
) -> Result<JsonValue, Error> {
// Check if current key should be treated as unstructured
if context.is_unstructured_key() {
return Ok((
Value::String(json.to_string().into()),
ConcreteDataType::string_datatype(),
));
return Ok(json.to_string().into());
}
match json {
Json::Null => Ok((Value::Null, ConcreteDataType::null_datatype())),
Json::Bool(b) => Ok((Value::Boolean(b), ConcreteDataType::boolean_datatype())),
Json::Null => Ok(JsonValue::null()),
Json::Bool(b) => Ok(b.into()),
Json::Number(n) => {
if let Some(i) = n.as_i64() {
// Use int64 for all integer numbers when possible
if let Some(expected) = expected_type
&& let Ok(value) = try_convert_to_expected_type(i, expected)
{
return Ok((value, expected.clone()));
return Ok(value);
}
Ok((Value::Int64(i), ConcreteDataType::int64_datatype()))
Ok(i.into())
} else if let Some(u) = n.as_u64() {
// Use int64 for unsigned integers that fit, otherwise use u64
if let Some(expected) = expected_type
&& let Ok(value) = try_convert_to_expected_type(u, expected)
{
return Ok((value, expected.clone()));
return Ok(value);
}
if u <= i64::MAX as u64 {
Ok((Value::Int64(u as i64), ConcreteDataType::int64_datatype()))
Ok((u as i64).into())
} else {
Ok((Value::UInt64(u), ConcreteDataType::uint64_datatype()))
Ok(u.into())
}
} else if let Some(f) = n.as_f64() {
// Try to use the expected type if provided
if let Some(expected) = expected_type
&& let Ok(value) = try_convert_to_expected_type(f, expected)
{
return Ok((value, expected.clone()));
return Ok(value);
}
// Default to f64 for floating point numbers
Ok((
Value::Float64(OrderedFloat(f)),
ConcreteDataType::float64_datatype(),
))
Ok(f.into())
} else {
// Fallback to string representation
Ok((
Value::String(StringBytes::from(n.to_string())),
ConcreteDataType::string_datatype(),
))
Ok(n.to_string().into())
}
}
Json::String(s) => {
if let Some(expected) = expected_type
&& let Ok(value) = try_convert_to_expected_type(s.as_str(), expected)
{
return Ok((value, expected.clone()));
return Ok(value);
}
Ok((
Value::String(StringBytes::from(s.clone())),
ConcreteDataType::string_datatype(),
))
}
Json::Array(arr) => {
let list_value = encode_json_array_with_context(arr, expected_type, context)?;
let datatype = ConcreteDataType::List(ListType::new(list_value.datatype()));
Ok((Value::List(list_value), datatype))
}
Json::Object(obj) => {
let struct_value = encode_json_object_with_context(obj, None, context)?;
let data_type = ConcreteDataType::Struct(struct_value.struct_type().clone());
Ok((Value::Struct(struct_value), data_type))
Ok(s.into())
}
Json::Array(arr) => encode_json_array_with_context(arr, expected_type, context),
Json::Object(obj) => encode_json_object_with_context(obj, None, context),
}
}
@@ -402,7 +356,6 @@ pub fn decode_value_with_context<'a>(
}
match value {
Value::Json(inner) => decode_value_with_context(*inner, context),
Value::Struct(struct_value) => decode_struct_with_context(struct_value, context),
Value::List(list_value) => decode_list_with_context(list_value, context),
_ => decode_primitive_value(value),
@@ -569,11 +522,13 @@ fn decode_struct_with_settings<'a>(
key_path: field_context.key_path.clone(),
settings: &JsonStructureSettings::Structured(None),
};
let (decoded_value, data_type) = encode_json_value_with_context(
let decoded_value = encode_json_value_with_context(
json_value,
None, // Don't force a specific type, let it be inferred from JSON
&structured_context,
)?;
)?
.into_value();
let data_type = decoded_value.data_type();
items.push(decoded_value);
struct_fields.push(StructField::new(
@@ -651,8 +606,9 @@ fn decode_unstructured_raw_struct(struct_value: StructValue) -> Result<StructVal
key_path: String::new(),
settings: &JsonStructureSettings::Structured(None),
};
let (decoded_value, data_type) =
encode_json_value_with_context(json_value, None, &context)?;
let decoded_value =
encode_json_value_with_context(json_value, None, &context)?.into_value();
let data_type = decoded_value.data_type();
if let Value::Struct(decoded_struct) = decoded_value {
return Ok(decoded_struct);
@@ -679,21 +635,51 @@ fn decode_unstructured_raw_struct(struct_value: StructValue) -> Result<StructVal
fn try_convert_to_expected_type<T>(
value: T,
expected_type: &ConcreteDataType,
) -> Result<Value, Error>
) -> Result<JsonValue, Error>
where
T: Into<Value>,
T: Into<JsonValue>,
{
let value = value.into();
expected_type.try_cast(value.clone()).ok_or_else(|| {
let cast_error = || {
error::CastTypeSnafu {
msg: format!(
"Cannot cast from {} to {}",
value.data_type().name(),
expected_type.name()
),
msg: format!("Cannot cast value {value} to {expected_type}"),
}
.build()
})
.fail()
};
let actual_type = value.json_type().native_type();
match (&actual_type, expected_type) {
(x, y) if x == y => Ok(value),
(ConcreteDataType::UInt64(_), ConcreteDataType::Int64(_)) => {
if let Some(i) = value.as_i64() {
Ok(i.into())
} else {
cast_error()
}
}
(ConcreteDataType::UInt64(_), ConcreteDataType::Float64(_)) => {
if let Some(f) = value.as_f64() {
Ok(f.into())
} else {
cast_error()
}
}
(ConcreteDataType::Int64(_), ConcreteDataType::UInt64(_)) => {
if let Some(i) = value.as_u64() {
Ok(i.into())
} else {
cast_error()
}
}
(ConcreteDataType::Int64(_), ConcreteDataType::Float64(_)) => {
if let Some(f) = value.as_f64() {
Ok(f.into())
} else {
cast_error()
}
}
(_, ConcreteDataType::String(_)) => Ok(value.to_string().into()),
_ => cast_error(),
}
}
#[cfg(test)]
@@ -898,11 +884,11 @@ mod tests {
let json = Json::from(42);
let settings = JsonStructureSettings::Structured(None);
let result = settings
.encode_with_type(json.clone(), Some(&ConcreteDataType::int8_datatype()))
.encode_with_type(json.clone(), Some(&ConcreteDataType::uint64_datatype()))
.unwrap()
.into_json_inner()
.unwrap();
assert_eq!(result, Value::Int8(42));
assert_eq!(result, Value::UInt64(42));
// Test with expected string type
let result = settings
@@ -917,23 +903,11 @@ mod tests {
fn test_encode_json_array_mixed_types() {
let json = json!([1, "hello", true, 3.15]);
let settings = JsonStructureSettings::Structured(None);
let result = settings
.encode_with_type(json, None)
.unwrap()
.into_json_inner()
.unwrap();
if let Value::List(list_value) = result {
assert_eq!(list_value.items().len(), 4);
// The first non-null type should determine the list type
// In this case, it should be string since we can't find a common numeric type
assert_eq!(
list_value.datatype(),
Arc::new(ConcreteDataType::int64_datatype())
);
} else {
panic!("Expected List value");
}
let result = settings.encode_with_type(json, None);
assert_eq!(
result.unwrap_err().to_string(),
"Invalid JSON: all items in json array must have the same type"
);
}
#[test]
@@ -951,7 +925,7 @@ mod tests {
// Empty arrays default to string type
assert_eq!(
list_value.datatype(),
Arc::new(ConcreteDataType::string_datatype())
Arc::new(ConcreteDataType::null_datatype())
);
} else {
panic!("Expected List value");
@@ -1008,15 +982,15 @@ mod tests {
if let Value::Struct(struct_value) = result {
assert_eq!(struct_value.items().len(), 2);
let struct_fields = struct_value.struct_type().fields();
assert_eq!(struct_fields[0].name(), "name");
assert_eq!(struct_fields[0].name(), "age");
assert_eq!(
struct_fields[0].data_type(),
&ConcreteDataType::string_datatype()
&ConcreteDataType::int64_datatype()
);
assert_eq!(struct_fields[1].name(), "age");
assert_eq!(struct_fields[1].name(), "name");
assert_eq!(
struct_fields[1].data_type(),
&ConcreteDataType::int64_datatype()
&ConcreteDataType::string_datatype()
);
} else {
panic!("Expected Struct value");
@@ -1051,7 +1025,7 @@ mod tests {
];
let struct_type = StructType::new(Arc::new(fields));
let result = encode_json_object_with_context(
let Value::Struct(result) = encode_json_object_with_context(
json.as_object().unwrap().clone(),
Some(&struct_type),
&JsonContext {
@@ -1059,7 +1033,10 @@ mod tests {
settings: &JsonStructureSettings::Structured(None),
},
)
.unwrap();
.map(|x| x.into_value())
.unwrap() else {
unreachable!()
};
// Verify field order is preserved from schema
let struct_fields = result.struct_type().fields();
@@ -1093,7 +1070,7 @@ mod tests {
];
let struct_type = StructType::new(Arc::new(fields));
let result = encode_json_object_with_context(
let Value::Struct(result) = encode_json_object_with_context(
json.as_object().unwrap().clone(),
Some(&struct_type),
&JsonContext {
@@ -1101,19 +1078,22 @@ mod tests {
settings: &JsonStructureSettings::Structured(None),
},
)
.unwrap();
.map(|x| x.into_value())
.unwrap() else {
unreachable!()
};
// Verify schema fields come first in order
// verify fields are sorted in json value
let struct_fields = result.struct_type().fields();
assert_eq!(struct_fields[0].name(), "name");
assert_eq!(struct_fields[0].name(), "active");
assert_eq!(struct_fields[1].name(), "age");
assert_eq!(struct_fields[2].name(), "active");
assert_eq!(struct_fields[2].name(), "name");
// Verify values are correct
let items = result.items();
assert_eq!(items[0], Value::String("Alice".into()));
assert_eq!(items[0], Value::Boolean(true));
assert_eq!(items[1], Value::Int64(25));
assert_eq!(items[2], Value::Boolean(true));
assert_eq!(items[2], Value::String("Alice".into()));
}
#[test]
@@ -1134,7 +1114,7 @@ mod tests {
];
let struct_type = StructType::new(Arc::new(fields));
let result = encode_json_object_with_context(
let Value::Struct(result) = encode_json_object_with_context(
json.as_object().unwrap().clone(),
Some(&struct_type),
&JsonContext {
@@ -1142,17 +1122,20 @@ mod tests {
settings: &JsonStructureSettings::Structured(None),
},
)
.unwrap();
.map(|x| x.into_value())
.unwrap() else {
unreachable!()
};
// Verify both schema fields are present
let struct_fields = result.struct_type().fields();
assert_eq!(struct_fields[0].name(), "name");
assert_eq!(struct_fields[1].name(), "age");
assert_eq!(struct_fields[0].name(), "age");
assert_eq!(struct_fields[1].name(), "name");
// Verify values - name has value, age is null
let items = result.items();
assert_eq!(items[0], Value::String("Bob".into()));
assert_eq!(items[1], Value::Null);
assert_eq!(items[0], Value::Null);
assert_eq!(items[1], Value::String("Bob".into()));
}
#[test]
@@ -1175,7 +1158,7 @@ mod tests {
#[test]
fn test_encode_json_array_with_item_type() {
let json = json!([1, 2, 3]);
let item_type = Arc::new(ConcreteDataType::int8_datatype());
let item_type = Arc::new(ConcreteDataType::uint64_datatype());
let list_type = ListType::new(item_type.clone());
let concrete_type = ConcreteDataType::List(list_type);
let settings = JsonStructureSettings::Structured(None);
@@ -1187,9 +1170,9 @@ mod tests {
if let Value::List(list_value) = result {
assert_eq!(list_value.items().len(), 3);
assert_eq!(list_value.items()[0], Value::Int8(1));
assert_eq!(list_value.items()[1], Value::Int8(2));
assert_eq!(list_value.items()[2], Value::Int8(3));
assert_eq!(list_value.items()[0], Value::UInt64(1));
assert_eq!(list_value.items()[1], Value::UInt64(2));
assert_eq!(list_value.items()[2], Value::UInt64(3));
assert_eq!(list_value.datatype(), item_type);
} else {
panic!("Expected List value");
@@ -1199,7 +1182,7 @@ mod tests {
#[test]
fn test_encode_json_array_empty_with_item_type() {
let json = json!([]);
let item_type = Arc::new(ConcreteDataType::string_datatype());
let item_type = Arc::new(ConcreteDataType::null_datatype());
let list_type = ListType::new(item_type.clone());
let concrete_type = ConcreteDataType::List(list_type);
let settings = JsonStructureSettings::Structured(None);
@@ -1219,6 +1202,7 @@ mod tests {
#[cfg(test)]
mod decode_tests {
use ordered_float::OrderedFloat;
use serde_json::json;
use super::*;
@@ -2153,20 +2137,11 @@ mod tests {
)])),
);
let decoded_struct = settings.decode_struct(array_struct).unwrap();
let fields = decoded_struct.struct_type().fields();
let decoded_fields: Vec<&str> = fields.iter().map(|f| f.name()).collect();
assert!(decoded_fields.contains(&"value"));
if let Value::List(list_value) = &decoded_struct.items()[0] {
assert_eq!(list_value.items().len(), 4);
assert_eq!(list_value.items()[0], Value::Int64(1));
assert_eq!(list_value.items()[1], Value::String("hello".into()));
assert_eq!(list_value.items()[2], Value::Boolean(true));
assert_eq!(list_value.items()[3], Value::Float64(OrderedFloat(3.15)));
} else {
panic!("Expected array to be decoded as ListValue");
}
let decoded_struct = settings.decode_struct(array_struct);
assert_eq!(
decoded_struct.unwrap_err().to_string(),
"Invalid JSON: all items in json array must have the same type"
);
}
#[test]

View File

@@ -0,0 +1,689 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeMap;
use std::fmt::{Display, Formatter};
use std::hash::{Hash, Hasher};
use std::sync::{Arc, OnceLock};
use num_traits::ToPrimitive;
use ordered_float::OrderedFloat;
use serde::{Deserialize, Serialize};
use serde_json::Number;
use crate::data_type::ConcreteDataType;
use crate::types::{JsonType, StructField, StructType};
use crate::value::{ListValue, ListValueRef, StructValue, StructValueRef, Value, ValueRef};
/// Number in json, can be a positive integer, a negative integer, or a floating number.
/// Each of which is represented as `u64`, `i64` and `f64`.
///
/// This follows how `serde_json` designs number.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum JsonNumber {
PosInt(u64),
NegInt(i64),
Float(OrderedFloat<f64>),
}
impl JsonNumber {
fn as_u64(&self) -> Option<u64> {
match self {
JsonNumber::PosInt(n) => Some(*n),
JsonNumber::NegInt(n) => (*n >= 0).then_some(*n as u64),
_ => None,
}
}
fn as_i64(&self) -> Option<i64> {
match self {
JsonNumber::PosInt(n) => (*n <= i64::MAX as u64).then_some(*n as i64),
JsonNumber::NegInt(n) => Some(*n),
_ => None,
}
}
fn as_f64(&self) -> f64 {
match self {
JsonNumber::PosInt(n) => *n as f64,
JsonNumber::NegInt(n) => *n as f64,
JsonNumber::Float(n) => n.0,
}
}
}
impl From<u64> for JsonNumber {
fn from(i: u64) -> Self {
Self::PosInt(i)
}
}
impl From<i64> for JsonNumber {
fn from(n: i64) -> Self {
Self::NegInt(n)
}
}
impl From<f64> for JsonNumber {
fn from(i: f64) -> Self {
Self::Float(i.into())
}
}
impl Display for JsonNumber {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Self::PosInt(x) => write!(f, "{x}"),
Self::NegInt(x) => write!(f, "{x}"),
Self::Float(x) => write!(f, "{x}"),
}
}
}
/// Variants of json.
///
/// This follows how [serde_json::Value] designs except that we only choose to use [BTreeMap] to
/// preserve the fields order by their names in the json object. (By default `serde_json` uses
/// [BTreeMap], too. But it additionally supports "IndexMap" which preserves the order by insertion
/// times of fields.)
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum JsonVariant {
Null,
Bool(bool),
Number(JsonNumber),
String(String),
Array(Vec<JsonVariant>),
Object(BTreeMap<String, JsonVariant>),
}
impl JsonVariant {
fn json_type(&self) -> JsonType {
match self {
JsonVariant::Null => JsonType::new_native(ConcreteDataType::null_datatype()),
JsonVariant::Bool(_) => JsonType::new_native(ConcreteDataType::boolean_datatype()),
JsonVariant::Number(n) => JsonType::new_native(match n {
JsonNumber::PosInt(_) => ConcreteDataType::uint64_datatype(),
JsonNumber::NegInt(_) => ConcreteDataType::int64_datatype(),
JsonNumber::Float(_) => ConcreteDataType::float64_datatype(),
}),
JsonVariant::String(_) => JsonType::new_native(ConcreteDataType::string_datatype()),
JsonVariant::Array(array) => {
let item_type = if let Some(first) = array.first() {
first.json_type().native_type()
} else {
ConcreteDataType::null_datatype()
};
JsonType::new_native(ConcreteDataType::list_datatype(Arc::new(item_type)))
}
JsonVariant::Object(object) => {
let mut fields = Vec::with_capacity(object.len());
for (k, v) in object.iter() {
fields.push(StructField::new(
k.clone(),
v.json_type().native_type(),
true,
))
}
JsonType::new_native(ConcreteDataType::struct_datatype(StructType::new(
Arc::new(fields),
)))
}
}
}
fn as_ref(&self) -> JsonVariantRef<'_> {
match self {
JsonVariant::Null => JsonVariantRef::Null,
JsonVariant::Bool(x) => (*x).into(),
JsonVariant::Number(x) => match x {
JsonNumber::PosInt(i) => (*i).into(),
JsonNumber::NegInt(i) => (*i).into(),
JsonNumber::Float(f) => (f.0).into(),
},
JsonVariant::String(x) => x.as_str().into(),
JsonVariant::Array(array) => {
JsonVariantRef::Array(array.iter().map(|x| x.as_ref()).collect())
}
JsonVariant::Object(object) => JsonVariantRef::Object(
object
.iter()
.map(|(k, v)| (k.as_str(), v.as_ref()))
.collect(),
),
}
}
}
impl From<()> for JsonVariant {
fn from(_: ()) -> Self {
Self::Null
}
}
impl From<bool> for JsonVariant {
fn from(v: bool) -> Self {
Self::Bool(v)
}
}
impl<T: Into<JsonNumber>> From<T> for JsonVariant {
fn from(v: T) -> Self {
Self::Number(v.into())
}
}
impl From<&str> for JsonVariant {
fn from(v: &str) -> Self {
Self::String(v.to_string())
}
}
impl From<String> for JsonVariant {
fn from(v: String) -> Self {
Self::String(v)
}
}
impl<const N: usize, T: Into<JsonVariant>> From<[T; N]> for JsonVariant {
fn from(vs: [T; N]) -> Self {
Self::Array(vs.into_iter().map(|x| x.into()).collect())
}
}
impl<K: Into<String>, V: Into<JsonVariant>, const N: usize> From<[(K, V); N]> for JsonVariant {
fn from(vs: [(K, V); N]) -> Self {
Self::Object(vs.into_iter().map(|(k, v)| (k.into(), v.into())).collect())
}
}
impl Display for JsonVariant {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Self::Null => write!(f, "null"),
Self::Bool(x) => write!(f, "{x}"),
Self::Number(x) => write!(f, "{x}"),
Self::String(x) => write!(f, "{x}"),
Self::Array(array) => write!(
f,
"[{}]",
array
.iter()
.map(|x| x.to_string())
.collect::<Vec<_>>()
.join(", ")
),
Self::Object(object) => {
write!(
f,
"{{ {} }}",
object
.iter()
.map(|(k, v)| format!("{k}: {v}"))
.collect::<Vec<_>>()
.join(", ")
)
}
}
}
}
/// Represents any valid JSON value.
#[derive(Debug, Eq, Serialize, Deserialize)]
pub struct JsonValue {
#[serde(skip)]
json_type: OnceLock<JsonType>,
json_variant: JsonVariant,
}
impl JsonValue {
pub fn null() -> Self {
().into()
}
pub(crate) fn new(json_variant: JsonVariant) -> Self {
Self {
json_type: OnceLock::new(),
json_variant,
}
}
pub(crate) fn data_type(&self) -> ConcreteDataType {
ConcreteDataType::Json(self.json_type().clone())
}
pub(crate) fn json_type(&self) -> &JsonType {
self.json_type.get_or_init(|| self.json_variant.json_type())
}
pub(crate) fn is_null(&self) -> bool {
matches!(self.json_variant, JsonVariant::Null)
}
pub(crate) fn as_i64(&self) -> Option<i64> {
match self.json_variant {
JsonVariant::Number(n) => n.as_i64(),
_ => None,
}
}
pub(crate) fn as_u64(&self) -> Option<u64> {
match self.json_variant {
JsonVariant::Number(n) => n.as_u64(),
_ => None,
}
}
pub(crate) fn as_f64(&self) -> Option<f64> {
match self.json_variant {
JsonVariant::Number(n) => Some(n.as_f64()),
_ => None,
}
}
pub(crate) fn as_f64_lossy(&self) -> Option<f64> {
match self.json_variant {
JsonVariant::Number(n) => Some(match n {
JsonNumber::PosInt(i) => i as f64,
JsonNumber::NegInt(i) => i as f64,
JsonNumber::Float(f) => f.0,
}),
_ => None,
}
}
pub(crate) fn as_bool(&self) -> Option<bool> {
match self.json_variant {
JsonVariant::Bool(b) => Some(b),
_ => None,
}
}
pub fn as_ref(&self) -> JsonValueRef<'_> {
JsonValueRef {
json_type: OnceLock::new(),
json_variant: self.json_variant.as_ref(),
}
}
pub fn into_variant(self) -> JsonVariant {
self.json_variant
}
pub(crate) fn into_value(self) -> Value {
fn helper(v: JsonVariant) -> Value {
match v {
JsonVariant::Null => Value::Null,
JsonVariant::Bool(x) => Value::Boolean(x),
JsonVariant::Number(x) => match x {
JsonNumber::PosInt(i) => Value::UInt64(i),
JsonNumber::NegInt(i) => Value::Int64(i),
JsonNumber::Float(f) => Value::Float64(f),
},
JsonVariant::String(x) => Value::String(x.into()),
JsonVariant::Array(array) => {
let item_type = if let Some(first) = array.first() {
first.json_type().native_type()
} else {
ConcreteDataType::null_datatype()
};
Value::List(ListValue::new(
array.into_iter().map(helper).collect(),
Arc::new(item_type),
))
}
JsonVariant::Object(object) => {
let mut fields = Vec::with_capacity(object.len());
let mut items = Vec::with_capacity(object.len());
for (k, v) in object {
fields.push(StructField::new(k, v.json_type().native_type(), true));
items.push(helper(v));
}
Value::Struct(StructValue::new(items, StructType::new(Arc::new(fields))))
}
}
}
helper(self.json_variant)
}
}
impl<T: Into<JsonVariant>> From<T> for JsonValue {
fn from(v: T) -> Self {
Self {
json_type: OnceLock::new(),
json_variant: v.into(),
}
}
}
impl From<JsonValue> for serde_json::Value {
fn from(v: JsonValue) -> Self {
fn helper(v: JsonVariant) -> serde_json::Value {
match v {
JsonVariant::Null => serde_json::Value::Null,
JsonVariant::Bool(x) => serde_json::Value::Bool(x),
JsonVariant::Number(x) => match x {
JsonNumber::PosInt(i) => serde_json::Value::Number(i.into()),
JsonNumber::NegInt(i) => serde_json::Value::Number(i.into()),
JsonNumber::Float(f) => {
if let Some(x) = Number::from_f64(f.0) {
serde_json::Value::Number(x)
} else {
serde_json::Value::String("NaN".into())
}
}
},
JsonVariant::String(x) => serde_json::Value::String(x),
JsonVariant::Array(array) => {
serde_json::Value::Array(array.into_iter().map(helper).collect())
}
JsonVariant::Object(object) => serde_json::Value::Object(
object.into_iter().map(|(k, v)| (k, helper(v))).collect(),
),
}
}
helper(v.json_variant)
}
}
impl Clone for JsonValue {
fn clone(&self) -> Self {
let Self {
json_type: _,
json_variant,
} = self;
Self {
json_type: OnceLock::new(),
json_variant: json_variant.clone(),
}
}
}
impl PartialEq<JsonValue> for JsonValue {
fn eq(&self, other: &JsonValue) -> bool {
let Self {
json_type: _,
json_variant,
} = self;
json_variant.eq(&other.json_variant)
}
}
impl Hash for JsonValue {
fn hash<H: Hasher>(&self, state: &mut H) {
let Self {
json_type: _,
json_variant,
} = self;
json_variant.hash(state);
}
}
impl Display for JsonValue {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.json_variant)
}
}
/// References of variants of json.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub enum JsonVariantRef<'a> {
Null,
Bool(bool),
Number(JsonNumber),
String(&'a str),
Array(Vec<JsonVariantRef<'a>>),
Object(BTreeMap<&'a str, JsonVariantRef<'a>>),
}
impl JsonVariantRef<'_> {
fn json_type(&self) -> JsonType {
match self {
JsonVariantRef::Null => JsonType::new_native(ConcreteDataType::null_datatype()),
JsonVariantRef::Bool(_) => JsonType::new_native(ConcreteDataType::boolean_datatype()),
JsonVariantRef::Number(n) => JsonType::new_native(match n {
JsonNumber::PosInt(_) => ConcreteDataType::uint64_datatype(),
JsonNumber::NegInt(_) => ConcreteDataType::int64_datatype(),
JsonNumber::Float(_) => ConcreteDataType::float64_datatype(),
}),
JsonVariantRef::String(_) => JsonType::new_native(ConcreteDataType::string_datatype()),
JsonVariantRef::Array(array) => {
let item_type = if let Some(first) = array.first() {
first.json_type().native_type()
} else {
ConcreteDataType::null_datatype()
};
JsonType::new_native(ConcreteDataType::list_datatype(Arc::new(item_type)))
}
JsonVariantRef::Object(object) => {
let mut fields = Vec::with_capacity(object.len());
for (k, v) in object.iter() {
fields.push(StructField::new(
k.to_string(),
v.json_type().native_type(),
true,
))
}
JsonType::new_native(ConcreteDataType::struct_datatype(StructType::new(
Arc::new(fields),
)))
}
}
}
}
impl From<()> for JsonVariantRef<'_> {
fn from(_: ()) -> Self {
Self::Null
}
}
impl From<bool> for JsonVariantRef<'_> {
fn from(v: bool) -> Self {
Self::Bool(v)
}
}
impl<T: Into<JsonNumber>> From<T> for JsonVariantRef<'_> {
fn from(v: T) -> Self {
Self::Number(v.into())
}
}
impl<'a> From<&'a str> for JsonVariantRef<'a> {
fn from(v: &'a str) -> Self {
Self::String(v)
}
}
impl<'a, const N: usize, T: Into<JsonVariantRef<'a>>> From<[T; N]> for JsonVariantRef<'a> {
fn from(vs: [T; N]) -> Self {
Self::Array(vs.into_iter().map(|x| x.into()).collect())
}
}
impl<'a, V: Into<JsonVariantRef<'a>>, const N: usize> From<[(&'a str, V); N]>
for JsonVariantRef<'a>
{
fn from(vs: [(&'a str, V); N]) -> Self {
Self::Object(vs.into_iter().map(|(k, v)| (k, v.into())).collect())
}
}
impl<'a> From<Vec<JsonVariantRef<'a>>> for JsonVariantRef<'a> {
fn from(v: Vec<JsonVariantRef<'a>>) -> Self {
Self::Array(v)
}
}
impl<'a> From<BTreeMap<&'a str, JsonVariantRef<'a>>> for JsonVariantRef<'a> {
fn from(v: BTreeMap<&'a str, JsonVariantRef<'a>>) -> Self {
Self::Object(v)
}
}
impl From<JsonVariantRef<'_>> for JsonVariant {
fn from(v: JsonVariantRef) -> Self {
match v {
JsonVariantRef::Null => Self::Null,
JsonVariantRef::Bool(x) => Self::Bool(x),
JsonVariantRef::Number(x) => Self::Number(x),
JsonVariantRef::String(x) => Self::String(x.to_string()),
JsonVariantRef::Array(array) => {
Self::Array(array.into_iter().map(Into::into).collect())
}
JsonVariantRef::Object(object) => Self::Object(
object
.into_iter()
.map(|(k, v)| (k.to_string(), v.into()))
.collect(),
),
}
}
}
/// Reference to representation of any valid JSON value.
#[derive(Debug, Serialize)]
pub struct JsonValueRef<'a> {
#[serde(skip)]
json_type: OnceLock<JsonType>,
json_variant: JsonVariantRef<'a>,
}
impl<'a> JsonValueRef<'a> {
pub fn null() -> Self {
().into()
}
pub(crate) fn data_type(&self) -> ConcreteDataType {
ConcreteDataType::Json(self.json_type().clone())
}
pub(crate) fn json_type(&self) -> &JsonType {
self.json_type.get_or_init(|| self.json_variant.json_type())
}
pub fn into_variant(self) -> JsonVariantRef<'a> {
self.json_variant
}
pub(crate) fn is_null(&self) -> bool {
matches!(self.json_variant, JsonVariantRef::Null)
}
pub fn is_object(&self) -> bool {
matches!(self.json_variant, JsonVariantRef::Object(_))
}
pub(crate) fn as_f32(&self) -> Option<f32> {
match self.json_variant {
JsonVariantRef::Number(JsonNumber::Float(f)) => f.to_f32(),
_ => None,
}
}
pub(crate) fn as_f64(&self) -> Option<f64> {
match self.json_variant {
JsonVariantRef::Number(JsonNumber::Float(f)) => Some(f.0),
_ => None,
}
}
pub fn as_value_ref(&self) -> ValueRef<'_> {
fn helper<'a>(v: &'a JsonVariantRef) -> ValueRef<'a> {
match v {
JsonVariantRef::Null => ValueRef::Null,
JsonVariantRef::Bool(x) => ValueRef::Boolean(*x),
JsonVariantRef::Number(x) => match x {
JsonNumber::PosInt(i) => ValueRef::UInt64(*i),
JsonNumber::NegInt(i) => ValueRef::Int64(*i),
JsonNumber::Float(f) => ValueRef::Float64(*f),
},
JsonVariantRef::String(x) => ValueRef::String(x),
JsonVariantRef::Array(array) => {
let val = array.iter().map(helper).collect::<Vec<_>>();
let item_datatype = if let Some(first) = val.first() {
first.data_type()
} else {
ConcreteDataType::null_datatype()
};
ValueRef::List(ListValueRef::RefList {
val,
item_datatype: Arc::new(item_datatype),
})
}
JsonVariantRef::Object(object) => {
let mut fields = Vec::with_capacity(object.len());
let mut val = Vec::with_capacity(object.len());
for (k, v) in object.iter() {
let v = helper(v);
fields.push(StructField::new(k.to_string(), v.data_type(), true));
val.push(v);
}
ValueRef::Struct(StructValueRef::RefList {
val,
fields: StructType::new(Arc::new(fields)),
})
}
}
}
helper(&self.json_variant)
}
pub(crate) fn data_size(&self) -> usize {
size_of_val(self)
}
}
impl<'a, T: Into<JsonVariantRef<'a>>> From<T> for JsonValueRef<'a> {
fn from(v: T) -> Self {
Self {
json_type: OnceLock::new(),
json_variant: v.into(),
}
}
}
impl From<JsonValueRef<'_>> for JsonValue {
fn from(v: JsonValueRef<'_>) -> Self {
Self {
json_type: OnceLock::new(),
json_variant: v.json_variant.into(),
}
}
}
impl PartialEq for JsonValueRef<'_> {
fn eq(&self, other: &Self) -> bool {
let Self {
json_type: _,
json_variant,
} = self;
json_variant == &other.json_variant
}
}
impl Eq for JsonValueRef<'_> {}
impl Clone for JsonValueRef<'_> {
fn clone(&self) -> Self {
let Self {
json_type: _,
json_variant,
} = self;
Self {
json_type: OnceLock::new(),
json_variant: json_variant.clone(),
}
}
}

View File

@@ -20,7 +20,7 @@ mod decimal_type;
mod dictionary_type;
mod duration_type;
mod interval_type;
mod json_type;
pub(crate) mod json_type;
mod list_type;
mod null_type;
mod primitive_type;

View File

@@ -57,6 +57,19 @@ impl JsonType {
Self { format }
}
pub(crate) fn new_native(native: ConcreteDataType) -> Self {
Self {
format: JsonFormat::Native(Box::new(native)),
}
}
pub(crate) fn native_type(&self) -> ConcreteDataType {
match &self.format {
JsonFormat::Jsonb => ConcreteDataType::binary_datatype(),
JsonFormat::Native(x) => x.as_ref().clone(),
}
}
pub(crate) fn empty() -> Self {
Self {
format: JsonFormat::Native(Box::new(ConcreteDataType::null_datatype())),
@@ -65,26 +78,21 @@ impl JsonType {
/// Make json type a struct type, by:
/// - if the json is an object, its entries are mapped to struct fields, obviously;
/// - if not, the json is one of bool, number, string or array, make it a special field called
/// [JSON_PLAIN_FIELD_NAME] with metadata [JSON_PLAIN_FIELD_METADATA_KEY] = `"true"` in a
/// struct with only that field.
/// - if not, the json is one of bool, number, string or array, make it a special field
/// (see [plain_json_struct_type]).
pub(crate) fn as_struct_type(&self) -> StructType {
match &self.format {
JsonFormat::Jsonb => StructType::default(),
JsonFormat::Native(inner) => match inner.as_ref() {
ConcreteDataType::Struct(t) => t.clone(),
x => {
let mut field =
StructField::new(JSON_PLAIN_FIELD_NAME.to_string(), x.clone(), true);
field.insert_metadata(JSON_PLAIN_FIELD_METADATA_KEY, true);
StructType::new(Arc::new(vec![field]))
}
x => plain_json_struct_type(x.clone()),
},
}
}
/// Check if this json type is the special "plain" one.
/// See [JsonType::as_struct_type].
#[expect(unused)]
pub(crate) fn is_plain_json(&self) -> bool {
let JsonFormat::Native(box ConcreteDataType::Struct(t)) = &self.format else {
return true;
@@ -124,6 +132,14 @@ impl JsonType {
}
}
/// A special struct type for denoting "plain"(not object) json value. It has only one field, with
/// fixed name [JSON_PLAIN_FIELD_NAME] and with metadata [JSON_PLAIN_FIELD_METADATA_KEY] = `"true"`.
pub(crate) fn plain_json_struct_type(item_type: ConcreteDataType) -> StructType {
let mut field = StructField::new(JSON_PLAIN_FIELD_NAME.to_string(), item_type, true);
field.insert_metadata(JSON_PLAIN_FIELD_METADATA_KEY, true);
StructType::new(Arc::new(vec![field]))
}
fn is_mergeable(this: &ConcreteDataType, that: &ConcreteDataType) -> bool {
fn is_mergeable_struct(this: &StructType, that: &StructType) -> bool {
let this_fields = this.fields();

View File

@@ -177,7 +177,7 @@ impl DataType for StringType {
Value::Duration(v) => Some(Value::String(StringBytes::from(v.to_string()))),
Value::Decimal128(v) => Some(Value::String(StringBytes::from(v.to_string()))),
Value::Json(v) => self.try_cast(*v),
Value::Json(v) => serde_json::to_string(v.as_ref()).ok().map(|s| s.into()),
// StringBytes is only support for utf-8, Value::Binary and collections are not allowed.
Value::Binary(_) | Value::List(_) | Value::Struct(_) => None,

View File

@@ -36,6 +36,7 @@ use crate::error::{
self, ConvertArrowArrayToScalarsSnafu, ConvertScalarToArrowArraySnafu, Error,
InconsistentStructFieldsAndItemsSnafu, Result, TryFromValueSnafu,
};
use crate::json::value::{JsonValue, JsonValueRef};
use crate::prelude::*;
use crate::type_id::LogicalTypeId;
use crate::types::{IntervalType, ListType, StructType};
@@ -86,7 +87,7 @@ pub enum Value {
Struct(StructValue),
// Json Logical types:
Json(Box<Value>),
Json(Box<JsonValue>),
}
impl Display for Value {
@@ -197,7 +198,7 @@ macro_rules! define_data_type_func {
$struct::Struct(struct_value) => {
ConcreteDataType::struct_datatype(struct_value.struct_type().clone())
}
$struct::Json(v) => ConcreteDataType::json_native_datatype(v.data_type()),
$struct::Json(v) => v.data_type(),
}
}
};
@@ -220,7 +221,6 @@ impl Value {
match self {
Value::Null => Ok(None),
Value::List(v) => Ok(Some(v)),
Value::Json(inner) => inner.as_list(),
other => error::CastTypeSnafu {
msg: format!("Failed to cast {other:?} to list value"),
}
@@ -232,7 +232,6 @@ impl Value {
match self {
Value::Null => Ok(None),
Value::Struct(v) => Ok(Some(v)),
Value::Json(inner) => inner.as_struct(),
other => error::CastTypeSnafu {
msg: format!("Failed to cast {other:?} to struct value"),
}
@@ -267,7 +266,7 @@ impl Value {
Value::Duration(v) => ValueRef::Duration(*v),
Value::Decimal128(v) => ValueRef::Decimal128(*v),
Value::Struct(v) => ValueRef::Struct(StructValueRef::Ref(v)),
Value::Json(v) => ValueRef::Json(Box::new(v.as_value_ref())),
Value::Json(v) => ValueRef::Json(Box::new((**v).as_ref())),
}
}
@@ -391,7 +390,7 @@ impl Value {
/// Extract the inner JSON value from a JSON type.
pub fn into_json_inner(self) -> Option<Value> {
match self {
Value::Json(v) => Some(*v),
Value::Json(v) => Some((*v).into_value()),
_ => None,
}
}
@@ -501,7 +500,12 @@ impl Value {
let struct_type = output_type.as_struct().unwrap();
struct_value.try_to_scalar_value(struct_type)?
}
Value::Json(v) => v.try_to_scalar_value(output_type)?,
Value::Json(_) => {
return error::ToScalarValueSnafu {
reason: "unsupported for json value",
}
.fail();
}
};
Ok(scalar_value)
@@ -554,13 +558,12 @@ impl Value {
Value::IntervalDayTime(x) => Some(Value::IntervalDayTime(x.negative())),
Value::IntervalMonthDayNano(x) => Some(Value::IntervalMonthDayNano(x.negative())),
Value::Json(v) => v.try_negative().map(|neg| Value::Json(Box::new(neg))),
Value::Binary(_)
| Value::String(_)
| Value::Boolean(_)
| Value::List(_)
| Value::Struct(_) => None,
| Value::Struct(_)
| Value::Json(_) => None,
}
}
}
@@ -929,7 +932,7 @@ impl TryFrom<Value> for serde_json::Value {
.collect::<serde_json::Result<Map<String, serde_json::Value>>>()?;
serde_json::Value::Object(map)
}
Value::Json(v) => serde_json::Value::try_from(*v)?,
Value::Json(v) => (*v).into(),
};
Ok(json_value)
@@ -1263,7 +1266,7 @@ impl From<ValueRef<'_>> for Value {
ValueRef::List(v) => v.to_value(),
ValueRef::Decimal128(v) => Value::Decimal128(v),
ValueRef::Struct(v) => v.to_value(),
ValueRef::Json(v) => Value::Json(Box::new(Value::from(*v))),
ValueRef::Json(v) => Value::Json(Box::new(JsonValue::from(*v))),
}
}
}
@@ -1307,7 +1310,7 @@ pub enum ValueRef<'a> {
List(ListValueRef<'a>),
Struct(StructValueRef<'a>),
Json(Box<ValueRef<'a>>),
Json(Box<JsonValueRef<'a>>),
}
macro_rules! impl_as_for_value_ref {
@@ -1315,18 +1318,6 @@ macro_rules! impl_as_for_value_ref {
match $value {
ValueRef::Null => Ok(None),
ValueRef::$Variant(v) => Ok(Some(v.clone())),
ValueRef::Json(v) => match v.as_ref() {
ValueRef::Null => Ok(None),
ValueRef::$Variant(v) => Ok(Some(v.clone())),
other => error::CastTypeSnafu {
msg: format!(
"Failed to cast value ref {:?} to {}",
other,
stringify!($Variant)
),
}
.fail(),
},
other => error::CastTypeSnafu {
msg: format!(
"Failed to cast value ref {:?} to {}",
@@ -1402,7 +1393,7 @@ impl<'a> ValueRef<'a> {
match self {
ValueRef::Null => Ok(None),
ValueRef::Float32(f) => Ok(Some(f.0)),
ValueRef::Json(v) => v.try_into_f32(),
ValueRef::Json(v) => Ok(v.as_f32()),
other => error::CastTypeSnafu {
msg: format!("Failed to cast value ref {:?} to ValueRef::Float32", other,),
}
@@ -1414,7 +1405,7 @@ impl<'a> ValueRef<'a> {
match self {
ValueRef::Null => Ok(None),
ValueRef::Float64(f) => Ok(Some(f.0)),
ValueRef::Json(v) => v.try_into_f64(),
ValueRef::Json(v) => Ok(v.as_f64()),
other => error::CastTypeSnafu {
msg: format!("Failed to cast value ref {:?} to ValueRef::Float64", other,),
}
@@ -1746,6 +1737,7 @@ pub(crate) mod tests {
use num_traits::Float;
use super::*;
use crate::json::value::{JsonVariant, JsonVariantRef};
use crate::types::StructField;
use crate::vectors::ListVectorBuilder;
@@ -2281,19 +2273,48 @@ pub(crate) mod tests {
check_type_and_value(
&ConcreteDataType::json_native_datatype(ConcreteDataType::boolean_datatype()),
&Value::Json(Box::new(Value::Boolean(true))),
&Value::Json(Box::new(true.into())),
);
check_type_and_value(
&ConcreteDataType::json_native_datatype(build_list_type()),
&Value::Json(Box::new(Value::List(build_list_value()))),
&Value::Json(Box::new([true].into())),
);
check_type_and_value(
&ConcreteDataType::json_native_datatype(ConcreteDataType::struct_datatype(
build_struct_type(),
StructType::new(Arc::new(vec![
StructField::new(
"address".to_string(),
ConcreteDataType::string_datatype(),
true,
),
StructField::new("age".to_string(), ConcreteDataType::uint64_datatype(), true),
StructField::new(
"awards".to_string(),
ConcreteDataType::list_datatype(Arc::new(
ConcreteDataType::boolean_datatype(),
)),
true,
),
StructField::new("id".to_string(), ConcreteDataType::int64_datatype(), true),
StructField::new(
"name".to_string(),
ConcreteDataType::string_datatype(),
true,
),
])),
)),
&Value::Json(Box::new(
[
("id", JsonVariant::from(1i64)),
("name", "Alice".into()),
("age", 1u64.into()),
("address", "blah".into()),
("awards", [true, false].into()),
]
.into(),
)),
&Value::Json(Box::new(Value::Struct(build_struct_value()))),
);
}
@@ -2435,25 +2456,27 @@ pub(crate) mod tests {
// string wrapped in json
assert_eq!(
serde_json::Value::try_from(Value::Json(Box::new(Value::String("hello".into()))))
.unwrap(),
serde_json::Value::try_from(Value::Json(Box::new("hello".into()))).unwrap(),
serde_json::json!("hello")
);
// list wrapped in json
assert_eq!(
serde_json::Value::try_from(Value::Json(Box::new(Value::List(ListValue::new(
vec![Value::Int32(1), Value::Int32(2), Value::Int32(3),],
Arc::new(ConcreteDataType::int32_datatype())
)))))
.unwrap(),
serde_json::Value::try_from(Value::Json(Box::new([1i64, 2, 3,].into()))).unwrap(),
serde_json::json!([1, 2, 3])
);
// struct wrapped in json
assert_eq!(
serde_json::Value::try_from(Value::Json(Box::new(Value::Struct(struct_value))))
.unwrap(),
serde_json::Value::try_from(Value::Json(Box::new(
[
("num".to_string(), JsonVariant::from(42i64)),
("name".to_string(), "tomcat".into()),
("yes_or_no".to_string(), true.into()),
]
.into()
)))
.unwrap(),
serde_json::json!({
"num": 42,
"name": "tomcat",
@@ -2465,7 +2488,7 @@ pub(crate) mod tests {
#[test]
fn test_null_value() {
assert!(Value::Null.is_null());
assert!(Value::Json(Box::new(Value::Null)).is_null());
assert!(Value::Json(Box::new(JsonValue::null())).is_null());
assert!(!Value::Boolean(true).is_null());
assert!(Value::Null < Value::Boolean(false));
assert!(Value::Boolean(true) > Value::Null);
@@ -2544,13 +2567,6 @@ pub(crate) mod tests {
ValueRef::Struct(StructValueRef::Ref(&struct_value)),
Value::Struct(struct_value.clone()).as_value_ref()
);
assert_eq!(
ValueRef::Json(Box::new(ValueRef::Struct(StructValueRef::Ref(
&struct_value
)))),
Value::Json(Box::new(Value::Struct(struct_value.clone()))).as_value_ref()
);
}
#[test]
@@ -2675,8 +2691,18 @@ pub(crate) mod tests {
);
assert_eq!(
Value::Json(Box::new(Value::Struct(build_struct_value()))).to_string(),
"Json({ id: 1, name: tom, age: 25, address: 94038, awards: Boolean[true, false] })"
Value::Json(Box::new(
[
("id", JsonVariant::from(1i64)),
("name", "tom".into()),
("age", 25u64.into()),
("address", "94038".into()),
("awards", [true, false].into()),
]
.into()
))
.to_string(),
"Json({ address: 94038, age: 25, awards: [true, false], id: 1, name: tom })"
)
}
@@ -3167,10 +3193,17 @@ pub(crate) mod tests {
);
check_value_ref_size_eq(
&ValueRef::Json(Box::new(ValueRef::Struct(StructValueRef::Ref(
&build_struct_value(),
)))),
31,
&ValueRef::Json(Box::new(
[
("id", JsonVariantRef::from(1i64)),
("name", "tom".into()),
("age", 25u64.into()),
("address", "94038".into()),
("awards", [true, false].into()),
]
.into(),
)),
48,
);
}

View File

@@ -14,13 +14,13 @@
use std::any::Any;
use std::collections::HashMap;
use snafu::OptionExt;
use std::sync::LazyLock;
use crate::data_type::ConcreteDataType;
use crate::error::{Result, TryFromValueSnafu, UnsupportedOperationSnafu};
use crate::json::value::JsonValueRef;
use crate::prelude::{ValueRef, Vector, VectorRef};
use crate::types::JsonType;
use crate::types::{JsonType, json_type};
use crate::value::StructValueRef;
use crate::vectors::{MutableVector, StructVectorBuilder};
@@ -40,16 +40,16 @@ impl JsonStructsBuilder {
self.inner.len()
}
fn push(&mut self, value: &ValueRef) -> Result<()> {
if self.json_type.is_plain_json() {
let value = ValueRef::Struct(StructValueRef::RefList {
val: vec![value.clone()],
fields: self.json_type.as_struct_type(),
});
self.inner.try_push_value_ref(&value)
} else {
self.inner.try_push_value_ref(value)
fn push(&mut self, json: &JsonValueRef) -> Result<()> {
let mut value = json.as_value_ref();
if !json.is_object() {
let fields = json_type::plain_json_struct_type(value.data_type());
value = ValueRef::Struct(StructValueRef::RefList {
val: vec![value],
fields,
})
}
self.inner.try_push_value_ref(&value)
}
/// Try to merge (and consume the data of) other json vector builder into this one.
@@ -252,13 +252,17 @@ impl MutableVector for JsonVectorBuilder {
}
fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> {
let data_type = value.data_type();
let json_type = data_type.as_json().with_context(|| TryFromValueSnafu {
reason: format!("expected json value, got {value:?}"),
})?;
let ValueRef::Json(value) = value else {
return TryFromValueSnafu {
reason: format!("expected json value, got {value:?}"),
}
.fail();
};
let json_type = value.json_type();
let builder = match self.builders.last_mut() {
Some(last) => {
// TODO(LFC): use "is_include" and amend json value with nulls
if &last.json_type != json_type {
self.try_create_new_builder(json_type)?
} else {
@@ -268,21 +272,16 @@ impl MutableVector for JsonVectorBuilder {
None => self.try_create_new_builder(json_type)?,
};
let ValueRef::Json(value) = value else {
// Safety: json datatype value must be the value of json.
unreachable!()
};
builder.push(value)
builder.push(value.as_ref())
}
fn push_null(&mut self) {
let null_json_value = ValueRef::Json(Box::new(ValueRef::Null));
self.try_push_value_ref(&null_json_value)
static NULL_JSON: LazyLock<ValueRef> =
LazyLock::new(|| ValueRef::Json(Box::new(JsonValueRef::null())));
self.try_push_value_ref(&NULL_JSON)
// Safety: learning from the method "try_push_value_ref", a null json value should be
// always able to push into any json vectors.
.unwrap_or_else(|e| {
panic!("failed to push null json value: {null_json_value:?}, error: {e}")
});
.unwrap_or_else(|e| panic!("failed to push null json value, error: {e}"));
}
fn extend_slice_of(&mut self, _: &dyn Vector, _: usize, _: usize) -> Result<()> {
@@ -307,12 +306,11 @@ mod tests {
let value = settings.encode(json).unwrap();
let value = value.as_value_ref();
let result = builder.try_push_value_ref(&value);
match (result, expected) {
(Ok(()), Ok(())) => (),
(Err(e), Err(expected)) => assert_eq!(e.to_string(), expected),
_ => unreachable!(),
}
let result = builder
.try_push_value_ref(&value)
.map_err(|e| e.to_string());
let expected = expected.map_err(|e| e.to_string());
assert_eq!(result, expected);
}
#[test]

View File

@@ -18,7 +18,8 @@ use api::v1::column::Values;
use api::v1::greptime_request::Request;
use api::v1::value::ValueData;
use api::v1::{
Decimal128, InsertRequests, IntervalMonthDayNano, RowInsertRequest, RowInsertRequests,
Decimal128, InsertRequests, IntervalMonthDayNano, JsonValue, RowInsertRequest,
RowInsertRequests, json_value,
};
use pipeline::ContextReq;
use snafu::ResultExt;
@@ -229,12 +230,29 @@ impl Limiter {
.unwrap_or(0)
})
.sum(),
ValueData::JsonValue(inner) => inner
.as_ref()
.value_data
.as_ref()
.map(Self::size_of_value_data)
.unwrap_or(0),
ValueData::JsonValue(v) => {
fn calc(v: &JsonValue) -> usize {
let Some(value) = v.value.as_ref() else {
return 0;
};
match value {
json_value::Value::Boolean(_) => size_of::<bool>(),
json_value::Value::Int(_) => size_of::<i64>(),
json_value::Value::Uint(_) => size_of::<u64>(),
json_value::Value::Float(_) => size_of::<f64>(),
json_value::Value::Str(s) => s.len(),
json_value::Value::Array(array) => array.items.iter().map(calc).sum(),
json_value::Value::Object(object) => object
.entries
.iter()
.flat_map(|entry| {
entry.value.as_ref().map(|v| entry.key.len() + calc(v))
})
.sum(),
}
}
calc(v)
}
}
}
}

View File

@@ -173,6 +173,9 @@ impl<'a> Collider<'a> {
for (column, mut column_values) in values {
column_values.sort_unstable();
column_values.dedup(); // Remove duplicates
// allowed because we have carefully implemented `Hash` to eliminate the mutable
#[allow(clippy::mutable_key_type)]
let mut value_map = HashMap::with_capacity(column_values.len());
let mut start_value = ZERO;
for value in column_values {