mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-26 01:40:36 +00:00
feat: add json data type (#4619)
* feat: add json type and vector * fix: allow to create and insert json data * feat: udf to query json as string * refactor: remove JsonbValue and JsonVector * feat: show json value as strings * chore: make ci happy * test: add unit test and sqlness test * refactor: use binary as grpc value of json * fix: use non-preserve-order jsonb * test: revert changed test * refactor: change udf get_by_path to jq * chore: make ci happy * fix: distinguish binary and json in proto * chore: delete udf for future pr * refactor: remove Value(Json) * chore: follow review comments * test: some tests and checks * test: fix unit tests * chore: follow review comments * chore: corresponding changes to proto * fix: change grpc and pgsql server behavior alongside with sqlness/crud tests * chore: follow review comments * feat: udf of conversions between json and strings, used for grpc server * refactor: rename to_string to json_to_string * test: add more sqlness test for json * chore: thanks for review :) * Apply suggestions from code review --------- Co-authored-by: Weny Xu <wenymedia@gmail.com>
This commit is contained in:
@@ -25,6 +25,7 @@ common-time.workspace = true
|
||||
datafusion-common.workspace = true
|
||||
enum_dispatch = "0.3"
|
||||
greptime-proto.workspace = true
|
||||
jsonb.workspace = true
|
||||
num = "0.4"
|
||||
num-traits = "0.2"
|
||||
ordered-float = { version = "3.0", features = ["serde"] }
|
||||
|
||||
@@ -33,8 +33,8 @@ use crate::types::{
|
||||
BinaryType, BooleanType, DateTimeType, DateType, Decimal128Type, DictionaryType,
|
||||
DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType, DurationSecondType,
|
||||
DurationType, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
|
||||
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, ListType,
|
||||
NullType, StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
|
||||
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType,
|
||||
ListType, NullType, StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
|
||||
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
|
||||
UInt16Type, UInt32Type, UInt64Type, UInt8Type,
|
||||
};
|
||||
@@ -81,6 +81,9 @@ pub enum ConcreteDataType {
|
||||
// Compound types:
|
||||
List(ListType),
|
||||
Dictionary(DictionaryType),
|
||||
|
||||
// JSON type:
|
||||
Json(JsonType),
|
||||
}
|
||||
|
||||
impl fmt::Display for ConcreteDataType {
|
||||
@@ -128,6 +131,7 @@ impl fmt::Display for ConcreteDataType {
|
||||
ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()),
|
||||
ConcreteDataType::List(v) => write!(f, "{}", v.name()),
|
||||
ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()),
|
||||
ConcreteDataType::Json(v) => write!(f, "{}", v.name()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -162,6 +166,7 @@ impl ConcreteDataType {
|
||||
| ConcreteDataType::Duration(_)
|
||||
| ConcreteDataType::Decimal128(_)
|
||||
| ConcreteDataType::Binary(_)
|
||||
| ConcreteDataType::Json(_)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -216,6 +221,10 @@ impl ConcreteDataType {
|
||||
matches!(self, ConcreteDataType::Decimal128(_))
|
||||
}
|
||||
|
||||
/// Returns `true` if this data type is the `ConcreteDataType::Json` variant.
pub fn is_json(&self) -> bool {
|
||||
matches!(self, ConcreteDataType::Json(_))
|
||||
}
|
||||
|
||||
pub fn numerics() -> Vec<ConcreteDataType> {
|
||||
vec![
|
||||
ConcreteDataType::int8_datatype(),
|
||||
@@ -404,7 +413,7 @@ macro_rules! impl_new_concrete_type_functions {
|
||||
|
||||
impl_new_concrete_type_functions!(
|
||||
Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
|
||||
Binary, Date, DateTime, String
|
||||
Binary, Date, DateTime, String, Json
|
||||
);
|
||||
|
||||
impl ConcreteDataType {
|
||||
|
||||
@@ -25,6 +25,7 @@ use datafusion_common::DFSchemaRef;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result};
|
||||
use crate::prelude::DataType;
|
||||
pub use crate::schema::column_schema::{
|
||||
ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, COMMENT_KEY, FULLTEXT_KEY,
|
||||
TIME_INDEX_KEY,
|
||||
@@ -34,6 +35,8 @@ pub use crate::schema::raw::RawSchema;
|
||||
|
||||
/// Key used to store version number of the schema in metadata.
|
||||
pub const VERSION_KEY: &str = "greptime:version";
|
||||
/// Key used to store actual column type in field metadata.
|
||||
pub const TYPE_KEY: &str = "greptime:type";
|
||||
|
||||
/// A common schema, should be immutable.
|
||||
#[derive(Clone, PartialEq, Eq)]
|
||||
@@ -256,7 +259,13 @@ fn collect_fields(column_schemas: &[ColumnSchema]) -> Result<FieldsAndIndices> {
|
||||
if column_schema.is_time_index() && timestamp_index.is_none() {
|
||||
timestamp_index = Some(index);
|
||||
}
|
||||
let field = Field::try_from(column_schema)?;
|
||||
let mut field = Field::try_from(column_schema)?;
|
||||
|
||||
// A Json column is represented as a binary column in Arrow, so we need to mark it
|
||||
if column_schema.data_type.is_json() {
|
||||
let metadata = HashMap::from([(TYPE_KEY.to_string(), column_schema.data_type.name())]);
|
||||
field = field.with_metadata(metadata);
|
||||
}
|
||||
fields.push(field);
|
||||
ensure!(
|
||||
name_to_index
|
||||
|
||||
@@ -22,6 +22,8 @@ use snafu::{ensure, ResultExt};
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, Error, Result};
|
||||
use crate::schema::constraint::ColumnDefaultConstraint;
|
||||
use crate::schema::TYPE_KEY;
|
||||
use crate::types::JSON_TYPE_NAME;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::VectorRef;
|
||||
|
||||
@@ -268,7 +270,14 @@ impl TryFrom<&Field> for ColumnSchema {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(field: &Field) -> Result<ColumnSchema> {
|
||||
let data_type = ConcreteDataType::try_from(field.data_type())?;
|
||||
let mut data_type = ConcreteDataType::try_from(field.data_type())?;
|
||||
// Override the data type if it is specified in the metadata.
|
||||
if field.metadata().contains_key(TYPE_KEY) {
|
||||
data_type = match field.metadata().get(TYPE_KEY).unwrap().as_str() {
|
||||
JSON_TYPE_NAME => ConcreteDataType::json_datatype(),
|
||||
_ => data_type,
|
||||
};
|
||||
}
|
||||
let mut metadata = field.metadata().clone();
|
||||
let default_constraint = match metadata.remove(DEFAULT_CONSTRAINT_KEY) {
|
||||
Some(json) => {
|
||||
@@ -528,4 +537,32 @@ mod tests {
|
||||
assert_eq!(formatted_int8, "test_column_1 Int8 null");
|
||||
assert_eq!(formatted_int32, "test_column_2 Int32 not null");
|
||||
}
|
||||
|
||||
// Checks the conversion from an Arrow `Field` to a `ColumnSchema`, first for a
// plain Int32 field and then for a Binary field tagged as JSON through `TYPE_KEY`.
#[test]
|
||||
fn test_from_field_to_column_schema() {
|
||||
// Case 1: a nullable Int32 field without metadata maps to the int32 data type
// and yields an empty metadata map.
let field = Field::new("test", ArrowDataType::Int32, true);
|
||||
let column_schema = ColumnSchema::try_from(&field).unwrap();
|
||||
assert_eq!("test", column_schema.name);
|
||||
assert_eq!(ConcreteDataType::int32_datatype(), column_schema.data_type);
|
||||
assert!(column_schema.is_nullable);
|
||||
assert!(!column_schema.is_time_index);
|
||||
assert!(column_schema.default_constraint.is_none());
|
||||
assert!(column_schema.metadata.is_empty());
|
||||
|
||||
// Case 2: a Binary field whose metadata carries `TYPE_KEY` set to the JSON type
// name must be converted to the json data type rather than binary.
let field = Field::new("test", ArrowDataType::Binary, true);
|
||||
let field = field.with_metadata(Metadata::from([(
|
||||
TYPE_KEY.to_string(),
|
||||
ConcreteDataType::json_datatype().name(),
|
||||
)]));
|
||||
let column_schema = ColumnSchema::try_from(&field).unwrap();
|
||||
assert_eq!("test", column_schema.name);
|
||||
assert_eq!(ConcreteDataType::json_datatype(), column_schema.data_type);
|
||||
assert!(column_schema.is_nullable);
|
||||
assert!(!column_schema.is_time_index);
|
||||
assert!(column_schema.default_constraint.is_none());
|
||||
// The `TYPE_KEY` entry is expected to remain in the column schema's metadata.
assert_eq!(
|
||||
column_schema.metadata.get(TYPE_KEY).unwrap(),
|
||||
&ConcreteDataType::json_datatype().name()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,6 +68,8 @@ pub enum LogicalTypeId {
|
||||
|
||||
List,
|
||||
Dictionary,
|
||||
|
||||
Json,
|
||||
}
|
||||
|
||||
impl LogicalTypeId {
|
||||
@@ -126,6 +128,7 @@ impl LogicalTypeId {
|
||||
LogicalTypeId::DurationMicrosecond => ConcreteDataType::duration_microsecond_datatype(),
|
||||
LogicalTypeId::DurationNanosecond => ConcreteDataType::duration_nanosecond_datatype(),
|
||||
LogicalTypeId::Decimal128 => ConcreteDataType::decimal128_default_datatype(),
|
||||
LogicalTypeId::Json => ConcreteDataType::json_datatype(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ mod decimal_type;
|
||||
mod dictionary_type;
|
||||
mod duration_type;
|
||||
mod interval_type;
|
||||
mod json_type;
|
||||
mod list_type;
|
||||
mod null_type;
|
||||
mod primitive_type;
|
||||
@@ -42,6 +43,7 @@ pub use duration_type::{
|
||||
pub use interval_type::{
|
||||
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType,
|
||||
};
|
||||
pub use json_type::{JsonType, JSON_TYPE_NAME};
|
||||
pub use list_type::ListType;
|
||||
pub use null_type::NullType;
|
||||
pub use primitive_type::{
|
||||
|
||||
67
src/datatypes/src/types/json_type.rs
Normal file
67
src/datatypes/src/types/json_type.rs
Normal file
@@ -0,0 +1,67 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use common_base::bytes::Bytes;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::scalars::ScalarVectorBuilder;
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::{BinaryVectorBuilder, MutableVector};
|
||||
|
||||
/// Name of the JSON data type, as returned by `JsonType::name`.
pub const JSON_TYPE_NAME: &str = "Json";
|
||||
|
||||
/// JsonType is a data type for JSON data. It is stored as binary data in jsonb format.
|
||||
/// It reuses the existing binary value and binary vector implementations.
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub struct JsonType;
|
||||
|
||||
impl JsonType {
|
||||
/// Wraps a `JsonType` in an `Arc` and returns it as a `DataTypeRef`.
pub fn arc() -> DataTypeRef {
|
||||
Arc::new(Self)
|
||||
}
|
||||
}
|
||||
|
||||
// `DataType` implementation for `JsonType`. The Arrow type and the mutable
// vector builder are the binary ones, and only binary values can be cast to it.
impl DataType for JsonType {
|
||||
/// Returns the type name, `JSON_TYPE_NAME`, as an owned `String`.
fn name(&self) -> String {
|
||||
JSON_TYPE_NAME.to_string()
|
||||
}
|
||||
|
||||
/// Returns `LogicalTypeId::Json`.
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::Json
|
||||
}
|
||||
|
||||
/// Returns the default value: an empty `Bytes` converted into a `Value`.
// NOTE(review): an empty byte string may not be a valid jsonb document — confirm
// that consumers of the default value tolerate it.
fn default_value(&self) -> Value {
|
||||
Bytes::default().into()
|
||||
}
|
||||
|
||||
/// Returns the Arrow representation of this type, `ArrowDataType::Binary`.
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Binary
|
||||
}
|
||||
|
||||
/// Creates a boxed `BinaryVectorBuilder` with the given `capacity`.
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(BinaryVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
|
||||
/// Casts `from` to this type: a `Value::Binary` is returned unchanged, and any
/// other value yields `None`.
fn try_cast(&self, from: Value) -> Option<Value> {
|
||||
match from {
|
||||
Value::Binary(v) => Some(Value::Binary(v)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -342,7 +342,8 @@ impl Value {
|
||||
let value_type_id = self.logical_type_id();
|
||||
let output_type_id = output_type.logical_type_id();
|
||||
ensure!(
|
||||
output_type_id == value_type_id || self.is_null(),
|
||||
// Json type leverages Value(Binary) for storage.
|
||||
output_type_id == value_type_id || self.is_null() || (output_type_id == LogicalTypeId::Json && value_type_id == LogicalTypeId::Binary),
|
||||
error::ToScalarValueSnafu {
|
||||
reason: format!(
|
||||
"expect value to return output_type {output_type_id:?}, actual: {value_type_id:?}",
|
||||
@@ -484,7 +485,7 @@ pub fn to_null_scalar_value(output_type: &ConcreteDataType) -> Result<ScalarValu
|
||||
ConcreteDataType::UInt64(_) => ScalarValue::UInt64(None),
|
||||
ConcreteDataType::Float32(_) => ScalarValue::Float32(None),
|
||||
ConcreteDataType::Float64(_) => ScalarValue::Float64(None),
|
||||
ConcreteDataType::Binary(_) => ScalarValue::Binary(None),
|
||||
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => ScalarValue::Binary(None),
|
||||
ConcreteDataType::String(_) => ScalarValue::Utf8(None),
|
||||
ConcreteDataType::Date(_) => ScalarValue::Date32(None),
|
||||
ConcreteDataType::DateTime(_) => ScalarValue::Date64(None),
|
||||
@@ -1994,6 +1995,10 @@ mod tests {
|
||||
&ConcreteDataType::duration_nanosecond_datatype(),
|
||||
&Value::Duration(Duration::new_nanosecond(1)),
|
||||
);
|
||||
check_type_and_value(
|
||||
&ConcreteDataType::decimal128_datatype(38, 10),
|
||||
&Value::Decimal128(Decimal128::new(1, 38, 10)),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -2178,6 +2183,14 @@ mod tests {
|
||||
ValueRef::List(ListValueRef::Ref { val: &list }),
|
||||
Value::List(list.clone()).as_value_ref()
|
||||
);
|
||||
|
||||
let jsonb_value = jsonb::parse_value(r#"{"key": "value"}"#.as_bytes())
|
||||
.unwrap()
|
||||
.to_vec();
|
||||
assert_eq!(
|
||||
ValueRef::Binary(jsonb_value.clone().as_slice()),
|
||||
Value::Binary(jsonb_value.into()).as_value_ref()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -2391,6 +2404,16 @@ mod tests {
|
||||
.try_to_scalar_value(&ConcreteDataType::binary_datatype())
|
||||
.unwrap()
|
||||
);
|
||||
|
||||
let jsonb_value = jsonb::parse_value(r#"{"key": "value"}"#.as_bytes())
|
||||
.unwrap()
|
||||
.to_vec();
|
||||
assert_eq!(
|
||||
ScalarValue::Binary(Some(jsonb_value.clone())),
|
||||
Value::Binary(jsonb_value.into())
|
||||
.try_to_scalar_value(&ConcreteDataType::json_datatype())
|
||||
.unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -2523,6 +2546,12 @@ mod tests {
|
||||
.try_to_scalar_value(&ConcreteDataType::duration_nanosecond_datatype())
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Binary(None),
|
||||
Value::Null
|
||||
.try_to_scalar_value(&ConcreteDataType::json_datatype())
|
||||
.unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -80,7 +80,7 @@ fn equal(lhs: &dyn Vector, rhs: &dyn Vector) -> bool {
|
||||
match lhs.data_type() {
|
||||
Null(_) => true,
|
||||
Boolean(_) => is_vector_eq!(BooleanVector, lhs, rhs),
|
||||
Binary(_) => is_vector_eq!(BinaryVector, lhs, rhs),
|
||||
Binary(_) | Json(_) => is_vector_eq!(BinaryVector, lhs, rhs),
|
||||
String(_) => is_vector_eq!(StringVector, lhs, rhs),
|
||||
Date(_) => is_vector_eq!(DateVector, lhs, rhs),
|
||||
DateTime(_) => is_vector_eq!(DateTimeVector, lhs, rhs),
|
||||
|
||||
Reference in New Issue
Block a user