mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-14 09:12:57 +00:00
feat: add json data type (#4619)
* feat: add json type and vector * fix: allow to create and insert json data * feat: udf to query json as string * refactor: remove JsonbValue and JsonVector * feat: show json value as strings * chore: make ci happy * test: adunit test and sqlness test * refactor: use binary as grpc value of json * fix: use non-preserve-order jsonb * test: revert changed test * refactor: change udf get_by_path to jq * chore: make ci happy * fix: distinguish binary and json in proto * chore: delete udf for future pr * refactor: remove Value(Json) * chore: follow review comments * test: some tests and checks * test: fix unit tests * chore: follow review comments * chore: corresponding changes to proto * fix: change grpc and pgsql server behavior alongside with sqlness/crud tests * chore: follow review comments * feat: udf of conversions between json and strings, used for grpc server * refactor: rename to_string to json_to_string * test: add more sqlness test for json * chore: thanks for review :) * Apply suggestions from code review --------- Co-authored-by: Weny Xu <wenymedia@gmail.com>
This commit is contained in:
@@ -29,6 +29,7 @@ datafusion.workspace = true
|
||||
datatypes.workspace = true
|
||||
geohash = { version = "0.13", optional = true }
|
||||
h3o = { version = "0.6", optional = true }
|
||||
jsonb.workspace = true
|
||||
num = "0.4"
|
||||
num-traits = "0.2"
|
||||
once_cell.workspace = true
|
||||
|
||||
@@ -22,6 +22,7 @@ use crate::function::{AsyncFunctionRef, FunctionRef};
|
||||
use crate::scalars::aggregate::{AggregateFunctionMetaRef, AggregateFunctions};
|
||||
use crate::scalars::date::DateFunction;
|
||||
use crate::scalars::expression::ExpressionFunction;
|
||||
use crate::scalars::json::JsonFunction;
|
||||
use crate::scalars::matches::MatchesFunction;
|
||||
use crate::scalars::math::MathFunction;
|
||||
use crate::scalars::numpy::NumpyFunction;
|
||||
@@ -116,6 +117,9 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
|
||||
SystemFunction::register(&function_registry);
|
||||
TableFunction::register(&function_registry);
|
||||
|
||||
// Json related functions
|
||||
JsonFunction::register(&function_registry);
|
||||
|
||||
// Geo functions
|
||||
#[cfg(feature = "geo")]
|
||||
crate::scalars::geo::GeoFunctions::register(&function_registry);
|
||||
|
||||
@@ -17,9 +17,11 @@ pub(crate) mod date;
|
||||
pub mod expression;
|
||||
#[cfg(feature = "geo")]
|
||||
pub mod geo;
|
||||
pub mod json;
|
||||
pub mod matches;
|
||||
pub mod math;
|
||||
pub mod numpy;
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test;
|
||||
pub(crate) mod timestamp;
|
||||
|
||||
31
src/common/function/src/scalars/json.rs
Normal file
31
src/common/function/src/scalars/json.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
mod json_to_string;
|
||||
mod to_json;
|
||||
|
||||
use json_to_string::JsonToStringFunction;
|
||||
use to_json::ToJsonFunction;
|
||||
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
pub(crate) struct JsonFunction;
|
||||
|
||||
impl JsonFunction {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
registry.register(Arc::new(JsonToStringFunction));
|
||||
registry.register(Arc::new(ToJsonFunction));
|
||||
}
|
||||
}
|
||||
174
src/common/function/src/scalars/json/json_to_string.rs
Normal file
174
src/common/function/src/scalars/json/json_to_string.rs
Normal file
@@ -0,0 +1,174 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::prelude::Signature;
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::prelude::VectorRef;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{MutableVector, StringVectorBuilder};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
/// Converts the `JSONB` into `String`. It's useful for displaying JSONB content.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct JsonToStringFunction;
|
||||
|
||||
const NAME: &str = "json_to_string";
|
||||
|
||||
impl Function for JsonToStringFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::string_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(
|
||||
vec![ConcreteDataType::json_datatype()],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 1,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly one, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
let jsons = &columns[0];
|
||||
|
||||
let size = jsons.len();
|
||||
let datatype = jsons.data_type();
|
||||
let mut results = StringVectorBuilder::with_capacity(size);
|
||||
|
||||
match datatype {
|
||||
// JSON data type uses binary vector
|
||||
ConcreteDataType::Binary(_) => {
|
||||
for i in 0..size {
|
||||
let json = jsons.get_ref(i);
|
||||
|
||||
let json = json.as_binary();
|
||||
let result = match json {
|
||||
Ok(Some(json)) => match jsonb::from_slice(json) {
|
||||
Ok(json) => {
|
||||
let json = json.to_string();
|
||||
Some(json)
|
||||
}
|
||||
Err(_) => {
|
||||
return InvalidFuncArgsSnafu {
|
||||
err_msg: format!("Illegal json binary: {:?}", json),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
},
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result.as_deref());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for JsonToStringFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "JSON_TO_STRING")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::prelude::TypeSignature;
|
||||
use datatypes::scalars::ScalarVector;
|
||||
use datatypes::vectors::BinaryVector;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_get_by_path_function() {
|
||||
let json_to_string = JsonToStringFunction;
|
||||
|
||||
assert_eq!("json_to_string", json_to_string.name());
|
||||
assert_eq!(
|
||||
ConcreteDataType::string_datatype(),
|
||||
json_to_string
|
||||
.return_type(&[ConcreteDataType::json_datatype()])
|
||||
.unwrap()
|
||||
);
|
||||
|
||||
assert!(matches!(json_to_string.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::Exact(valid_types),
|
||||
volatility: Volatility::Immutable
|
||||
} if valid_types == vec![ConcreteDataType::json_datatype()]
|
||||
));
|
||||
|
||||
let json_strings = [
|
||||
r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
|
||||
r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
|
||||
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
|
||||
];
|
||||
|
||||
let jsonbs = json_strings
|
||||
.iter()
|
||||
.map(|s| {
|
||||
let value = jsonb::parse_value(s.as_bytes()).unwrap();
|
||||
value.to_vec()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let json_vector = BinaryVector::from_vec(jsonbs);
|
||||
let args: Vec<VectorRef> = vec![Arc::new(json_vector)];
|
||||
let vector = json_to_string
|
||||
.eval(FunctionContext::default(), &args)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(3, vector.len());
|
||||
for (i, gt) in json_strings.iter().enumerate() {
|
||||
let result = vector.get_ref(i);
|
||||
let result = result.as_string().unwrap().unwrap();
|
||||
// remove whitespaces
|
||||
assert_eq!(gt.replace(" ", ""), result);
|
||||
}
|
||||
|
||||
let invalid_jsonb = vec![b"invalid json"];
|
||||
let invalid_json_vector = BinaryVector::from_vec(invalid_jsonb);
|
||||
let args: Vec<VectorRef> = vec![Arc::new(invalid_json_vector)];
|
||||
let vector = json_to_string.eval(FunctionContext::default(), &args);
|
||||
assert!(vector.is_err());
|
||||
}
|
||||
}
|
||||
165
src/common/function/src/scalars/json/to_json.rs
Normal file
165
src/common/function/src/scalars/json/to_json.rs
Normal file
@@ -0,0 +1,165 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::prelude::Signature;
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::prelude::VectorRef;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{BinaryVectorBuilder, MutableVector};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
/// Parses the `String` into `JSONB`.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct ToJsonFunction;
|
||||
|
||||
const NAME: &str = "to_json";
|
||||
|
||||
impl Function for ToJsonFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::json_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(
|
||||
vec![ConcreteDataType::string_datatype()],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 1,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly one, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
let json_strings = &columns[0];
|
||||
|
||||
let size = json_strings.len();
|
||||
let datatype = json_strings.data_type();
|
||||
let mut results = BinaryVectorBuilder::with_capacity(size);
|
||||
|
||||
match datatype {
|
||||
ConcreteDataType::String(_) => {
|
||||
for i in 0..size {
|
||||
let json_string = json_strings.get_ref(i);
|
||||
|
||||
let json_string = json_string.as_string();
|
||||
let result = match json_string {
|
||||
Ok(Some(json_string)) => match jsonb::parse_value(json_string.as_bytes()) {
|
||||
Ok(json) => Some(json.to_vec()),
|
||||
Err(_) => {
|
||||
return InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"Cannot convert the string to json, have: {}",
|
||||
json_string
|
||||
),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
},
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result.as_deref());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ToJsonFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "TO_JSON")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::prelude::TypeSignature;
|
||||
use datatypes::scalars::ScalarVector;
|
||||
use datatypes::vectors::StringVector;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_get_by_path_function() {
|
||||
let to_json = ToJsonFunction;
|
||||
|
||||
assert_eq!("to_json", to_json.name());
|
||||
assert_eq!(
|
||||
ConcreteDataType::json_datatype(),
|
||||
to_json
|
||||
.return_type(&[ConcreteDataType::json_datatype()])
|
||||
.unwrap()
|
||||
);
|
||||
|
||||
assert!(matches!(to_json.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::Exact(valid_types),
|
||||
volatility: Volatility::Immutable
|
||||
} if valid_types == vec![ConcreteDataType::string_datatype()]
|
||||
));
|
||||
|
||||
let json_strings = [
|
||||
r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
|
||||
r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
|
||||
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
|
||||
];
|
||||
|
||||
let jsonbs = json_strings
|
||||
.iter()
|
||||
.map(|s| {
|
||||
let value = jsonb::parse_value(s.as_bytes()).unwrap();
|
||||
value.to_vec()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let json_string_vector = StringVector::from_vec(json_strings.to_vec());
|
||||
let args: Vec<VectorRef> = vec![Arc::new(json_string_vector)];
|
||||
let vector = to_json.eval(FunctionContext::default(), &args).unwrap();
|
||||
|
||||
assert_eq!(3, vector.len());
|
||||
for (i, gt) in jsonbs.iter().enumerate() {
|
||||
let result = vector.get_ref(i);
|
||||
let result = result.as_binary().unwrap().unwrap();
|
||||
// remove whitespaces
|
||||
assert_eq!(gt, result);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user