mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-08 22:32:55 +00:00
feat: add respective json_is UDFs for JSON type (#4726)
* feat: add respective json_is UDFs * refactor: rename to_json to parse_json * chore: happy clippy * chore: some rename * fix: small fixes
This commit is contained in:
@@ -14,12 +14,16 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
mod json_get;
|
||||
mod json_is;
|
||||
mod json_to_string;
|
||||
mod to_json;
|
||||
mod parse_json;
|
||||
|
||||
use json_get::{JsonGetBool, JsonGetFloat, JsonGetInt, JsonGetString};
|
||||
use json_is::{
|
||||
JsonIsArray, JsonIsBool, JsonIsFloat, JsonIsInt, JsonIsNull, JsonIsObject, JsonIsString,
|
||||
};
|
||||
use json_to_string::JsonToStringFunction;
|
||||
use to_json::ToJsonFunction;
|
||||
use parse_json::ParseJsonFunction;
|
||||
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
@@ -28,11 +32,19 @@ pub(crate) struct JsonFunction;
|
||||
impl JsonFunction {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
registry.register(Arc::new(JsonToStringFunction));
|
||||
registry.register(Arc::new(ToJsonFunction));
|
||||
registry.register(Arc::new(ParseJsonFunction));
|
||||
|
||||
registry.register(Arc::new(JsonGetInt));
|
||||
registry.register(Arc::new(JsonGetFloat));
|
||||
registry.register(Arc::new(JsonGetString));
|
||||
registry.register(Arc::new(JsonGetBool));
|
||||
|
||||
registry.register(Arc::new(JsonIsNull));
|
||||
registry.register(Arc::new(JsonIsInt));
|
||||
registry.register(Arc::new(JsonIsFloat));
|
||||
registry.register(Arc::new(JsonIsString));
|
||||
registry.register(Arc::new(JsonIsBool));
|
||||
registry.register(Arc::new(JsonIsArray));
|
||||
registry.register(Arc::new(JsonIsObject));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,7 +47,7 @@ fn get_json_by_path(json: &[u8], path: &str) -> Option<Vec<u8>> {
|
||||
/// If the path does not exist or the value is not the type specified, return `NULL`.
|
||||
macro_rules! json_get {
|
||||
// e.g. name = JsonGetInt, type = Int64, rust_type = i64, doc = "Get the value from the JSONB by the given path and return it as an integer."
|
||||
($name: ident, $type: ident, $rust_type: ident, $doc:expr) => {
|
||||
($name:ident, $type:ident, $rust_type:ident, $doc:expr) => {
|
||||
paste::paste! {
|
||||
#[doc = $doc]
|
||||
#[derive(Clone, Debug, Default)]
|
||||
|
||||
215
src/common/function/src/scalars/json/json_is.rs
Normal file
215
src/common/function/src/scalars/json/json_is.rs
Normal file
@@ -0,0 +1,215 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::prelude::Signature;
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::prelude::VectorRef;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
/// Generates a scalar function that checks whether a JSONB value is of a given JSON type.
///
/// Arguments:
/// - `$name`: the unit struct to generate. Its snake_case form is returned by
///   `Function::name`, and the upper-cased snake_case form is used by `Display`.
/// - `$json_type`: suffix of the `jsonb::is_*` predicate applied to each element
///   (e.g. `null` expands to `jsonb::is_null`).
/// - `$doc`: documentation string attached to the generated struct.
///
/// The generated function takes exactly one JSON argument and returns a boolean vector
/// of the same length. Elements that cannot be read as binary (e.g. SQL `NULL`) yield
/// a null result rather than `false`.
macro_rules! json_is {
    ($name:ident, $json_type:ident, $doc:expr) => {
        paste::paste! {
            #[doc = $doc]
            #[derive(Clone, Debug, Default)]
            pub struct $name;

            impl Function for $name {
                fn name(&self) -> &str {
                    stringify!([<$name:snake>])
                }

                fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
                    Ok(ConcreteDataType::boolean_datatype())
                }

                fn signature(&self) -> Signature {
                    Signature::exact(vec![ConcreteDataType::json_datatype()], Volatility::Immutable)
                }

                fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
                    ensure!(
                        columns.len() == 1,
                        InvalidFuncArgsSnafu {
                            err_msg: format!(
                                "The length of the args is not correct, expect exactly one, have: {}",
                                columns.len()
                            ),
                        }
                    );

                    let jsons = &columns[0];
                    let size = jsons.len();
                    let datatype = jsons.data_type();
                    let mut results = BooleanVectorBuilder::with_capacity(size);

                    match datatype {
                        // JSON data type uses binary vector
                        ConcreteDataType::Binary(_) => {
                            for i in 0..size {
                                let json = jsons.get_ref(i);
                                let json = json.as_binary();
                                let result = match json {
                                    Ok(Some(json)) => {
                                        Some(jsonb::[<is_ $json_type>](json))
                                    }
                                    // Missing or unreadable values produce a null result.
                                    _ => None,
                                };
                                results.push(result);
                            }
                        }
                        // Any other vector type is rejected with the offending input types.
                        _ => {
                            return UnsupportedInputDataTypeSnafu {
                                function: stringify!([<$name:snake>]),
                                datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
                            }
                            .fail();
                        }
                    }

                    Ok(results.to_vector())
                }
            }

            impl Display for $name {
                fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                    write!(f, "{}", stringify!([<$name:snake>]).to_ascii_uppercase())
                }
            }
        }
    }
}
|
||||
|
||||
json_is!(JsonIsNull, null, "Checks if the input JSONB is null");
|
||||
json_is!(
|
||||
JsonIsBool,
|
||||
boolean,
|
||||
"Checks if the input JSONB is a boolean type JSON value"
|
||||
);
|
||||
json_is!(
|
||||
JsonIsInt,
|
||||
i64,
|
||||
"Checks if the input JSONB is a integer type JSON value"
|
||||
);
|
||||
json_is!(
|
||||
JsonIsFloat,
|
||||
number,
|
||||
"Checks if the input JSONB is a JSON float"
|
||||
);
|
||||
json_is!(
|
||||
JsonIsString,
|
||||
string,
|
||||
"Checks if the input JSONB is a JSON string"
|
||||
);
|
||||
json_is!(
|
||||
JsonIsArray,
|
||||
array,
|
||||
"Checks if the input JSONB is a JSON array"
|
||||
);
|
||||
json_is!(
|
||||
JsonIsObject,
|
||||
object,
|
||||
"Checks if the input JSONB is a JSON object"
|
||||
);
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use datatypes::scalars::ScalarVector;
    use datatypes::vectors::BinaryVector;

    use super::*;

    /// Verifies name, return type, signature and per-input results of every `json_is_*`
    /// function, including `json_is_null`.
    #[test]
    fn test_json_is_functions() {
        let json_is_functions: [&dyn Function; 7] = [
            &JsonIsNull,
            &JsonIsBool,
            &JsonIsInt,
            &JsonIsFloat,
            &JsonIsString,
            &JsonIsArray,
            &JsonIsObject,
        ];
        let expected_names = [
            "json_is_null",
            "json_is_bool",
            "json_is_int",
            "json_is_float",
            "json_is_string",
            "json_is_array",
            "json_is_object",
        ];
        for (func, expected_name) in json_is_functions.iter().zip(expected_names.iter()) {
            assert_eq!(func.name(), *expected_name);
            assert_eq!(
                func.return_type(&[ConcreteDataType::json_datatype()])
                    .unwrap(),
                ConcreteDataType::boolean_datatype()
            );
            assert_eq!(
                func.signature(),
                Signature::exact(
                    vec![ConcreteDataType::json_datatype()],
                    Volatility::Immutable
                )
            );
        }

        let json_strings = [
            r#"null"#,
            r#"true"#,
            r#"1"#,
            r#"1.0"#,
            r#""The pig fly through a castle, and has been attracted by the princess.""#,
            r#"[1, 2]"#,
            r#"{"a": 1}"#,
        ];
        // Row `r` holds the expected outputs of `json_is_functions[r]` for every input,
        // in the order of `json_strings`.
        let expected_results = [
            [true, false, false, false, false, false, false],
            [false, true, false, false, false, false, false],
            [false, false, true, false, false, false, false],
            // Integers are also floats
            [false, false, true, true, false, false, false],
            [false, false, false, false, true, false, false],
            [false, false, false, false, false, true, false],
            [false, false, false, false, false, false, true],
        ];

        let jsonbs = json_strings
            .iter()
            .map(|s| {
                let value = jsonb::parse_value(s.as_bytes()).unwrap();
                value.to_vec()
            })
            .collect::<Vec<_>>();
        let json_vector = BinaryVector::from_vec(jsonbs);
        let args: Vec<VectorRef> = vec![Arc::new(json_vector)];

        for (func, expected_result) in json_is_functions.iter().zip(expected_results.iter()) {
            let vector = func.eval(FunctionContext::default(), &args).unwrap();
            assert_eq!(vector.len(), json_strings.len());

            for (i, expected) in expected_result.iter().enumerate() {
                let result = vector.get_ref(i);
                let result = result.as_boolean().unwrap().unwrap();
                assert_eq!(
                    result,
                    *expected,
                    "{} returned an unexpected result for input {}",
                    func.name(),
                    json_strings[i]
                );
            }
        }
    }
}
|
||||
@@ -119,7 +119,7 @@ mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_get_by_path_function() {
|
||||
fn test_json_to_string_function() {
|
||||
let json_to_string = JsonToStringFunction;
|
||||
|
||||
assert_eq!("json_to_string", json_to_string.name());
|
||||
|
||||
@@ -27,11 +27,11 @@ use crate::function::{Function, FunctionContext};
|
||||
|
||||
/// Parses the `String` into `JSONB`.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct ToJsonFunction;
|
||||
pub struct ParseJsonFunction;
|
||||
|
||||
const NAME: &str = "to_json";
|
||||
const NAME: &str = "parse_json";
|
||||
|
||||
impl Function for ToJsonFunction {
|
||||
impl Function for ParseJsonFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
@@ -101,9 +101,9 @@ impl Function for ToJsonFunction {
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ToJsonFunction {
|
||||
impl Display for ParseJsonFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "TO_JSON")
|
||||
write!(f, "PARSE_JSON")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -119,17 +119,17 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_get_by_path_function() {
|
||||
let to_json = ToJsonFunction;
|
||||
let parse_json = ParseJsonFunction;
|
||||
|
||||
assert_eq!("to_json", to_json.name());
|
||||
assert_eq!("parse_json", parse_json.name());
|
||||
assert_eq!(
|
||||
ConcreteDataType::json_datatype(),
|
||||
to_json
|
||||
parse_json
|
||||
.return_type(&[ConcreteDataType::json_datatype()])
|
||||
.unwrap()
|
||||
);
|
||||
|
||||
assert!(matches!(to_json.signature(),
|
||||
assert!(matches!(parse_json.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::Exact(valid_types),
|
||||
volatility: Volatility::Immutable
|
||||
@@ -152,13 +152,12 @@ mod tests {
|
||||
|
||||
let json_string_vector = StringVector::from_vec(json_strings.to_vec());
|
||||
let args: Vec<VectorRef> = vec![Arc::new(json_string_vector)];
|
||||
let vector = to_json.eval(FunctionContext::default(), &args).unwrap();
|
||||
let vector = parse_json.eval(FunctionContext::default(), &args).unwrap();
|
||||
|
||||
assert_eq!(3, vector.len());
|
||||
for (i, gt) in jsonbs.iter().enumerate() {
|
||||
let result = vector.get_ref(i);
|
||||
let result = result.as_binary().unwrap().unwrap();
|
||||
// remove whitespaces
|
||||
assert_eq!(gt, result);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user