feat: add respective json_is UDFs for JSON type (#4726)

* feat: add respective json_is UDFs

* refactor: rename to_json to parse_json

* chore: happy clippy

* chore: some rename

* fix: small fixes
This commit is contained in:
Yohan Wal
2024-09-18 19:07:30 +08:00
committed by GitHub
parent 50b3bb4c0d
commit f73fb82133
12 changed files with 688 additions and 144 deletions

View File

@@ -14,12 +14,16 @@
use std::sync::Arc;
mod json_get;
mod json_is;
mod json_to_string;
mod to_json;
mod parse_json;
use json_get::{JsonGetBool, JsonGetFloat, JsonGetInt, JsonGetString};
use json_is::{
JsonIsArray, JsonIsBool, JsonIsFloat, JsonIsInt, JsonIsNull, JsonIsObject, JsonIsString,
};
use json_to_string::JsonToStringFunction;
use to_json::ToJsonFunction;
use parse_json::ParseJsonFunction;
use crate::function_registry::FunctionRegistry;
@@ -28,11 +32,19 @@ pub(crate) struct JsonFunction;
impl JsonFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(JsonToStringFunction));
registry.register(Arc::new(ToJsonFunction));
registry.register(Arc::new(ParseJsonFunction));
registry.register(Arc::new(JsonGetInt));
registry.register(Arc::new(JsonGetFloat));
registry.register(Arc::new(JsonGetString));
registry.register(Arc::new(JsonGetBool));
registry.register(Arc::new(JsonIsNull));
registry.register(Arc::new(JsonIsInt));
registry.register(Arc::new(JsonIsFloat));
registry.register(Arc::new(JsonIsString));
registry.register(Arc::new(JsonIsBool));
registry.register(Arc::new(JsonIsArray));
registry.register(Arc::new(JsonIsObject));
}
}

View File

@@ -47,7 +47,7 @@ fn get_json_by_path(json: &[u8], path: &str) -> Option<Vec<u8>> {
/// If the path does not exist or the value is not the type specified, return `NULL`.
macro_rules! json_get {
// e.g. name = JsonGetInt, type = Int64, rust_type = i64, doc = "Get the value from the JSONB by the given path and return it as an integer."
($name: ident, $type: ident, $rust_type: ident, $doc:expr) => {
($name:ident, $type:ident, $rust_type:ident, $doc:expr) => {
paste::paste! {
#[doc = $doc]
#[derive(Clone, Debug, Default)]

View File

@@ -0,0 +1,215 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{self, Display};
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use datafusion::logical_expr::Volatility;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
use snafu::ensure;
use crate::function::{Function, FunctionContext};
/// Checks if the input is a JSON object of the given type.
macro_rules! json_is {
($name:ident, $json_type:ident, $doc:expr) => {
paste::paste! {
#[derive(Clone, Debug, Default)]
pub struct $name;
impl Function for $name {
fn name(&self) -> &str {
stringify!([<$name:snake>])
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::boolean_datatype())
}
fn signature(&self) -> Signature {
Signature::exact(vec![ConcreteDataType::json_datatype()], Volatility::Immutable)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 1,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly one, have: {}",
columns.len()
),
}
);
let jsons = &columns[0];
let size = jsons.len();
let datatype = jsons.data_type();
let mut results = BooleanVectorBuilder::with_capacity(size);
match datatype {
// JSON data type uses binary vector
ConcreteDataType::Binary(_) => {
for i in 0..size {
let json = jsons.get_ref(i);
let json = json.as_binary();
let result = match json {
Ok(Some(json)) => {
Some(jsonb::[<is_ $json_type>](json))
}
_ => None,
};
results.push(result);
}
}
_ => {
return UnsupportedInputDataTypeSnafu {
function: stringify!([<$name:snake>]),
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail();
}
}
Ok(results.to_vector())
}
}
impl Display for $name {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", stringify!([<$name:snake>]).to_ascii_uppercase())
}
}
}
}
}
json_is!(JsonIsNull, null, "Checks if the input JSONB is null");
json_is!(
JsonIsBool,
boolean,
"Checks if the input JSONB is a boolean type JSON value"
);
json_is!(
JsonIsInt,
i64,
"Checks if the input JSONB is a integer type JSON value"
);
json_is!(
JsonIsFloat,
number,
"Checks if the input JSONB is a JSON float"
);
json_is!(
JsonIsString,
string,
"Checks if the input JSONB is a JSON string"
);
json_is!(
JsonIsArray,
array,
"Checks if the input JSONB is a JSON array"
);
json_is!(
JsonIsObject,
object,
"Checks if the input JSONB is a JSON object"
);
#[cfg(test)]
mod tests {
use std::sync::Arc;
use datatypes::scalars::ScalarVector;
use datatypes::vectors::BinaryVector;
use super::*;
#[test]
fn test_json_is_functions() {
let json_is_functions: [&dyn Function; 6] = [
&JsonIsBool,
&JsonIsInt,
&JsonIsFloat,
&JsonIsString,
&JsonIsArray,
&JsonIsObject,
];
let expected_names = [
"json_is_bool",
"json_is_int",
"json_is_float",
"json_is_string",
"json_is_array",
"json_is_object",
];
for (func, expected_name) in json_is_functions.iter().zip(expected_names.iter()) {
assert_eq!(func.name(), *expected_name);
assert_eq!(
func.return_type(&[ConcreteDataType::json_datatype()])
.unwrap(),
ConcreteDataType::boolean_datatype()
);
assert_eq!(
func.signature(),
Signature::exact(
vec![ConcreteDataType::json_datatype()],
Volatility::Immutable
)
);
}
let json_strings = [
r#"true"#,
r#"1"#,
r#"1.0"#,
r#""The pig fly through a castle, and has been attracted by the princess.""#,
r#"[1, 2]"#,
r#"{"a": 1}"#,
];
let expected_results = [
[true, false, false, false, false, false],
[false, true, false, false, false, false],
// Integers are also floats
[false, true, true, false, false, false],
[false, false, false, true, false, false],
[false, false, false, false, true, false],
[false, false, false, false, false, true],
];
let jsonbs = json_strings
.iter()
.map(|s| {
let value = jsonb::parse_value(s.as_bytes()).unwrap();
value.to_vec()
})
.collect::<Vec<_>>();
let json_vector = BinaryVector::from_vec(jsonbs);
let args: Vec<VectorRef> = vec![Arc::new(json_vector)];
for (func, expected_result) in json_is_functions.iter().zip(expected_results.iter()) {
let vector = func.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(vector.len(), json_strings.len());
for (i, expected) in expected_result.iter().enumerate() {
let result = vector.get_ref(i);
let result = result.as_boolean().unwrap().unwrap();
assert_eq!(result, *expected);
}
}
}
}

View File

@@ -119,7 +119,7 @@ mod tests {
use super::*;
#[test]
fn test_get_by_path_function() {
fn test_json_to_string_function() {
let json_to_string = JsonToStringFunction;
assert_eq!("json_to_string", json_to_string.name());

View File

@@ -27,11 +27,11 @@ use crate::function::{Function, FunctionContext};
/// Parses the `String` into `JSONB`.
#[derive(Clone, Debug, Default)]
pub struct ToJsonFunction;
pub struct ParseJsonFunction;
const NAME: &str = "to_json";
const NAME: &str = "parse_json";
impl Function for ToJsonFunction {
impl Function for ParseJsonFunction {
fn name(&self) -> &str {
NAME
}
@@ -101,9 +101,9 @@ impl Function for ToJsonFunction {
}
}
impl Display for ToJsonFunction {
impl Display for ParseJsonFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "TO_JSON")
write!(f, "PARSE_JSON")
}
}
@@ -119,17 +119,17 @@ mod tests {
#[test]
fn test_get_by_path_function() {
let to_json = ToJsonFunction;
let parse_json = ParseJsonFunction;
assert_eq!("to_json", to_json.name());
assert_eq!("parse_json", parse_json.name());
assert_eq!(
ConcreteDataType::json_datatype(),
to_json
parse_json
.return_type(&[ConcreteDataType::json_datatype()])
.unwrap()
);
assert!(matches!(to_json.signature(),
assert!(matches!(parse_json.signature(),
Signature {
type_signature: TypeSignature::Exact(valid_types),
volatility: Volatility::Immutable
@@ -152,13 +152,12 @@ mod tests {
let json_string_vector = StringVector::from_vec(json_strings.to_vec());
let args: Vec<VectorRef> = vec![Arc::new(json_string_vector)];
let vector = to_json.eval(FunctionContext::default(), &args).unwrap();
let vector = parse_json.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(3, vector.len());
for (i, gt) in jsonbs.iter().enumerate() {
let result = vector.get_ref(i);
let result = result.as_binary().unwrap().unwrap();
// remove whitespaces
assert_eq!(gt, result);
}
}