Skip to main content

common_function/scalars/json/
json_get.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use arrow::array::{ArrayRef, BinaryViewArray, new_null_array};
18use arrow::compute;
19use arrow::datatypes::Float64Type;
20use arrow_schema::Field;
21use datafusion_common::arrow::array::{
22    Array, AsArray, BinaryViewBuilder, BooleanBuilder, Float64Builder, Int64Builder,
23    StringViewBuilder,
24};
25use datafusion_common::arrow::datatypes::DataType;
26use datafusion_common::{DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err};
27use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
28use datatypes::arrow_array::{int_array_value_at_index, string_array_value_at_index};
29use datatypes::vectors::json::array::JsonArray;
30use derive_more::Display;
31use serde_json::Value;
32
33use crate::function::{Function, extract_args};
34use crate::helper;
35
36fn get_json_by_path(json: &[u8], path: &str) -> Option<Vec<u8>> {
37    let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
38    match json_path {
39        Ok(json_path) => {
40            let mut sub_jsonb = Vec::new();
41            let mut sub_offsets = Vec::new();
42            match jsonb::get_by_path(json, json_path, &mut sub_jsonb, &mut sub_offsets) {
43                Ok(_) => Some(sub_jsonb),
44                Err(_) => None,
45            }
46        }
47        _ => None,
48    }
49}
50
/// One extracted JSON value, in the representation it was found in, ready to be handed to a
/// [`JsonGetResultBuilder`].
enum JsonResultValue<'a> {
    /// Owned binary JSON bytes, as returned by `get_json_by_path`. The builders decode it with
    /// the matching `jsonb::to_*` function.
    Jsonb(Vec<u8>),
    /// The value stored at the given row index of an Arrow column.
    // NOTE(review): no code in the visible part of this file constructs this variant, which is
    // presumably why the `unused` lint is expected here.
    #[expect(unused)]
    JsonStructByColumn(&'a ArrayRef, usize),
    /// A borrowed `serde_json::Value`.
    JsonStructByValue(&'a Value),
}
57
/// Type-erased builder for the output column of the `json_get` family of functions.
///
/// Each implementation in this file wraps one Arrow array builder and decides how a
/// [`JsonResultValue`] is converted to that builder's element type.
trait JsonGetResultBuilder {
    /// Appends one row computed from `value`.
    ///
    /// The implementations in this file append a null when `value` cannot be read as the
    /// builder's element type; an `Err` is only returned when a conversion helper itself fails.
    fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()>;

    /// Appends one null row.
    fn append_null(&mut self);

    /// Finishes the wrapped builder and returns the rows appended so far as an array.
    fn build(&mut self) -> ArrayRef;
}
65
66fn result_builder(
67    len: usize,
68    with_type: Option<&DataType>,
69) -> Result<Box<dyn JsonGetResultBuilder>> {
70    let builder = if let Some(t) = with_type {
71        match t {
72            DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
73                Box::new(StringResultBuilder(StringViewBuilder::with_capacity(len)))
74                    as Box<dyn JsonGetResultBuilder>
75            }
76            DataType::Int64 => Box::new(IntResultBuilder(Int64Builder::with_capacity(len))),
77            DataType::Float64 => Box::new(FloatResultBuilder(Float64Builder::with_capacity(len))),
78            DataType::Boolean => Box::new(BoolResultBuilder(BooleanBuilder::with_capacity(len))),
79            t => {
80                return exec_err!("json_get with unknown type {t}");
81            }
82        }
83    } else {
84        Box::new(StringResultBuilder(StringViewBuilder::with_capacity(len)))
85    };
86    Ok(builder)
87}
88
89// TODO: refactor this to StringLikeArrayBuilder from Arrow 57
90struct StringResultBuilder(StringViewBuilder);
91
92impl JsonGetResultBuilder for StringResultBuilder {
93    fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()> {
94        match value {
95            JsonResultValue::Jsonb(value) => self.0.append_option(jsonb::to_str(&value).ok()),
96            JsonResultValue::JsonStructByColumn(column, i) => {
97                if let Some(v) = string_array_value_at_index(column, i) {
98                    self.0.append_value(v);
99                } else {
100                    self.0
101                        .append_value(arrow_cast::display::array_value_to_string(column, i)?);
102                }
103            }
104            JsonResultValue::JsonStructByValue(value) => {
105                if let Some(s) = value.as_str() {
106                    self.0.append_value(s)
107                } else {
108                    self.0.append_value(value.to_string())
109                }
110            }
111        }
112        Ok(())
113    }
114
115    fn append_null(&mut self) {
116        self.0.append_null();
117    }
118
119    fn build(&mut self) -> ArrayRef {
120        Arc::new(self.0.finish())
121    }
122}
123
124#[derive(Default, Display, Debug)]
125#[display("{}", Self::NAME.to_ascii_uppercase())]
126pub struct JsonGetString(JsonGetWithType);
127
128impl JsonGetString {
129    pub const NAME: &'static str = "json_get_string";
130}
131
132impl Function for JsonGetString {
133    fn name(&self) -> &str {
134        Self::NAME
135    }
136
137    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
138        Ok(DataType::Utf8View)
139    }
140
141    fn signature(&self) -> &Signature {
142        &self.0.signature
143    }
144
145    fn invoke_with_args(&self, mut args: ScalarFunctionArgs) -> Result<ColumnarValue> {
146        args.args
147            .push(ColumnarValue::Scalar(ScalarValue::Utf8View(None)));
148        self.0.invoke_with_args(args)
149    }
150}
151
152struct IntResultBuilder(Int64Builder);
153
154impl JsonGetResultBuilder for IntResultBuilder {
155    fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()> {
156        match value {
157            JsonResultValue::Jsonb(value) => self.0.append_option(jsonb::to_i64(&value).ok()),
158            JsonResultValue::JsonStructByColumn(column, i) => {
159                self.0.append_option(int_array_value_at_index(column, i))
160            }
161            JsonResultValue::JsonStructByValue(value) => self.0.append_option(value.as_i64()),
162        }
163        Ok(())
164    }
165
166    fn append_null(&mut self) {
167        self.0.append_null();
168    }
169
170    fn build(&mut self) -> ArrayRef {
171        Arc::new(self.0.finish())
172    }
173}
174
175#[derive(Default, Display, Debug)]
176#[display("{}", Self::NAME.to_ascii_uppercase())]
177pub struct JsonGetInt(JsonGetWithType);
178
179impl JsonGetInt {
180    pub const NAME: &'static str = "json_get_int";
181}
182
183impl Function for JsonGetInt {
184    fn name(&self) -> &str {
185        Self::NAME
186    }
187
188    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
189        Ok(DataType::Int64)
190    }
191
192    fn signature(&self) -> &Signature {
193        &self.0.signature
194    }
195
196    fn invoke_with_args(&self, mut args: ScalarFunctionArgs) -> Result<ColumnarValue> {
197        args.args
198            .push(ColumnarValue::Scalar(ScalarValue::Int64(None)));
199        self.0.invoke_with_args(args)
200    }
201}
202
203struct FloatResultBuilder(Float64Builder);
204
205impl JsonGetResultBuilder for FloatResultBuilder {
206    fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()> {
207        match value {
208            JsonResultValue::Jsonb(value) => self.0.append_option(jsonb::to_f64(&value).ok()),
209            JsonResultValue::JsonStructByColumn(column, i) => {
210                let result = if column.data_type() == &DataType::Float64 {
211                    column
212                        .as_primitive::<Float64Type>()
213                        .is_valid(i)
214                        .then(|| column.as_primitive::<Float64Type>().value(i))
215                } else {
216                    None
217                };
218                self.0.append_option(result);
219            }
220            JsonResultValue::JsonStructByValue(value) => self.0.append_option(value.as_f64()),
221        }
222        Ok(())
223    }
224
225    fn append_null(&mut self) {
226        self.0.append_null();
227    }
228
229    fn build(&mut self) -> ArrayRef {
230        Arc::new(self.0.finish())
231    }
232}
233
234#[derive(Default, Display, Debug)]
235#[display("{}", Self::NAME.to_ascii_uppercase())]
236pub struct JsonGetFloat(JsonGetWithType);
237
238impl JsonGetFloat {
239    pub const NAME: &'static str = "json_get_float";
240}
241
242impl Function for JsonGetFloat {
243    fn name(&self) -> &str {
244        Self::NAME
245    }
246
247    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
248        Ok(DataType::Float64)
249    }
250
251    fn signature(&self) -> &Signature {
252        &self.0.signature
253    }
254
255    fn invoke_with_args(&self, mut args: ScalarFunctionArgs) -> Result<ColumnarValue> {
256        args.args
257            .push(ColumnarValue::Scalar(ScalarValue::Float64(None)));
258        self.0.invoke_with_args(args)
259    }
260}
261
262struct BoolResultBuilder(BooleanBuilder);
263
264impl JsonGetResultBuilder for BoolResultBuilder {
265    fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()> {
266        match value {
267            JsonResultValue::Jsonb(value) => self.0.append_option(jsonb::to_bool(&value).ok()),
268            JsonResultValue::JsonStructByColumn(column, i) => {
269                let result = if column.data_type() == &DataType::Boolean {
270                    column
271                        .as_boolean()
272                        .is_valid(i)
273                        .then(|| column.as_boolean().value(i))
274                } else {
275                    None
276                };
277                self.0.append_option(result);
278            }
279            JsonResultValue::JsonStructByValue(value) => self.0.append_option(value.as_bool()),
280        }
281        Ok(())
282    }
283
284    fn append_null(&mut self) {
285        self.0.append_null();
286    }
287
288    fn build(&mut self) -> ArrayRef {
289        Arc::new(self.0.finish())
290    }
291}
292
293#[derive(Default, Display, Debug)]
294#[display("{}", Self::NAME.to_ascii_uppercase())]
295pub struct JsonGetBool(JsonGetWithType);
296
297impl JsonGetBool {
298    pub const NAME: &'static str = "json_get_bool";
299}
300
301impl Function for JsonGetBool {
302    fn name(&self) -> &str {
303        Self::NAME
304    }
305
306    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
307        Ok(DataType::Boolean)
308    }
309
310    fn signature(&self) -> &Signature {
311        &self.0.signature
312    }
313
314    fn invoke_with_args(&self, mut args: ScalarFunctionArgs) -> Result<ColumnarValue> {
315        args.args
316            .push(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
317        self.0.invoke_with_args(args)
318    }
319}
320
321fn jsonb_get(
322    jsons: &BinaryViewArray,
323    path: &str,
324    builder: &mut dyn JsonGetResultBuilder,
325) -> Result<()> {
326    let size = jsons.len();
327    for i in 0..size {
328        let json = jsons.is_valid(i).then(|| jsons.value(i));
329        let result = match json {
330            Some(json) => get_json_by_path(json, path),
331            _ => None,
332        };
333        if let Some(v) = result {
334            builder.append_value(JsonResultValue::Jsonb(v))?;
335        } else {
336            builder.append_null();
337        }
338    }
339    Ok(())
340}
341
342fn json_struct_get(array: &ArrayRef, path: &str, with_type: Option<&DataType>) -> Result<ArrayRef> {
343    let path = path.trim_start_matches("$");
344
345    // Fast path: if the JSON array fields can be directly indexed into by the `path`, simply get
346    // the sub-array (`column_by_name`).
347    let mut direct = true;
348    let mut current = array;
349    for segment in path.split(".").filter(|s| !s.is_empty()) {
350        if matches!(current.data_type(), DataType::Binary) {
351            direct = false;
352            break;
353        }
354
355        let Some(json) = current.as_struct_opt() else {
356            return exec_err!("unknown JSON array datatype: {}", current.data_type());
357        };
358        let Some(sub_json) = json.column_by_name(segment) else {
359            return Ok(new_null_array(
360                with_type.unwrap_or(&DataType::Utf8View),
361                array.len(),
362            ));
363        };
364        current = sub_json;
365    }
366
367    // Build the result array with optional value mapper.
368    fn build_with<F>(
369        input: &ArrayRef,
370        with_type: Option<&DataType>,
371        value_mapper: F,
372    ) -> Result<ArrayRef>
373    where
374        for<'a> F: Fn(&'a Value) -> Option<&'a Value>,
375    {
376        let json_array = JsonArray::from(input);
377
378        let mut builder = result_builder(input.len(), with_type)?;
379        for i in 0..input.len() {
380            if input.is_null(i) {
381                builder.append_null();
382                continue;
383            }
384
385            let value = json_array
386                .try_get_value(i)
387                .map_err(|e| exec_datafusion_err!("{e}"))?;
388            let value = value_mapper(&value);
389
390            if let Some(value) = value {
391                builder.append_value(JsonResultValue::JsonStructByValue(value))?;
392            } else {
393                builder.append_null();
394            }
395        }
396        Ok(builder.build())
397    }
398
399    if direct {
400        let casted = if let Some(with_type) = with_type
401            && current.data_type() != with_type
402        {
403            match (current.data_type(), with_type) {
404                (DataType::Binary, _) => {
405                    // Fall back to the slow path if the found JSON sub-array is serialized to bytes
406                    // (because of JSON type conflicting)
407                    build_with(current, Some(with_type), |v| Some(v))?
408                }
409                (DataType::List(_) | DataType::Struct(_), with_type) if with_type.is_string() => {
410                    // Special handle for wanted array is string (Arrow cast is not working here if
411                    // the datatype is list or struct), because it could be used in displaying the
412                    // result.
413                    build_with(current, Some(with_type), |v| Some(v))?
414                }
415                (_, with_type) if with_type.is_string() => {
416                    // Same special handle for wanted array is string as above, except for simply
417                    // casting by Arrow is more desirable.
418                    arrow_cast::cast(current.as_ref(), with_type)?
419                }
420                _ => new_null_array(with_type, current.len()),
421            }
422        } else {
423            current.clone()
424        };
425        return Ok(casted);
426    }
427
428    // Slow path: reconstruct the JSON array from serialized representation of conflicting JSON
429    // values: `serde_json::Value`.
430    let mut pointer = path.replace(".", "/");
431    if !pointer.starts_with("/") {
432        pointer = format!("/{}", pointer);
433    }
434    build_with(array, with_type, |value| value.pointer(&pointer))
435}
436
437/// This function is mostly called as `json_get(value, 'attr')::type` and rewritten by
438/// `json_get_rewriter::JsonGetRewriter` to `json_get(value, 'attr', NULL::type)`. So we
439/// use the third argument's type to determine the return type.
440#[derive(Debug, Display)]
441#[display("{}", Self::NAME.to_ascii_uppercase())]
442pub struct JsonGetWithType {
443    signature: Signature,
444}
445
446impl JsonGetWithType {
447    pub const NAME: &'static str = "json_get";
448}
449
450impl Default for JsonGetWithType {
451    fn default() -> Self {
452        Self {
453            signature: Signature::variadic_any(Volatility::Immutable),
454        }
455    }
456}
457
458impl Function for JsonGetWithType {
459    fn name(&self) -> &str {
460        Self::NAME
461    }
462
463    fn return_type(&self, _input_types: &[DataType]) -> datafusion_common::Result<DataType> {
464        Err(DataFusionError::Internal(
465            "This method isn't meant to be called".to_string(),
466        ))
467    }
468
469    fn return_field_from_args(
470        &self,
471        args: datafusion_expr::ReturnFieldArgs<'_>,
472    ) -> datafusion_common::Result<Arc<Field>> {
473        match args.scalar_arguments.get(2) {
474            Some(Some(v)) => {
475                let mut data_type = v.data_type();
476                if matches!(data_type, DataType::Utf8 | DataType::LargeUtf8) {
477                    data_type = DataType::Utf8View;
478                }
479
480                Ok(Arc::new(Field::new(self.name(), data_type, true)))
481            }
482            _ => Ok(Arc::new(Field::new(self.name(), DataType::Utf8View, true))),
483        }
484    }
485
486    fn signature(&self) -> &Signature {
487        &self.signature
488    }
489
490    fn invoke_with_args(
491        &self,
492        args: ScalarFunctionArgs,
493    ) -> datafusion_common::Result<ColumnarValue> {
494        let args_len = args.args.len();
495        if args_len != 2 && args_len != 3 {
496            return exec_err!("json_get expects 2 or 3 arguments, got {args_len}");
497        }
498
499        let arg0 = args.args[0].to_array(args.number_rows)?;
500        let len = arg0.len();
501
502        let path = if let ColumnarValue::Scalar(path) = &args.args[1]
503            && let Some(Some(path)) = path.try_as_str()
504        {
505            path
506        } else {
507            return exec_err!(
508                r#"json_get expects a string literal "path" argument, got {}"#,
509                args.args[1]
510            );
511        };
512
513        let with_type = args.args.get(2).map(|x| x.data_type());
514        let result = match arg0.data_type() {
515            DataType::Binary | DataType::LargeBinary | DataType::BinaryView => {
516                let arg0 = compute::cast(&arg0, &DataType::BinaryView)?;
517                let jsons = arg0.as_binary_view();
518
519                let mut builder = result_builder(len, with_type.as_ref())?;
520                jsonb_get(jsons, path, builder.as_mut())?;
521                builder.build()
522            }
523            DataType::Struct(_) => json_struct_get(&arg0, path, with_type.as_ref())?,
524            _ => {
525                return exec_err!("JSON_GET not supported argument type {}", arg0.data_type());
526            }
527        };
528
529        Ok(ColumnarValue::Array(result))
530    }
531}
532
/// Get the object from JSON value by path.
///
/// The result is binary JSON again; rows whose lookup result is not a JSON object are
/// returned as null (see `invoke_with_args`).
#[derive(Display, Debug)]
#[display("{}", Self::NAME.to_ascii_uppercase())]
pub(super) struct JsonGetObject {
    /// Accepted argument types, see the `Default` implementation.
    signature: Signature,
}

impl JsonGetObject {
    /// Name returned by [`Function::name`] for this function.
    const NAME: &'static str = "json_get_object";
}
543
544impl Default for JsonGetObject {
545    fn default() -> Self {
546        Self {
547            signature: helper::one_of_sigs2(
548                vec![
549                    DataType::Binary,
550                    DataType::LargeBinary,
551                    DataType::BinaryView,
552                ],
553                vec![DataType::UInt8, DataType::LargeUtf8, DataType::Utf8View],
554            ),
555        }
556    }
557}
558
559impl Function for JsonGetObject {
560    fn name(&self) -> &str {
561        Self::NAME
562    }
563
564    fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
565        Ok(DataType::BinaryView)
566    }
567
568    fn signature(&self) -> &Signature {
569        &self.signature
570    }
571
572    fn invoke_with_args(
573        &self,
574        args: ScalarFunctionArgs,
575    ) -> datafusion_common::Result<ColumnarValue> {
576        let [arg0, arg1] = extract_args(self.name(), &args)?;
577        let arg0 = compute::cast(&arg0, &DataType::BinaryView)?;
578        let jsons = arg0.as_binary_view();
579        let arg1 = compute::cast(&arg1, &DataType::Utf8View)?;
580        let paths = arg1.as_string_view();
581
582        let len = jsons.len();
583        let mut builder = BinaryViewBuilder::with_capacity(len);
584
585        for i in 0..len {
586            let json = jsons.is_valid(i).then(|| jsons.value(i));
587            let path = paths.is_valid(i).then(|| paths.value(i));
588            let result = if let (Some(json), Some(path)) = (json, path) {
589                let result = jsonb::jsonpath::parse_json_path(path.as_bytes()).and_then(|path| {
590                    let mut data = Vec::new();
591                    let mut offset = Vec::new();
592                    jsonb::get_by_path(json, path, &mut data, &mut offset)
593                        .map(|()| jsonb::is_object(&data).then_some(data))
594                });
595                result.map_err(|e| DataFusionError::Execution(e.to_string()))?
596            } else {
597                None
598            };
599            builder.append_option(result);
600        }
601
602        Ok(ColumnarValue::Array(Arc::new(builder.finish())))
603    }
604}
605
606#[cfg(test)]
607mod tests {
608    use std::sync::Arc;
609
610    use arrow::array::{BooleanArray, Int64Array, StructArray};
611    use arrow_schema::{Field, Fields};
612    use datafusion_common::ScalarValue;
613    use datafusion_common::arrow::array::{BinaryArray, BinaryViewArray, StringArray};
614    use datafusion_common::arrow::datatypes::{Float64Type, Int64Type};
615    use datatypes::types::parse_string_to_jsonb;
616    use serde_json::json;
617
618    use super::*;
619
620    /// Create a JSON object like this (as a one element struct array for testing):
621    ///
622    /// ```JSON
623    /// {
624    ///     "kind": "foo",
625    ///     "payload": {
626    ///         "code": 404,
627    ///         "success": false,
628    ///         "result": {
629    ///             "error": "not found",
630    ///             "time_cost": 1.234
631    ///         }
632    ///     }
633    /// }
634    /// ```
635    fn test_json_struct() -> ArrayRef {
636        let payload_fields = Fields::from(vec![
637            Field::new("code", DataType::Int64, true),
638            Field::new("success", DataType::Boolean, true),
639            Field::new("result", DataType::Binary, true),
640        ]);
641        Arc::new(StructArray::new(
642            vec![
643                Field::new("kind", DataType::Utf8, true),
644                Field::new("payload", DataType::Struct(payload_fields.clone()), true),
645            ]
646            .into(),
647            vec![
648                Arc::new(StringArray::from_iter([Some("foo")])) as ArrayRef,
649                Arc::new(StructArray::new(
650                    payload_fields,
651                    vec![
652                        Arc::new(Int64Array::from_iter([Some(404)])) as ArrayRef,
653                        Arc::new(BooleanArray::from_iter([Some(false)])),
654                        Arc::new(BinaryArray::from_iter([Some(
655                            json!({
656                                "error": "not found",
657                                "time_cost": 1.234
658                            })
659                            .to_string()
660                            .as_bytes(),
661                        )])),
662                    ],
663                    None,
664                )),
665            ],
666            None,
667        ))
668    }
669
670    #[test]
671    fn test_json_get_int() {
672        let json_get_int = JsonGetInt::default();
673
674        assert_eq!("json_get_int", json_get_int.name());
675        assert_eq!(
676            DataType::Int64,
677            json_get_int
678                .return_type(&[DataType::Binary, DataType::Utf8])
679                .unwrap()
680        );
681
682        let json_strings = [
683            r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
684            r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
685            r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
686        ];
687        let json_struct = test_json_struct();
688
689        let path_expects = vec![
690            ("$.a.b", Some(2)),
691            ("$.a", Some(4)),
692            ("$.c", None),
693            ("$.kind", None),
694            ("$.payload.code", Some(404)),
695            ("$.payload.success", None),
696            ("$.payload.result.time_cost", None),
697            ("$.payload.not-exists", None),
698            ("$.not-exists", None),
699            ("$", None),
700        ];
701
702        let mut jsons = json_strings
703            .iter()
704            .map(|s| {
705                let value = jsonb::parse_value(s.as_bytes()).unwrap();
706                Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
707            })
708            .collect::<Vec<_>>();
709        let json_struct_arrays =
710            std::iter::repeat_n(json_struct, path_expects.len() - jsons.len()).collect::<Vec<_>>();
711        jsons.extend(json_struct_arrays);
712
713        for i in 0..jsons.len() {
714            let json = &jsons[i];
715            let (path, expect) = path_expects[i];
716
717            let args = ScalarFunctionArgs {
718                args: vec![
719                    ColumnarValue::Array(json.clone()),
720                    ColumnarValue::Scalar(path.into()),
721                ],
722                arg_fields: vec![],
723                number_rows: 1,
724                return_field: Arc::new(Field::new("x", DataType::Int64, false)),
725                config_options: Arc::new(Default::default()),
726            };
727            let result = json_get_int
728                .invoke_with_args(args)
729                .and_then(|x| x.to_array(1))
730                .unwrap();
731
732            let result = result.as_primitive::<Int64Type>();
733            assert_eq!(1, result.len());
734            let actual = result.is_valid(0).then(|| result.value(0));
735            assert_eq!(actual, expect);
736        }
737    }
738
739    #[test]
740    fn test_json_get_float() {
741        let json_get_float = JsonGetFloat::default();
742
743        assert_eq!("json_get_float", json_get_float.name());
744        assert_eq!(
745            DataType::Float64,
746            json_get_float
747                .return_type(&[DataType::Binary, DataType::Utf8])
748                .unwrap()
749        );
750
751        let json_strings = [
752            r#"{"a": {"b": 2.1}, "b": 2.2, "c": 3.3}"#,
753            r#"{"a": 4.4, "b": {"c": 6.6}, "c": 6.6}"#,
754            r#"{"a": 7.7, "b": 8.8, "c": {"a": 7.7}}"#,
755        ];
756        let json_struct = test_json_struct();
757
758        let path_expects = vec![
759            ("$.a.b", Some(2.1)),
760            ("$.a", Some(4.4)),
761            ("$.c", None),
762            ("$.kind", None),
763            ("$.payload.code", None),
764            ("$.payload.success", None),
765            ("$.payload.result.time_cost", Some(1.234)),
766            ("$.payload.not-exists", None),
767            ("$.not-exists", None),
768            ("$", None),
769        ];
770
771        let mut jsons = json_strings
772            .iter()
773            .map(|s| {
774                let value = jsonb::parse_value(s.as_bytes()).unwrap();
775                Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
776            })
777            .collect::<Vec<_>>();
778        let json_struct_arrays =
779            std::iter::repeat_n(json_struct, path_expects.len() - jsons.len()).collect::<Vec<_>>();
780        jsons.extend(json_struct_arrays);
781
782        for i in 0..jsons.len() {
783            let json = &jsons[i];
784            let (path, expect) = path_expects[i];
785
786            let args = ScalarFunctionArgs {
787                args: vec![
788                    ColumnarValue::Array(json.clone()),
789                    ColumnarValue::Scalar(path.into()),
790                ],
791                arg_fields: vec![],
792                number_rows: 1,
793                return_field: Arc::new(Field::new("x", DataType::Float64, false)),
794                config_options: Arc::new(Default::default()),
795            };
796            let result = json_get_float
797                .invoke_with_args(args)
798                .and_then(|x| x.to_array(1))
799                .unwrap();
800
801            let result = result.as_primitive::<Float64Type>();
802            assert_eq!(1, result.len());
803            let actual = result.is_valid(0).then(|| result.value(0));
804            assert_eq!(actual, expect);
805        }
806    }
807
808    #[test]
809    fn test_json_get_bool() {
810        let json_get_bool = JsonGetBool::default();
811
812        assert_eq!("json_get_bool", json_get_bool.name());
813        assert_eq!(
814            DataType::Boolean,
815            json_get_bool
816                .return_type(&[DataType::Binary, DataType::Utf8])
817                .unwrap()
818        );
819
820        let json_strings = [
821            r#"{"a": {"b": true}, "b": false, "c": true}"#,
822            r#"{"a": false, "b": {"c": true}, "c": false}"#,
823            r#"{"a": true, "b": false, "c": {"a": true}}"#,
824        ];
825        let json_struct = test_json_struct();
826
827        let path_expects = vec![
828            ("$.a.b", Some(true)),
829            ("$.a", Some(false)),
830            ("$.c", None),
831            ("$.kind", None),
832            ("$.payload.code", None),
833            ("$.payload.success", Some(false)),
834            ("$.payload.result.time_cost", None),
835            ("$.payload.not-exists", None),
836            ("$.not-exists", None),
837            ("$", None),
838        ];
839
840        let mut jsons = json_strings
841            .iter()
842            .map(|s| {
843                let value = jsonb::parse_value(s.as_bytes()).unwrap();
844                Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
845            })
846            .collect::<Vec<_>>();
847        let json_struct_arrays =
848            std::iter::repeat_n(json_struct, path_expects.len() - jsons.len()).collect::<Vec<_>>();
849        jsons.extend(json_struct_arrays);
850
851        for i in 0..jsons.len() {
852            let json = &jsons[i];
853            let (path, expect) = path_expects[i];
854
855            let args = ScalarFunctionArgs {
856                args: vec![
857                    ColumnarValue::Array(json.clone()),
858                    ColumnarValue::Scalar(path.into()),
859                ],
860                arg_fields: vec![],
861                number_rows: 1,
862                return_field: Arc::new(Field::new("x", DataType::Boolean, false)),
863                config_options: Arc::new(Default::default()),
864            };
865            let result = json_get_bool
866                .invoke_with_args(args)
867                .and_then(|x| x.to_array(1))
868                .unwrap();
869
870            let result = result.as_boolean();
871            assert_eq!(1, result.len());
872            let actual = result.is_valid(0).then(|| result.value(0));
873            assert_eq!(actual, expect);
874        }
875    }
876
877    #[test]
878    fn test_json_get_string() {
879        let json_get_string = JsonGetString::default();
880
881        assert_eq!("json_get_string", json_get_string.name());
882        assert_eq!(
883            DataType::Utf8View,
884            json_get_string
885                .return_type(&[DataType::Binary, DataType::Utf8])
886                .unwrap()
887        );
888
889        let json_strings = [
890            r#"{"a": {"b": "a"}, "b": "b", "c": "c"}"#,
891            r#"{"a": "d", "b": {"c": "e"}, "c": "f"}"#,
892            r#"{"a": "g", "b": "h", "c": {"a": "g"}}"#,
893        ];
894        let json_struct = test_json_struct();
895
896        let paths = vec![
897            "$.a.b",
898            "$.a",
899            "",
900            "$.kind",
901            "$.payload.code",
902            "$.payload.result.time_cost",
903            "$.payload",
904            "$.payload.success",
905            "$.payload.result",
906            "$.payload.result.error",
907            "$.payload.result.not-exists",
908            "$.payload.not-exists",
909            "$.not-exists",
910            "$",
911        ];
912        let expects = [
913            Some("a"),
914            Some("d"),
915            None,
916            Some("foo"),
917            Some("404"),
918            Some("1.234"),
919            Some(
920                r#"{"code":404,"result":{"error":"not found","time_cost":1.234},"success":false}"#,
921            ),
922            Some("false"),
923            Some(r#"{"error":"not found","time_cost":1.234}"#),
924            Some("not found"),
925            None,
926            None,
927            None,
928            Some(
929                r#"{"kind":"foo","payload":{"code":404,"result":{"error":"not found","time_cost":1.234},"success":false}}"#,
930            ),
931        ];
932
933        let mut jsons = json_strings
934            .iter()
935            .map(|s| {
936                let value = jsonb::parse_value(s.as_bytes()).unwrap();
937                Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
938            })
939            .collect::<Vec<_>>();
940        let json_struct_arrays =
941            std::iter::repeat_n(json_struct, expects.len() - jsons.len()).collect::<Vec<_>>();
942        jsons.extend(json_struct_arrays);
943
944        for i in 0..jsons.len() {
945            let json = &jsons[i];
946            let path = paths[i];
947            let expect = expects[i];
948
949            let args = ScalarFunctionArgs {
950                args: vec![
951                    ColumnarValue::Array(json.clone()),
952                    ColumnarValue::Scalar(path.into()),
953                ],
954                arg_fields: vec![],
955                number_rows: 1,
956                return_field: Arc::new(Field::new("x", DataType::Utf8View, false)),
957                config_options: Arc::new(Default::default()),
958            };
959            let result = json_get_string
960                .invoke_with_args(args)
961                .and_then(|x| x.to_array(1))
962                .unwrap();
963
964            let result = result.as_string_view();
965            assert_eq!(1, result.len());
966            let actual = result.is_valid(0).then(|| result.value(0));
967            assert_eq!(actual, expect);
968        }
969    }
970
971    #[test]
972    fn test_json_get_object() -> Result<()> {
973        let udf = JsonGetObject::default();
974        assert_eq!("json_get_object", udf.name());
975        assert_eq!(
976            DataType::BinaryView,
977            udf.return_type(&[DataType::BinaryView, DataType::Utf8View])?
978        );
979
980        let json_value = parse_string_to_jsonb(r#"{"a": {"b": {"c": {"d": 1}}}}"#).unwrap();
981        let paths = vec!["$", "$.a", "$.a.b", "$.a.b.c", "$.a.b.c.d", "$.e", "$.a.e"];
982        let number_rows = paths.len();
983
984        let args = ScalarFunctionArgs {
985            args: vec![
986                ColumnarValue::Scalar(ScalarValue::Binary(Some(json_value))),
987                ColumnarValue::Array(Arc::new(StringArray::from_iter_values(paths))),
988            ],
989            arg_fields: vec![],
990            number_rows,
991            return_field: Arc::new(Field::new("x", DataType::Binary, false)),
992            config_options: Arc::new(Default::default()),
993        };
994        let result = udf
995            .invoke_with_args(args)
996            .and_then(|x| x.to_array(number_rows))?;
997        let result = result.as_binary_view();
998
999        let expected = &BinaryViewArray::from_iter(
1000            vec![
1001                Some(r#"{"a": {"b": {"c": {"d": 1}}}}"#),
1002                Some(r#"{"b": {"c": {"d": 1}}}"#),
1003                Some(r#"{"c": {"d": 1}}"#),
1004                Some(r#"{"d": 1}"#),
1005                None,
1006                None,
1007                None,
1008            ]
1009            .into_iter()
1010            .map(|x| x.and_then(|s| parse_string_to_jsonb(s).ok())),
1011        );
1012        assert_eq!(result, expected);
1013        Ok(())
1014    }
1015
1016    #[test]
1017    fn test_json_get_with_type() {
1018        let json_get_with_type = JsonGetWithType::default();
1019
1020        assert_eq!("json_get", json_get_with_type.name());
1021
1022        let json_strings = [
1023            r#"{"a": {"b": "a"}, "b": "b", "c": "c"}"#,
1024            r#"{"a": "d", "b": {"c": "e"}, "c": "f"}"#,
1025            r#"{"a": "g", "b": "h", "c": {"a": "g"}}"#,
1026        ];
1027        let json_struct = test_json_struct();
1028
1029        let paths = vec![
1030            "$.a.b",
1031            "$.a",
1032            "",
1033            "$.kind",
1034            "$.payload.code",
1035            "$.payload.result.time_cost",
1036            "$.payload",
1037            "$.payload.success",
1038            "$.payload.result",
1039            "$.payload.result.error",
1040            "$.payload.result.not-exists",
1041            "$.payload.not-exists",
1042            "$.not-exists",
1043            "$",
1044        ];
1045        let expects = [
1046            Some("a"),
1047            Some("d"),
1048            None,
1049            Some("foo"),
1050            Some("404"),
1051            Some("1.234"),
1052            Some(
1053                r#"{"code":404,"result":{"error":"not found","time_cost":1.234},"success":false}"#,
1054            ),
1055            Some("false"),
1056            Some(r#"{"error":"not found","time_cost":1.234}"#),
1057            Some("not found"),
1058            None,
1059            None,
1060            None,
1061            Some(
1062                r#"{"kind":"foo","payload":{"code":404,"result":{"error":"not found","time_cost":1.234},"success":false}}"#,
1063            ),
1064        ];
1065
1066        let mut jsons = json_strings
1067            .iter()
1068            .map(|s| {
1069                let value = jsonb::parse_value(s.as_bytes()).unwrap();
1070                Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
1071            })
1072            .collect::<Vec<_>>();
1073        let json_struct_arrays =
1074            std::iter::repeat_n(json_struct, expects.len() - jsons.len()).collect::<Vec<_>>();
1075        jsons.extend(json_struct_arrays);
1076
1077        for i in 0..jsons.len() {
1078            let json = &jsons[i];
1079            let path = paths[i];
1080            let expect = expects[i];
1081
1082            let args = ScalarFunctionArgs {
1083                args: vec![
1084                    ColumnarValue::Array(json.clone()),
1085                    ColumnarValue::Scalar(path.into()),
1086                    ColumnarValue::Scalar(ScalarValue::Utf8View(None)),
1087                ],
1088                arg_fields: vec![],
1089                number_rows: 1,
1090                return_field: Arc::new(Field::new("x", DataType::Utf8View, false)),
1091                config_options: Arc::new(Default::default()),
1092            };
1093            let result = json_get_with_type
1094                .invoke_with_args(args)
1095                .and_then(|x| x.to_array(1))
1096                .unwrap();
1097
1098            let result = result.as_string_view();
1099            assert_eq!(1, result.len());
1100            let actual = result.is_valid(0).then(|| result.value(0));
1101            assert_eq!(actual, expect);
1102        }
1103
1104        let json_strings = [
1105            r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
1106            r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
1107            r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
1108        ];
1109        let paths = ["$.a.b", "$.a", "$.c", "$.payload.code"];
1110        let expects = [Some(2), Some(4), None, Some(404)];
1111
1112        for (i, (path, expect)) in paths.iter().zip(expects.iter()).enumerate() {
1113            let json = if i < json_strings.len() {
1114                let value = jsonb::parse_value(json_strings[i].as_bytes()).unwrap();
1115                Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
1116            } else {
1117                test_json_struct()
1118            };
1119
1120            let args = ScalarFunctionArgs {
1121                args: vec![
1122                    ColumnarValue::Array(json),
1123                    ColumnarValue::Scalar((*path).into()),
1124                    ColumnarValue::Scalar(ScalarValue::Int64(None)),
1125                ],
1126                arg_fields: vec![],
1127                number_rows: 1,
1128                return_field: Arc::new(Field::new("x", DataType::Int64, false)),
1129                config_options: Arc::new(Default::default()),
1130            };
1131            let result = json_get_with_type
1132                .invoke_with_args(args)
1133                .and_then(|x| x.to_array(1))
1134                .unwrap();
1135
1136            let result = result.as_primitive::<Int64Type>();
1137            assert_eq!(1, result.len());
1138            let actual = result.is_valid(0).then(|| result.value(0));
1139            assert_eq!(actual, *expect);
1140        }
1141
1142        let json_strings = [
1143            r#"{"a": {"b": 2.1}, "b": 2.2, "c": 3.3}"#,
1144            r#"{"a": 4.4, "b": {"c": 6.6}, "c": 6.6}"#,
1145            r#"{"a": 7.7, "b": 8.8, "c": {"a": 7.7}}"#,
1146        ];
1147        let paths = ["$.a.b", "$.a", "$.c", "$.payload.result.time_cost"];
1148        let expects = [Some(2.1), Some(4.4), None, Some(1.234)];
1149
1150        for (i, (path, expect)) in paths.iter().zip(expects.iter()).enumerate() {
1151            let json = if i < json_strings.len() {
1152                let value = jsonb::parse_value(json_strings[i].as_bytes()).unwrap();
1153                Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
1154            } else {
1155                test_json_struct()
1156            };
1157
1158            let args = ScalarFunctionArgs {
1159                args: vec![
1160                    ColumnarValue::Array(json),
1161                    ColumnarValue::Scalar((*path).into()),
1162                    ColumnarValue::Scalar(ScalarValue::Float64(None)),
1163                ],
1164                arg_fields: vec![],
1165                number_rows: 1,
1166                return_field: Arc::new(Field::new("x", DataType::Float64, false)),
1167                config_options: Arc::new(Default::default()),
1168            };
1169            let result = json_get_with_type
1170                .invoke_with_args(args)
1171                .and_then(|x| x.to_array(1))
1172                .unwrap();
1173
1174            let result = result.as_primitive::<Float64Type>();
1175            assert_eq!(1, result.len());
1176            let actual = result.is_valid(0).then(|| result.value(0));
1177            assert_eq!(actual, *expect);
1178        }
1179
1180        let json_strings = [
1181            r#"{"a": {"b": true}, "b": false, "c": true}"#,
1182            r#"{"a": false, "b": {"c": true}, "c": false}"#,
1183            r#"{"a": true, "b": false, "c": {"a": true}}"#,
1184        ];
1185        let paths = ["$.a.b", "$.a", "$.c", "$.payload.success"];
1186        let expects = [Some(true), Some(false), None, Some(false)];
1187
1188        for (i, (path, expect)) in paths.iter().zip(expects.iter()).enumerate() {
1189            let json = if i < json_strings.len() {
1190                let value = jsonb::parse_value(json_strings[i].as_bytes()).unwrap();
1191                Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
1192            } else {
1193                test_json_struct()
1194            };
1195
1196            let args = ScalarFunctionArgs {
1197                args: vec![
1198                    ColumnarValue::Array(json),
1199                    ColumnarValue::Scalar((*path).into()),
1200                    ColumnarValue::Scalar(ScalarValue::Boolean(None)),
1201                ],
1202                arg_fields: vec![],
1203                number_rows: 1,
1204                return_field: Arc::new(Field::new("x", DataType::Boolean, false)),
1205                config_options: Arc::new(Default::default()),
1206            };
1207            let result = json_get_with_type
1208                .invoke_with_args(args)
1209                .and_then(|x| x.to_array(1))
1210                .unwrap();
1211
1212            let result = result.as_boolean();
1213            assert_eq!(1, result.len());
1214            let actual = result.is_valid(0).then(|| result.value(0));
1215            assert_eq!(actual, *expect);
1216        }
1217    }
1218}