Skip to main content

common_function/scalars/json/
json_get.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use arrow::array::{ArrayRef, BinaryViewArray, new_null_array};
18use arrow::compute;
19use arrow::datatypes::Float64Type;
20use arrow_schema::Field;
21use datafusion_common::arrow::array::{
22    Array, AsArray, BinaryViewBuilder, BooleanBuilder, Float64Builder, Int64Builder,
23    StringViewBuilder,
24};
25use datafusion_common::arrow::datatypes::DataType;
26use datafusion_common::{DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err};
27use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
28use datatypes::arrow_array::{int_array_value_at_index, string_array_value_at_index};
29use datatypes::vectors::json::array::JsonArray;
30use derive_more::Display;
31use serde_json::Value;
32
33use crate::function::{Function, extract_args};
34use crate::helper;
35
36fn get_json_by_path(json: &[u8], path: &str) -> Option<Vec<u8>> {
37    let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
38    match json_path {
39        Ok(json_path) => {
40            let mut sub_jsonb = Vec::new();
41            let mut sub_offsets = Vec::new();
42            match jsonb::get_by_path(json, json_path, &mut sub_jsonb, &mut sub_offsets) {
43                Ok(_) => Some(sub_jsonb),
44                Err(_) => None,
45            }
46        }
47        _ => None,
48    }
49}
50
/// A single extracted JSON value handed to a [`JsonGetResultBuilder`], in one of the
/// representations the extraction paths in this file produce.
enum JsonResultValue<'a> {
    /// JSONB-encoded bytes, as returned by `get_json_by_path`.
    Jsonb(Vec<u8>),
    /// A value addressed as (Arrow array, row index). Currently never constructed, hence the
    /// `expect(unused)` lint expectation.
    #[expect(unused)]
    JsonStructByColumn(&'a ArrayRef, usize),
    /// A borrowed `serde_json::Value`.
    JsonStructByValue(&'a Value),
}
57
/// Type-erased builder that accumulates extracted JSON values into an Arrow array of one
/// concrete output type.
trait JsonGetResultBuilder {
    /// Appends one row derived from `value`.
    fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()>;

    /// Appends a NULL row.
    fn append_null(&mut self);

    /// Finishes the underlying Arrow builder and returns the resulting array.
    fn build(&mut self) -> ArrayRef;
}
65
66fn result_builder(len: usize, with_type: &DataType) -> Result<Box<dyn JsonGetResultBuilder>> {
67    let builder = match with_type {
68        DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
69            Box::new(StringResultBuilder(StringViewBuilder::with_capacity(len)))
70                as Box<dyn JsonGetResultBuilder>
71        }
72        DataType::Int64 => Box::new(IntResultBuilder(Int64Builder::with_capacity(len))),
73        DataType::Float64 => Box::new(FloatResultBuilder(Float64Builder::with_capacity(len))),
74        DataType::Boolean => Box::new(BoolResultBuilder(BooleanBuilder::with_capacity(len))),
75        t => {
76            return exec_err!("json_get with unknown type {t}");
77        }
78    };
79    Ok(builder)
80}
81
82// TODO: refactor this to StringLikeArrayBuilder from Arrow 57
83struct StringResultBuilder(StringViewBuilder);
84
85impl JsonGetResultBuilder for StringResultBuilder {
86    fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()> {
87        match value {
88            JsonResultValue::Jsonb(value) => self.0.append_option(jsonb::to_str(&value).ok()),
89            JsonResultValue::JsonStructByColumn(column, i) => {
90                if let Some(v) = string_array_value_at_index(column, i) {
91                    self.0.append_value(v);
92                } else {
93                    self.0
94                        .append_value(arrow_cast::display::array_value_to_string(column, i)?);
95                }
96            }
97            JsonResultValue::JsonStructByValue(value) => {
98                if let Some(s) = value.as_str() {
99                    self.0.append_value(s)
100                } else {
101                    self.0.append_value(value.to_string())
102                }
103            }
104        }
105        Ok(())
106    }
107
108    fn append_null(&mut self) {
109        self.0.append_null();
110    }
111
112    fn build(&mut self) -> ArrayRef {
113        Arc::new(self.0.finish())
114    }
115}
116
117#[derive(Default, Display, Debug)]
118#[display("{}", Self::NAME.to_ascii_uppercase())]
119pub struct JsonGetString(JsonGetWithType);
120
121impl JsonGetString {
122    pub const NAME: &'static str = "json_get_string";
123}
124
125impl Function for JsonGetString {
126    fn name(&self) -> &str {
127        Self::NAME
128    }
129
130    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
131        Ok(DataType::Utf8View)
132    }
133
134    fn signature(&self) -> &Signature {
135        &self.0.signature
136    }
137
138    fn invoke_with_args(&self, mut args: ScalarFunctionArgs) -> Result<ColumnarValue> {
139        args.args
140            .push(ColumnarValue::Scalar(ScalarValue::Utf8View(None)));
141        self.0.invoke_with_args(args)
142    }
143}
144
145struct IntResultBuilder(Int64Builder);
146
147impl JsonGetResultBuilder for IntResultBuilder {
148    fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()> {
149        match value {
150            JsonResultValue::Jsonb(value) => self.0.append_option(jsonb::to_i64(&value).ok()),
151            JsonResultValue::JsonStructByColumn(column, i) => {
152                self.0.append_option(int_array_value_at_index(column, i))
153            }
154            JsonResultValue::JsonStructByValue(value) => self.0.append_option(value.as_i64()),
155        }
156        Ok(())
157    }
158
159    fn append_null(&mut self) {
160        self.0.append_null();
161    }
162
163    fn build(&mut self) -> ArrayRef {
164        Arc::new(self.0.finish())
165    }
166}
167
168#[derive(Default, Display, Debug)]
169#[display("{}", Self::NAME.to_ascii_uppercase())]
170pub struct JsonGetInt(JsonGetWithType);
171
172impl JsonGetInt {
173    pub const NAME: &'static str = "json_get_int";
174}
175
176impl Function for JsonGetInt {
177    fn name(&self) -> &str {
178        Self::NAME
179    }
180
181    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
182        Ok(DataType::Int64)
183    }
184
185    fn signature(&self) -> &Signature {
186        &self.0.signature
187    }
188
189    fn invoke_with_args(&self, mut args: ScalarFunctionArgs) -> Result<ColumnarValue> {
190        args.args
191            .push(ColumnarValue::Scalar(ScalarValue::Int64(None)));
192        self.0.invoke_with_args(args)
193    }
194}
195
196struct FloatResultBuilder(Float64Builder);
197
198impl JsonGetResultBuilder for FloatResultBuilder {
199    fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()> {
200        match value {
201            JsonResultValue::Jsonb(value) => self.0.append_option(jsonb::to_f64(&value).ok()),
202            JsonResultValue::JsonStructByColumn(column, i) => {
203                let result = if column.data_type() == &DataType::Float64 {
204                    column
205                        .as_primitive::<Float64Type>()
206                        .is_valid(i)
207                        .then(|| column.as_primitive::<Float64Type>().value(i))
208                } else {
209                    None
210                };
211                self.0.append_option(result);
212            }
213            JsonResultValue::JsonStructByValue(value) => self.0.append_option(value.as_f64()),
214        }
215        Ok(())
216    }
217
218    fn append_null(&mut self) {
219        self.0.append_null();
220    }
221
222    fn build(&mut self) -> ArrayRef {
223        Arc::new(self.0.finish())
224    }
225}
226
227#[derive(Default, Display, Debug)]
228#[display("{}", Self::NAME.to_ascii_uppercase())]
229pub struct JsonGetFloat(JsonGetWithType);
230
231impl JsonGetFloat {
232    pub const NAME: &'static str = "json_get_float";
233}
234
235impl Function for JsonGetFloat {
236    fn name(&self) -> &str {
237        Self::NAME
238    }
239
240    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
241        Ok(DataType::Float64)
242    }
243
244    fn signature(&self) -> &Signature {
245        &self.0.signature
246    }
247
248    fn invoke_with_args(&self, mut args: ScalarFunctionArgs) -> Result<ColumnarValue> {
249        args.args
250            .push(ColumnarValue::Scalar(ScalarValue::Float64(None)));
251        self.0.invoke_with_args(args)
252    }
253}
254
255struct BoolResultBuilder(BooleanBuilder);
256
257impl JsonGetResultBuilder for BoolResultBuilder {
258    fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()> {
259        match value {
260            JsonResultValue::Jsonb(value) => self.0.append_option(jsonb::to_bool(&value).ok()),
261            JsonResultValue::JsonStructByColumn(column, i) => {
262                let result = if column.data_type() == &DataType::Boolean {
263                    column
264                        .as_boolean()
265                        .is_valid(i)
266                        .then(|| column.as_boolean().value(i))
267                } else {
268                    None
269                };
270                self.0.append_option(result);
271            }
272            JsonResultValue::JsonStructByValue(value) => self.0.append_option(value.as_bool()),
273        }
274        Ok(())
275    }
276
277    fn append_null(&mut self) {
278        self.0.append_null();
279    }
280
281    fn build(&mut self) -> ArrayRef {
282        Arc::new(self.0.finish())
283    }
284}
285
286#[derive(Default, Display, Debug)]
287#[display("{}", Self::NAME.to_ascii_uppercase())]
288pub struct JsonGetBool(JsonGetWithType);
289
290impl JsonGetBool {
291    pub const NAME: &'static str = "json_get_bool";
292}
293
294impl Function for JsonGetBool {
295    fn name(&self) -> &str {
296        Self::NAME
297    }
298
299    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
300        Ok(DataType::Boolean)
301    }
302
303    fn signature(&self) -> &Signature {
304        &self.0.signature
305    }
306
307    fn invoke_with_args(&self, mut args: ScalarFunctionArgs) -> Result<ColumnarValue> {
308        args.args
309            .push(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
310        self.0.invoke_with_args(args)
311    }
312}
313
314fn jsonb_get(
315    jsons: &BinaryViewArray,
316    path: &str,
317    builder: &mut dyn JsonGetResultBuilder,
318) -> Result<()> {
319    let size = jsons.len();
320    for i in 0..size {
321        let json = jsons.is_valid(i).then(|| jsons.value(i));
322        let result = match json {
323            Some(json) => get_json_by_path(json, path),
324            _ => None,
325        };
326        if let Some(v) = result {
327            builder.append_value(JsonResultValue::Jsonb(v))?;
328        } else {
329            builder.append_null();
330        }
331    }
332    Ok(())
333}
334
335fn json_struct_get(array: &ArrayRef, path: &str, with_type: &DataType) -> Result<ArrayRef> {
336    let path = path.trim_start_matches("$");
337
338    // Fast path: if the JSON array fields can be directly indexed into by the `path`, simply get
339    // the sub-array (`column_by_name`).
340    let mut direct = true;
341    let mut current = array;
342    for segment in path.split(".").filter(|s| !s.is_empty()) {
343        if matches!(current.data_type(), DataType::Binary) {
344            direct = false;
345            break;
346        }
347
348        let Some(json) = current.as_struct_opt() else {
349            return exec_err!("unknown JSON array datatype: {}", current.data_type());
350        };
351        let Some(sub_json) = json.column_by_name(segment) else {
352            return Ok(new_null_array(with_type, array.len()));
353        };
354        current = sub_json;
355    }
356
357    // Build the result array with optional value mapper.
358    fn build_with<F>(input: &ArrayRef, with_type: &DataType, value_mapper: F) -> Result<ArrayRef>
359    where
360        for<'a> F: Fn(&'a Value) -> Option<&'a Value>,
361    {
362        let json_array = JsonArray::from(input);
363
364        let mut builder = result_builder(input.len(), with_type)?;
365        for i in 0..input.len() {
366            if input.is_null(i) {
367                builder.append_null();
368                continue;
369            }
370
371            let value = json_array
372                .try_get_value(i)
373                .map_err(|e| exec_datafusion_err!("{e}"))?;
374            let value = value_mapper(&value);
375
376            if let Some(value) = value {
377                builder.append_value(JsonResultValue::JsonStructByValue(value))?;
378            } else {
379                builder.append_null();
380            }
381        }
382        Ok(builder.build())
383    }
384
385    if direct {
386        let casted = if current.data_type() != with_type {
387            match (current.data_type(), with_type) {
388                (DataType::Binary, _) => {
389                    // Fall back to the slow path if the found JSON sub-array is serialized to bytes
390                    // (because of JSON type conflicting)
391                    build_with(current, with_type, |v| Some(v))?
392                }
393                (DataType::List(_) | DataType::Struct(_), with_type) if with_type.is_string() => {
394                    // Special handle for wanted array is string (Arrow cast is not working here if
395                    // the datatype is list or struct), because it could be used in displaying the
396                    // result.
397                    build_with(current, with_type, |v| Some(v))?
398                }
399                (_, with_type) if with_type.is_string() => {
400                    // Same special handle for wanted array is string as above, except for simply
401                    // casting by Arrow is more desirable.
402                    arrow_cast::cast(current.as_ref(), with_type)?
403                }
404                _ => new_null_array(with_type, current.len()),
405            }
406        } else {
407            current.clone()
408        };
409        return Ok(casted);
410    }
411
412    // Slow path: reconstruct the JSON array from serialized representation of conflicting JSON
413    // values: `serde_json::Value`.
414    let mut pointer = path.replace(".", "/");
415    if !pointer.starts_with("/") {
416        pointer = format!("/{}", pointer);
417    }
418    build_with(array, with_type, |value| value.pointer(&pointer))
419}
420
421/// This function is mostly called as `json_get(value, 'attr')::type` and rewritten by
422/// `json_get_rewriter::JsonGetRewriter` to `json_get(value, 'attr', NULL::type)`. So we
423/// use the third argument's type to determine the return type.
424#[derive(Debug, Display)]
425#[display("{}", Self::NAME.to_ascii_uppercase())]
426pub struct JsonGetWithType {
427    signature: Signature,
428}
429
430impl JsonGetWithType {
431    pub const NAME: &'static str = "json_get";
432}
433
434impl Default for JsonGetWithType {
435    fn default() -> Self {
436        Self {
437            signature: Signature::variadic_any(Volatility::Immutable),
438        }
439    }
440}
441
442impl Function for JsonGetWithType {
443    fn name(&self) -> &str {
444        Self::NAME
445    }
446
447    fn return_type(&self, _input_types: &[DataType]) -> datafusion_common::Result<DataType> {
448        Err(DataFusionError::Internal(
449            "This method isn't meant to be called".to_string(),
450        ))
451    }
452
453    fn return_field_from_args(
454        &self,
455        args: datafusion_expr::ReturnFieldArgs<'_>,
456    ) -> datafusion_common::Result<Arc<Field>> {
457        match args.scalar_arguments.get(2) {
458            Some(Some(v)) => {
459                let mut data_type = v.data_type();
460                if matches!(data_type, DataType::Utf8 | DataType::LargeUtf8) {
461                    data_type = DataType::Utf8View;
462                }
463
464                Ok(Arc::new(Field::new(self.name(), data_type, true)))
465            }
466            _ => Ok(Arc::new(Field::new(self.name(), DataType::Utf8View, true))),
467        }
468    }
469
470    fn signature(&self) -> &Signature {
471        &self.signature
472    }
473
474    fn invoke_with_args(
475        &self,
476        args: ScalarFunctionArgs,
477    ) -> datafusion_common::Result<ColumnarValue> {
478        let args_len = args.args.len();
479        if args_len != 2 && args_len != 3 {
480            return exec_err!("json_get expects 2 or 3 arguments, got {args_len}");
481        }
482
483        let arg0 = args.args[0].to_array(args.number_rows)?;
484        let len = arg0.len();
485
486        let path = if let ColumnarValue::Scalar(path) = &args.args[1]
487            && let Some(Some(path)) = path.try_as_str()
488        {
489            path
490        } else {
491            return exec_err!(
492                r#"json_get expects a string literal "path" argument, got {}"#,
493                args.args[1]
494            );
495        };
496
497        let with_type = args
498            .args
499            .get(2)
500            .map(|x| x.data_type())
501            .unwrap_or(DataType::Utf8View);
502
503        let result = match arg0.data_type() {
504            DataType::Binary | DataType::LargeBinary | DataType::BinaryView => {
505                let arg0 = compute::cast(&arg0, &DataType::BinaryView)?;
506                let jsons = arg0.as_binary_view();
507
508                let mut builder = result_builder(len, &with_type)?;
509                jsonb_get(jsons, path, builder.as_mut())?;
510                builder.build()
511            }
512            DataType::Struct(_) => json_struct_get(&arg0, path, &with_type)?,
513            _ => {
514                return exec_err!("JSON_GET not supported argument type {}", arg0.data_type());
515            }
516        };
517
518        Ok(ColumnarValue::Array(result))
519    }
520}
521
/// Get the object from JSON value by path.
///
/// Rows whose path does not select a JSON object produce NULL.
#[derive(Display, Debug)]
#[display("{}", Self::NAME.to_ascii_uppercase())]
pub(super) struct JsonGetObject {
    // Accepted argument type combinations; see the `Default` impl.
    signature: Signature,
}

impl JsonGetObject {
    /// Name reported by `Function::name`.
    const NAME: &'static str = "json_get_object";
}
532
533impl Default for JsonGetObject {
534    fn default() -> Self {
535        Self {
536            signature: helper::one_of_sigs2(
537                vec![
538                    DataType::Binary,
539                    DataType::LargeBinary,
540                    DataType::BinaryView,
541                ],
542                vec![DataType::UInt8, DataType::LargeUtf8, DataType::Utf8View],
543            ),
544        }
545    }
546}
547
548impl Function for JsonGetObject {
549    fn name(&self) -> &str {
550        Self::NAME
551    }
552
553    fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
554        Ok(DataType::BinaryView)
555    }
556
557    fn signature(&self) -> &Signature {
558        &self.signature
559    }
560
561    fn invoke_with_args(
562        &self,
563        args: ScalarFunctionArgs,
564    ) -> datafusion_common::Result<ColumnarValue> {
565        let [arg0, arg1] = extract_args(self.name(), &args)?;
566        let arg0 = compute::cast(&arg0, &DataType::BinaryView)?;
567        let jsons = arg0.as_binary_view();
568        let arg1 = compute::cast(&arg1, &DataType::Utf8View)?;
569        let paths = arg1.as_string_view();
570
571        let len = jsons.len();
572        let mut builder = BinaryViewBuilder::with_capacity(len);
573
574        for i in 0..len {
575            let json = jsons.is_valid(i).then(|| jsons.value(i));
576            let path = paths.is_valid(i).then(|| paths.value(i));
577            let result = if let (Some(json), Some(path)) = (json, path) {
578                let result = jsonb::jsonpath::parse_json_path(path.as_bytes()).and_then(|path| {
579                    let mut data = Vec::new();
580                    let mut offset = Vec::new();
581                    jsonb::get_by_path(json, path, &mut data, &mut offset)
582                        .map(|()| jsonb::is_object(&data).then_some(data))
583                });
584                result.map_err(|e| DataFusionError::Execution(e.to_string()))?
585            } else {
586                None
587            };
588            builder.append_option(result);
589        }
590
591        Ok(ColumnarValue::Array(Arc::new(builder.finish())))
592    }
593}
594
595#[cfg(test)]
596mod tests {
597    use std::sync::Arc;
598
599    use arrow::array::{BooleanArray, Int64Array, StructArray};
600    use arrow_schema::{Field, Fields};
601    use datafusion_common::ScalarValue;
602    use datafusion_common::arrow::array::{BinaryArray, BinaryViewArray, StringArray};
603    use datafusion_common::arrow::datatypes::{Float64Type, Int64Type};
604    use datatypes::types::parse_string_to_jsonb;
605    use serde_json::json;
606
607    use super::*;
608
609    /// Create a JSON object like this (as a one element struct array for testing):
610    ///
611    /// ```JSON
612    /// {
613    ///     "kind": "foo",
614    ///     "payload": {
615    ///         "code": 404,
616    ///         "success": false,
617    ///         "result": {
618    ///             "error": "not found",
619    ///             "time_cost": 1.234
620    ///         }
621    ///     }
622    /// }
623    /// ```
624    fn test_json_struct() -> ArrayRef {
625        let payload_fields = Fields::from(vec![
626            Field::new("code", DataType::Int64, true),
627            Field::new("success", DataType::Boolean, true),
628            Field::new("result", DataType::Binary, true),
629        ]);
630        Arc::new(StructArray::new(
631            vec![
632                Field::new("kind", DataType::Utf8, true),
633                Field::new("payload", DataType::Struct(payload_fields.clone()), true),
634            ]
635            .into(),
636            vec![
637                Arc::new(StringArray::from_iter([Some("foo")])) as ArrayRef,
638                Arc::new(StructArray::new(
639                    payload_fields,
640                    vec![
641                        Arc::new(Int64Array::from_iter([Some(404)])) as ArrayRef,
642                        Arc::new(BooleanArray::from_iter([Some(false)])),
643                        Arc::new(BinaryArray::from_iter([Some(
644                            json!({
645                                "error": "not found",
646                                "time_cost": 1.234
647                            })
648                            .to_string()
649                            .as_bytes(),
650                        )])),
651                    ],
652                    None,
653                )),
654            ],
655            None,
656        ))
657    }
658
659    #[test]
660    fn test_json_get_int() {
661        let json_get_int = JsonGetInt::default();
662
663        assert_eq!("json_get_int", json_get_int.name());
664        assert_eq!(
665            DataType::Int64,
666            json_get_int
667                .return_type(&[DataType::Binary, DataType::Utf8])
668                .unwrap()
669        );
670
671        let json_strings = [
672            r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
673            r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
674            r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
675        ];
676        let json_struct = test_json_struct();
677
678        let path_expects = vec![
679            ("$.a.b", Some(2)),
680            ("$.a", Some(4)),
681            ("$.c", None),
682            ("$.kind", None),
683            ("$.payload.code", Some(404)),
684            ("$.payload.success", None),
685            ("$.payload.result.time_cost", None),
686            ("$.payload.not-exists", None),
687            ("$.not-exists", None),
688            ("$", None),
689        ];
690
691        let mut jsons = json_strings
692            .iter()
693            .map(|s| {
694                let value = jsonb::parse_value(s.as_bytes()).unwrap();
695                Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
696            })
697            .collect::<Vec<_>>();
698        let json_struct_arrays =
699            std::iter::repeat_n(json_struct, path_expects.len() - jsons.len()).collect::<Vec<_>>();
700        jsons.extend(json_struct_arrays);
701
702        for i in 0..jsons.len() {
703            let json = &jsons[i];
704            let (path, expect) = path_expects[i];
705
706            let args = ScalarFunctionArgs {
707                args: vec![
708                    ColumnarValue::Array(json.clone()),
709                    ColumnarValue::Scalar(path.into()),
710                ],
711                arg_fields: vec![],
712                number_rows: 1,
713                return_field: Arc::new(Field::new("x", DataType::Int64, false)),
714                config_options: Arc::new(Default::default()),
715            };
716            let result = json_get_int
717                .invoke_with_args(args)
718                .and_then(|x| x.to_array(1))
719                .unwrap();
720
721            let result = result.as_primitive::<Int64Type>();
722            assert_eq!(1, result.len());
723            let actual = result.is_valid(0).then(|| result.value(0));
724            assert_eq!(actual, expect);
725        }
726    }
727
728    #[test]
729    fn test_json_get_float() {
730        let json_get_float = JsonGetFloat::default();
731
732        assert_eq!("json_get_float", json_get_float.name());
733        assert_eq!(
734            DataType::Float64,
735            json_get_float
736                .return_type(&[DataType::Binary, DataType::Utf8])
737                .unwrap()
738        );
739
740        let json_strings = [
741            r#"{"a": {"b": 2.1}, "b": 2.2, "c": 3.3}"#,
742            r#"{"a": 4.4, "b": {"c": 6.6}, "c": 6.6}"#,
743            r#"{"a": 7.7, "b": 8.8, "c": {"a": 7.7}}"#,
744        ];
745        let json_struct = test_json_struct();
746
747        let path_expects = vec![
748            ("$.a.b", Some(2.1)),
749            ("$.a", Some(4.4)),
750            ("$.c", None),
751            ("$.kind", None),
752            ("$.payload.code", None),
753            ("$.payload.success", None),
754            ("$.payload.result.time_cost", Some(1.234)),
755            ("$.payload.not-exists", None),
756            ("$.not-exists", None),
757            ("$", None),
758        ];
759
760        let mut jsons = json_strings
761            .iter()
762            .map(|s| {
763                let value = jsonb::parse_value(s.as_bytes()).unwrap();
764                Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
765            })
766            .collect::<Vec<_>>();
767        let json_struct_arrays =
768            std::iter::repeat_n(json_struct, path_expects.len() - jsons.len()).collect::<Vec<_>>();
769        jsons.extend(json_struct_arrays);
770
771        for i in 0..jsons.len() {
772            let json = &jsons[i];
773            let (path, expect) = path_expects[i];
774
775            let args = ScalarFunctionArgs {
776                args: vec![
777                    ColumnarValue::Array(json.clone()),
778                    ColumnarValue::Scalar(path.into()),
779                ],
780                arg_fields: vec![],
781                number_rows: 1,
782                return_field: Arc::new(Field::new("x", DataType::Float64, false)),
783                config_options: Arc::new(Default::default()),
784            };
785            let result = json_get_float
786                .invoke_with_args(args)
787                .and_then(|x| x.to_array(1))
788                .unwrap();
789
790            let result = result.as_primitive::<Float64Type>();
791            assert_eq!(1, result.len());
792            let actual = result.is_valid(0).then(|| result.value(0));
793            assert_eq!(actual, expect);
794        }
795    }
796
797    #[test]
798    fn test_json_get_bool() {
799        let json_get_bool = JsonGetBool::default();
800
801        assert_eq!("json_get_bool", json_get_bool.name());
802        assert_eq!(
803            DataType::Boolean,
804            json_get_bool
805                .return_type(&[DataType::Binary, DataType::Utf8])
806                .unwrap()
807        );
808
809        let json_strings = [
810            r#"{"a": {"b": true}, "b": false, "c": true}"#,
811            r#"{"a": false, "b": {"c": true}, "c": false}"#,
812            r#"{"a": true, "b": false, "c": {"a": true}}"#,
813        ];
814        let json_struct = test_json_struct();
815
816        let path_expects = vec![
817            ("$.a.b", Some(true)),
818            ("$.a", Some(false)),
819            ("$.c", None),
820            ("$.kind", None),
821            ("$.payload.code", None),
822            ("$.payload.success", Some(false)),
823            ("$.payload.result.time_cost", None),
824            ("$.payload.not-exists", None),
825            ("$.not-exists", None),
826            ("$", None),
827        ];
828
829        let mut jsons = json_strings
830            .iter()
831            .map(|s| {
832                let value = jsonb::parse_value(s.as_bytes()).unwrap();
833                Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
834            })
835            .collect::<Vec<_>>();
836        let json_struct_arrays =
837            std::iter::repeat_n(json_struct, path_expects.len() - jsons.len()).collect::<Vec<_>>();
838        jsons.extend(json_struct_arrays);
839
840        for i in 0..jsons.len() {
841            let json = &jsons[i];
842            let (path, expect) = path_expects[i];
843
844            let args = ScalarFunctionArgs {
845                args: vec![
846                    ColumnarValue::Array(json.clone()),
847                    ColumnarValue::Scalar(path.into()),
848                ],
849                arg_fields: vec![],
850                number_rows: 1,
851                return_field: Arc::new(Field::new("x", DataType::Boolean, false)),
852                config_options: Arc::new(Default::default()),
853            };
854            let result = json_get_bool
855                .invoke_with_args(args)
856                .and_then(|x| x.to_array(1))
857                .unwrap();
858
859            let result = result.as_boolean();
860            assert_eq!(1, result.len());
861            let actual = result.is_valid(0).then(|| result.value(0));
862            assert_eq!(actual, expect);
863        }
864    }
865
866    #[test]
867    fn test_json_get_string() {
868        let json_get_string = JsonGetString::default();
869
870        assert_eq!("json_get_string", json_get_string.name());
871        assert_eq!(
872            DataType::Utf8View,
873            json_get_string
874                .return_type(&[DataType::Binary, DataType::Utf8])
875                .unwrap()
876        );
877
878        let json_strings = [
879            r#"{"a": {"b": "a"}, "b": "b", "c": "c"}"#,
880            r#"{"a": "d", "b": {"c": "e"}, "c": "f"}"#,
881            r#"{"a": "g", "b": "h", "c": {"a": "g"}}"#,
882        ];
883        let json_struct = test_json_struct();
884
885        let paths = vec![
886            "$.a.b",
887            "$.a",
888            "",
889            "$.kind",
890            "$.payload.code",
891            "$.payload.result.time_cost",
892            "$.payload",
893            "$.payload.success",
894            "$.payload.result",
895            "$.payload.result.error",
896            "$.payload.result.not-exists",
897            "$.payload.not-exists",
898            "$.not-exists",
899            "$",
900        ];
901        let expects = [
902            Some("a"),
903            Some("d"),
904            None,
905            Some("foo"),
906            Some("404"),
907            Some("1.234"),
908            Some(
909                r#"{"code":404,"result":{"error":"not found","time_cost":1.234},"success":false}"#,
910            ),
911            Some("false"),
912            Some(r#"{"error":"not found","time_cost":1.234}"#),
913            Some("not found"),
914            None,
915            None,
916            None,
917            Some(
918                r#"{"kind":"foo","payload":{"code":404,"result":{"error":"not found","time_cost":1.234},"success":false}}"#,
919            ),
920        ];
921
922        let mut jsons = json_strings
923            .iter()
924            .map(|s| {
925                let value = jsonb::parse_value(s.as_bytes()).unwrap();
926                Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
927            })
928            .collect::<Vec<_>>();
929        let json_struct_arrays =
930            std::iter::repeat_n(json_struct, expects.len() - jsons.len()).collect::<Vec<_>>();
931        jsons.extend(json_struct_arrays);
932
933        for i in 0..jsons.len() {
934            let json = &jsons[i];
935            let path = paths[i];
936            let expect = expects[i];
937
938            let args = ScalarFunctionArgs {
939                args: vec![
940                    ColumnarValue::Array(json.clone()),
941                    ColumnarValue::Scalar(path.into()),
942                ],
943                arg_fields: vec![],
944                number_rows: 1,
945                return_field: Arc::new(Field::new("x", DataType::Utf8View, false)),
946                config_options: Arc::new(Default::default()),
947            };
948            let result = json_get_string
949                .invoke_with_args(args)
950                .and_then(|x| x.to_array(1))
951                .unwrap();
952
953            let result = result.as_string_view();
954            assert_eq!(1, result.len());
955            let actual = result.is_valid(0).then(|| result.value(0));
956            assert_eq!(actual, expect);
957        }
958    }
959
960    #[test]
961    fn test_json_get_object() -> Result<()> {
962        let udf = JsonGetObject::default();
963        assert_eq!("json_get_object", udf.name());
964        assert_eq!(
965            DataType::BinaryView,
966            udf.return_type(&[DataType::BinaryView, DataType::Utf8View])?
967        );
968
969        let json_value = parse_string_to_jsonb(r#"{"a": {"b": {"c": {"d": 1}}}}"#).unwrap();
970        let paths = vec!["$", "$.a", "$.a.b", "$.a.b.c", "$.a.b.c.d", "$.e", "$.a.e"];
971        let number_rows = paths.len();
972
973        let args = ScalarFunctionArgs {
974            args: vec![
975                ColumnarValue::Scalar(ScalarValue::Binary(Some(json_value))),
976                ColumnarValue::Array(Arc::new(StringArray::from_iter_values(paths))),
977            ],
978            arg_fields: vec![],
979            number_rows,
980            return_field: Arc::new(Field::new("x", DataType::Binary, false)),
981            config_options: Arc::new(Default::default()),
982        };
983        let result = udf
984            .invoke_with_args(args)
985            .and_then(|x| x.to_array(number_rows))?;
986        let result = result.as_binary_view();
987
988        let expected = &BinaryViewArray::from_iter(
989            vec![
990                Some(r#"{"a": {"b": {"c": {"d": 1}}}}"#),
991                Some(r#"{"b": {"c": {"d": 1}}}"#),
992                Some(r#"{"c": {"d": 1}}"#),
993                Some(r#"{"d": 1}"#),
994                None,
995                None,
996                None,
997            ]
998            .into_iter()
999            .map(|x| x.and_then(|s| parse_string_to_jsonb(s).ok())),
1000        );
1001        assert_eq!(result, expected);
1002        Ok(())
1003    }
1004
1005    #[test]
1006    fn test_json_get_with_type() {
1007        let json_get_with_type = JsonGetWithType::default();
1008
1009        assert_eq!("json_get", json_get_with_type.name());
1010
1011        let json_strings = [
1012            r#"{"a": {"b": "a"}, "b": "b", "c": "c"}"#,
1013            r#"{"a": "d", "b": {"c": "e"}, "c": "f"}"#,
1014            r#"{"a": "g", "b": "h", "c": {"a": "g"}}"#,
1015        ];
1016        let json_struct = test_json_struct();
1017
1018        let paths = vec![
1019            "$.a.b",
1020            "$.a",
1021            "",
1022            "$.kind",
1023            "$.payload.code",
1024            "$.payload.result.time_cost",
1025            "$.payload",
1026            "$.payload.success",
1027            "$.payload.result",
1028            "$.payload.result.error",
1029            "$.payload.result.not-exists",
1030            "$.payload.not-exists",
1031            "$.not-exists",
1032            "$",
1033        ];
1034        let expects = [
1035            Some("a"),
1036            Some("d"),
1037            None,
1038            Some("foo"),
1039            Some("404"),
1040            Some("1.234"),
1041            Some(
1042                r#"{"code":404,"result":{"error":"not found","time_cost":1.234},"success":false}"#,
1043            ),
1044            Some("false"),
1045            Some(r#"{"error":"not found","time_cost":1.234}"#),
1046            Some("not found"),
1047            None,
1048            None,
1049            None,
1050            Some(
1051                r#"{"kind":"foo","payload":{"code":404,"result":{"error":"not found","time_cost":1.234},"success":false}}"#,
1052            ),
1053        ];
1054
1055        let mut jsons = json_strings
1056            .iter()
1057            .map(|s| {
1058                let value = jsonb::parse_value(s.as_bytes()).unwrap();
1059                Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
1060            })
1061            .collect::<Vec<_>>();
1062        let json_struct_arrays =
1063            std::iter::repeat_n(json_struct, expects.len() - jsons.len()).collect::<Vec<_>>();
1064        jsons.extend(json_struct_arrays);
1065
1066        for i in 0..jsons.len() {
1067            let json = &jsons[i];
1068            let path = paths[i];
1069            let expect = expects[i];
1070
1071            let args = ScalarFunctionArgs {
1072                args: vec![
1073                    ColumnarValue::Array(json.clone()),
1074                    ColumnarValue::Scalar(path.into()),
1075                    ColumnarValue::Scalar(ScalarValue::Utf8View(None)),
1076                ],
1077                arg_fields: vec![],
1078                number_rows: 1,
1079                return_field: Arc::new(Field::new("x", DataType::Utf8View, false)),
1080                config_options: Arc::new(Default::default()),
1081            };
1082            let result = json_get_with_type
1083                .invoke_with_args(args)
1084                .and_then(|x| x.to_array(1))
1085                .unwrap();
1086
1087            let result = result.as_string_view();
1088            assert_eq!(1, result.len());
1089            let actual = result.is_valid(0).then(|| result.value(0));
1090            assert_eq!(actual, expect);
1091        }
1092
1093        let json_strings = [
1094            r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
1095            r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
1096            r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
1097        ];
1098        let paths = ["$.a.b", "$.a", "$.c", "$.payload.code"];
1099        let expects = [Some(2), Some(4), None, Some(404)];
1100
1101        for (i, (path, expect)) in paths.iter().zip(expects.iter()).enumerate() {
1102            let json = if i < json_strings.len() {
1103                let value = jsonb::parse_value(json_strings[i].as_bytes()).unwrap();
1104                Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
1105            } else {
1106                test_json_struct()
1107            };
1108
1109            let args = ScalarFunctionArgs {
1110                args: vec![
1111                    ColumnarValue::Array(json),
1112                    ColumnarValue::Scalar((*path).into()),
1113                    ColumnarValue::Scalar(ScalarValue::Int64(None)),
1114                ],
1115                arg_fields: vec![],
1116                number_rows: 1,
1117                return_field: Arc::new(Field::new("x", DataType::Int64, false)),
1118                config_options: Arc::new(Default::default()),
1119            };
1120            let result = json_get_with_type
1121                .invoke_with_args(args)
1122                .and_then(|x| x.to_array(1))
1123                .unwrap();
1124
1125            let result = result.as_primitive::<Int64Type>();
1126            assert_eq!(1, result.len());
1127            let actual = result.is_valid(0).then(|| result.value(0));
1128            assert_eq!(actual, *expect);
1129        }
1130
1131        let json_strings = [
1132            r#"{"a": {"b": 2.1}, "b": 2.2, "c": 3.3}"#,
1133            r#"{"a": 4.4, "b": {"c": 6.6}, "c": 6.6}"#,
1134            r#"{"a": 7.7, "b": 8.8, "c": {"a": 7.7}}"#,
1135        ];
1136        let paths = ["$.a.b", "$.a", "$.c", "$.payload.result.time_cost"];
1137        let expects = [Some(2.1), Some(4.4), None, Some(1.234)];
1138
1139        for (i, (path, expect)) in paths.iter().zip(expects.iter()).enumerate() {
1140            let json = if i < json_strings.len() {
1141                let value = jsonb::parse_value(json_strings[i].as_bytes()).unwrap();
1142                Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
1143            } else {
1144                test_json_struct()
1145            };
1146
1147            let args = ScalarFunctionArgs {
1148                args: vec![
1149                    ColumnarValue::Array(json),
1150                    ColumnarValue::Scalar((*path).into()),
1151                    ColumnarValue::Scalar(ScalarValue::Float64(None)),
1152                ],
1153                arg_fields: vec![],
1154                number_rows: 1,
1155                return_field: Arc::new(Field::new("x", DataType::Float64, false)),
1156                config_options: Arc::new(Default::default()),
1157            };
1158            let result = json_get_with_type
1159                .invoke_with_args(args)
1160                .and_then(|x| x.to_array(1))
1161                .unwrap();
1162
1163            let result = result.as_primitive::<Float64Type>();
1164            assert_eq!(1, result.len());
1165            let actual = result.is_valid(0).then(|| result.value(0));
1166            assert_eq!(actual, *expect);
1167        }
1168
1169        let json_strings = [
1170            r#"{"a": {"b": true}, "b": false, "c": true}"#,
1171            r#"{"a": false, "b": {"c": true}, "c": false}"#,
1172            r#"{"a": true, "b": false, "c": {"a": true}}"#,
1173        ];
1174        let paths = ["$.a.b", "$.a", "$.c", "$.payload.success"];
1175        let expects = [Some(true), Some(false), None, Some(false)];
1176
1177        for (i, (path, expect)) in paths.iter().zip(expects.iter()).enumerate() {
1178            let json = if i < json_strings.len() {
1179                let value = jsonb::parse_value(json_strings[i].as_bytes()).unwrap();
1180                Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
1181            } else {
1182                test_json_struct()
1183            };
1184
1185            let args = ScalarFunctionArgs {
1186                args: vec![
1187                    ColumnarValue::Array(json),
1188                    ColumnarValue::Scalar((*path).into()),
1189                    ColumnarValue::Scalar(ScalarValue::Boolean(None)),
1190                ],
1191                arg_fields: vec![],
1192                number_rows: 1,
1193                return_field: Arc::new(Field::new("x", DataType::Boolean, false)),
1194                config_options: Arc::new(Default::default()),
1195            };
1196            let result = json_get_with_type
1197                .invoke_with_args(args)
1198                .and_then(|x| x.to_array(1))
1199                .unwrap();
1200
1201            let result = result.as_boolean();
1202            assert_eq!(1, result.len());
1203            let actual = result.is_valid(0).then(|| result.value(0));
1204            assert_eq!(actual, *expect);
1205        }
1206    }
1207}