1use std::sync::Arc;
16
17use arrow::array::{ArrayRef, BinaryViewArray, new_null_array};
18use arrow::compute;
19use arrow::datatypes::Float64Type;
20use arrow_schema::Field;
21use datafusion_common::arrow::array::{
22 Array, AsArray, BinaryViewBuilder, BooleanBuilder, Float64Builder, Int64Builder,
23 StringViewBuilder,
24};
25use datafusion_common::arrow::datatypes::DataType;
26use datafusion_common::{DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err};
27use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
28use datatypes::arrow_array::{int_array_value_at_index, string_array_value_at_index};
29use datatypes::vectors::json::array::JsonArray;
30use derive_more::Display;
31use serde_json::Value;
32
33use crate::function::{Function, extract_args};
34use crate::helper;
35
36fn get_json_by_path(json: &[u8], path: &str) -> Option<Vec<u8>> {
37 let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
38 match json_path {
39 Ok(json_path) => {
40 let mut sub_jsonb = Vec::new();
41 let mut sub_offsets = Vec::new();
42 match jsonb::get_by_path(json, json_path, &mut sub_jsonb, &mut sub_offsets) {
43 Ok(_) => Some(sub_jsonb),
44 Err(_) => None,
45 }
46 }
47 _ => None,
48 }
49}
50
/// A single extracted JSON value, in one of the representations that a
/// [`JsonGetResultBuilder`] knows how to consume.
enum JsonResultValue<'a> {
    /// Owned bytes produced by a `jsonb` path lookup.
    Jsonb(Vec<u8>),
    /// A column together with the row index to read from it.
    #[expect(unused)]
    JsonStructByColumn(&'a ArrayRef, usize),
    /// A borrowed `serde_json` value.
    JsonStructByValue(&'a Value),
}
57
/// Row-by-row accumulator for `json_get` results of one concrete output type.
trait JsonGetResultBuilder {
    /// Appends one extracted value, converting it to the builder's output type.
    fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()>;

    /// Appends a null entry.
    fn append_null(&mut self);

    /// Finishes the builder and returns the accumulated values as an array.
    fn build(&mut self) -> ArrayRef;
}
65
66fn result_builder(len: usize, with_type: &DataType) -> Result<Box<dyn JsonGetResultBuilder>> {
67 let builder = match with_type {
68 DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
69 Box::new(StringResultBuilder(StringViewBuilder::with_capacity(len)))
70 as Box<dyn JsonGetResultBuilder>
71 }
72 DataType::Int64 => Box::new(IntResultBuilder(Int64Builder::with_capacity(len))),
73 DataType::Float64 => Box::new(FloatResultBuilder(Float64Builder::with_capacity(len))),
74 DataType::Boolean => Box::new(BoolResultBuilder(BooleanBuilder::with_capacity(len))),
75 t => {
76 return exec_err!("json_get with unknown type {t}");
77 }
78 };
79 Ok(builder)
80}
81
82struct StringResultBuilder(StringViewBuilder);
84
85impl JsonGetResultBuilder for StringResultBuilder {
86 fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()> {
87 match value {
88 JsonResultValue::Jsonb(value) => self.0.append_option(jsonb::to_str(&value).ok()),
89 JsonResultValue::JsonStructByColumn(column, i) => {
90 if let Some(v) = string_array_value_at_index(column, i) {
91 self.0.append_value(v);
92 } else {
93 self.0
94 .append_value(arrow_cast::display::array_value_to_string(column, i)?);
95 }
96 }
97 JsonResultValue::JsonStructByValue(value) => {
98 if let Some(s) = value.as_str() {
99 self.0.append_value(s)
100 } else {
101 self.0.append_value(value.to_string())
102 }
103 }
104 }
105 Ok(())
106 }
107
108 fn append_null(&mut self) {
109 self.0.append_null();
110 }
111
112 fn build(&mut self) -> ArrayRef {
113 Arc::new(self.0.finish())
114 }
115}
116
117#[derive(Default, Display, Debug)]
118#[display("{}", Self::NAME.to_ascii_uppercase())]
119pub struct JsonGetString(JsonGetWithType);
120
121impl JsonGetString {
122 pub const NAME: &'static str = "json_get_string";
123}
124
125impl Function for JsonGetString {
126 fn name(&self) -> &str {
127 Self::NAME
128 }
129
130 fn return_type(&self, _: &[DataType]) -> Result<DataType> {
131 Ok(DataType::Utf8View)
132 }
133
134 fn signature(&self) -> &Signature {
135 &self.0.signature
136 }
137
138 fn invoke_with_args(&self, mut args: ScalarFunctionArgs) -> Result<ColumnarValue> {
139 args.args
140 .push(ColumnarValue::Scalar(ScalarValue::Utf8View(None)));
141 self.0.invoke_with_args(args)
142 }
143}
144
145struct IntResultBuilder(Int64Builder);
146
147impl JsonGetResultBuilder for IntResultBuilder {
148 fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()> {
149 match value {
150 JsonResultValue::Jsonb(value) => self.0.append_option(jsonb::to_i64(&value).ok()),
151 JsonResultValue::JsonStructByColumn(column, i) => {
152 self.0.append_option(int_array_value_at_index(column, i))
153 }
154 JsonResultValue::JsonStructByValue(value) => self.0.append_option(value.as_i64()),
155 }
156 Ok(())
157 }
158
159 fn append_null(&mut self) {
160 self.0.append_null();
161 }
162
163 fn build(&mut self) -> ArrayRef {
164 Arc::new(self.0.finish())
165 }
166}
167
168#[derive(Default, Display, Debug)]
169#[display("{}", Self::NAME.to_ascii_uppercase())]
170pub struct JsonGetInt(JsonGetWithType);
171
172impl JsonGetInt {
173 pub const NAME: &'static str = "json_get_int";
174}
175
176impl Function for JsonGetInt {
177 fn name(&self) -> &str {
178 Self::NAME
179 }
180
181 fn return_type(&self, _: &[DataType]) -> Result<DataType> {
182 Ok(DataType::Int64)
183 }
184
185 fn signature(&self) -> &Signature {
186 &self.0.signature
187 }
188
189 fn invoke_with_args(&self, mut args: ScalarFunctionArgs) -> Result<ColumnarValue> {
190 args.args
191 .push(ColumnarValue::Scalar(ScalarValue::Int64(None)));
192 self.0.invoke_with_args(args)
193 }
194}
195
196struct FloatResultBuilder(Float64Builder);
197
198impl JsonGetResultBuilder for FloatResultBuilder {
199 fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()> {
200 match value {
201 JsonResultValue::Jsonb(value) => self.0.append_option(jsonb::to_f64(&value).ok()),
202 JsonResultValue::JsonStructByColumn(column, i) => {
203 let result = if column.data_type() == &DataType::Float64 {
204 column
205 .as_primitive::<Float64Type>()
206 .is_valid(i)
207 .then(|| column.as_primitive::<Float64Type>().value(i))
208 } else {
209 None
210 };
211 self.0.append_option(result);
212 }
213 JsonResultValue::JsonStructByValue(value) => self.0.append_option(value.as_f64()),
214 }
215 Ok(())
216 }
217
218 fn append_null(&mut self) {
219 self.0.append_null();
220 }
221
222 fn build(&mut self) -> ArrayRef {
223 Arc::new(self.0.finish())
224 }
225}
226
227#[derive(Default, Display, Debug)]
228#[display("{}", Self::NAME.to_ascii_uppercase())]
229pub struct JsonGetFloat(JsonGetWithType);
230
231impl JsonGetFloat {
232 pub const NAME: &'static str = "json_get_float";
233}
234
235impl Function for JsonGetFloat {
236 fn name(&self) -> &str {
237 Self::NAME
238 }
239
240 fn return_type(&self, _: &[DataType]) -> Result<DataType> {
241 Ok(DataType::Float64)
242 }
243
244 fn signature(&self) -> &Signature {
245 &self.0.signature
246 }
247
248 fn invoke_with_args(&self, mut args: ScalarFunctionArgs) -> Result<ColumnarValue> {
249 args.args
250 .push(ColumnarValue::Scalar(ScalarValue::Float64(None)));
251 self.0.invoke_with_args(args)
252 }
253}
254
255struct BoolResultBuilder(BooleanBuilder);
256
257impl JsonGetResultBuilder for BoolResultBuilder {
258 fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()> {
259 match value {
260 JsonResultValue::Jsonb(value) => self.0.append_option(jsonb::to_bool(&value).ok()),
261 JsonResultValue::JsonStructByColumn(column, i) => {
262 let result = if column.data_type() == &DataType::Boolean {
263 column
264 .as_boolean()
265 .is_valid(i)
266 .then(|| column.as_boolean().value(i))
267 } else {
268 None
269 };
270 self.0.append_option(result);
271 }
272 JsonResultValue::JsonStructByValue(value) => self.0.append_option(value.as_bool()),
273 }
274 Ok(())
275 }
276
277 fn append_null(&mut self) {
278 self.0.append_null();
279 }
280
281 fn build(&mut self) -> ArrayRef {
282 Arc::new(self.0.finish())
283 }
284}
285
286#[derive(Default, Display, Debug)]
287#[display("{}", Self::NAME.to_ascii_uppercase())]
288pub struct JsonGetBool(JsonGetWithType);
289
290impl JsonGetBool {
291 pub const NAME: &'static str = "json_get_bool";
292}
293
294impl Function for JsonGetBool {
295 fn name(&self) -> &str {
296 Self::NAME
297 }
298
299 fn return_type(&self, _: &[DataType]) -> Result<DataType> {
300 Ok(DataType::Boolean)
301 }
302
303 fn signature(&self) -> &Signature {
304 &self.0.signature
305 }
306
307 fn invoke_with_args(&self, mut args: ScalarFunctionArgs) -> Result<ColumnarValue> {
308 args.args
309 .push(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
310 self.0.invoke_with_args(args)
311 }
312}
313
314fn jsonb_get(
315 jsons: &BinaryViewArray,
316 path: &str,
317 builder: &mut dyn JsonGetResultBuilder,
318) -> Result<()> {
319 let size = jsons.len();
320 for i in 0..size {
321 let json = jsons.is_valid(i).then(|| jsons.value(i));
322 let result = match json {
323 Some(json) => get_json_by_path(json, path),
324 _ => None,
325 };
326 if let Some(v) = result {
327 builder.append_value(JsonResultValue::Jsonb(v))?;
328 } else {
329 builder.append_null();
330 }
331 }
332 Ok(())
333}
334
/// Extracts the value at `path` from a struct-typed JSON `array` and returns
/// it as an array of `with_type`.
///
/// `path` is split on `.` into field names after stripping leading `$`
/// characters. The function first tries to resolve the path directly against
/// the struct's child columns; if it runs into a `Binary` column before the
/// path is exhausted, it falls back to decoding each row and looking the
/// path up as a JSON pointer.
///
/// # Errors
///
/// Returns an execution error when a path segment has to be resolved against
/// a column that is neither a struct nor `Binary`, when a row cannot be
/// decoded, or when `with_type` has no result builder.
fn json_struct_get(array: &ArrayRef, path: &str, with_type: &DataType) -> Result<ArrayRef> {
    let path = path.trim_start_matches("$");

    // Fast path: walk the struct's child columns one path segment at a time.
    // `direct` stays true only if every segment was resolved this way.
    let mut direct = true;
    let mut current = array;
    for segment in path.split(".").filter(|s| !s.is_empty()) {
        // A binary column has no child columns to descend into, so the rest
        // of the path must be resolved per row (see the fallback below).
        if matches!(current.data_type(), DataType::Binary) {
            direct = false;
            break;
        }

        let Some(json) = current.as_struct_opt() else {
            return exec_err!("unknown JSON array datatype: {}", current.data_type());
        };
        // A missing field means the path matches nothing in any row.
        let Some(sub_json) = json.column_by_name(segment) else {
            return Ok(new_null_array(with_type, array.len()));
        };
        current = sub_json;
    }

    // Decodes every non-null row of `input` through `JsonArray`, narrows the
    // decoded value with `value_mapper`, and appends the outcome (or null when
    // the mapper returns `None`) to a builder of `with_type`.
    fn build_with<F>(input: &ArrayRef, with_type: &DataType, value_mapper: F) -> Result<ArrayRef>
    where
        for<'a> F: Fn(&'a Value) -> Option<&'a Value>,
    {
        let json_array = JsonArray::from(input);

        let mut builder = result_builder(input.len(), with_type)?;
        for i in 0..input.len() {
            if input.is_null(i) {
                builder.append_null();
                continue;
            }

            let value = json_array
                .try_get_value(i)
                .map_err(|e| exec_datafusion_err!("{e}"))?;
            let value = value_mapper(&value);

            if let Some(value) = value {
                builder.append_value(JsonResultValue::JsonStructByValue(value))?;
            } else {
                builder.append_null();
            }
        }
        Ok(builder.build())
    }

    if direct {
        // The whole path resolved to the column `current`; convert it to
        // `with_type` only if the types differ.
        let casted = if current.data_type() != with_type {
            match (current.data_type(), with_type) {
                // Binary columns are decoded row by row rather than cast.
                (DataType::Binary, _) => {
                    build_with(current, with_type, |v| Some(v))?
                }
                // Nested values requested as a string are rendered per row
                // through the string result builder.
                (DataType::List(_) | DataType::Struct(_), with_type) if with_type.is_string() => {
                    build_with(current, with_type, |v| Some(v))?
                }
                // Any other column requested as a string goes through an Arrow cast.
                (_, with_type) if with_type.is_string() => {
                    arrow_cast::cast(current.as_ref(), with_type)?
                }
                // Mismatched non-string target types produce an all-null result.
                _ => new_null_array(with_type, current.len()),
            }
        } else {
            current.clone()
        };
        return Ok(casted);
    }

    // Fallback: turn the dotted path into a JSON pointer ("/a/b") and evaluate
    // it against every decoded row of the root array.
    let mut pointer = path.replace(".", "/");
    if !pointer.starts_with("/") {
        pointer = format!("/{}", pointer);
    }
    build_with(array, with_type, |value| value.pointer(&pointer))
}
420
/// Scalar function `json_get(json, path[, type_hint])`.
///
/// The optional third argument only contributes its data type, which selects
/// the type of the returned values; without it the result is `Utf8View`.
#[derive(Debug, Display)]
#[display("{}", Self::NAME.to_ascii_uppercase())]
pub struct JsonGetWithType {
    signature: Signature,
}

impl JsonGetWithType {
    /// Name under which the function is exposed.
    pub const NAME: &'static str = "json_get";
}
433
434impl Default for JsonGetWithType {
435 fn default() -> Self {
436 Self {
437 signature: Signature::variadic_any(Volatility::Immutable),
438 }
439 }
440}
441
442impl Function for JsonGetWithType {
443 fn name(&self) -> &str {
444 Self::NAME
445 }
446
447 fn return_type(&self, _input_types: &[DataType]) -> datafusion_common::Result<DataType> {
448 Err(DataFusionError::Internal(
449 "This method isn't meant to be called".to_string(),
450 ))
451 }
452
453 fn return_field_from_args(
454 &self,
455 args: datafusion_expr::ReturnFieldArgs<'_>,
456 ) -> datafusion_common::Result<Arc<Field>> {
457 match args.scalar_arguments.get(2) {
458 Some(Some(v)) => {
459 let mut data_type = v.data_type();
460 if matches!(data_type, DataType::Utf8 | DataType::LargeUtf8) {
461 data_type = DataType::Utf8View;
462 }
463
464 Ok(Arc::new(Field::new(self.name(), data_type, true)))
465 }
466 _ => Ok(Arc::new(Field::new(self.name(), DataType::Utf8View, true))),
467 }
468 }
469
470 fn signature(&self) -> &Signature {
471 &self.signature
472 }
473
474 fn invoke_with_args(
475 &self,
476 args: ScalarFunctionArgs,
477 ) -> datafusion_common::Result<ColumnarValue> {
478 let args_len = args.args.len();
479 if args_len != 2 && args_len != 3 {
480 return exec_err!("json_get expects 2 or 3 arguments, got {args_len}");
481 }
482
483 let arg0 = args.args[0].to_array(args.number_rows)?;
484 let len = arg0.len();
485
486 let path = if let ColumnarValue::Scalar(path) = &args.args[1]
487 && let Some(Some(path)) = path.try_as_str()
488 {
489 path
490 } else {
491 return exec_err!(
492 r#"json_get expects a string literal "path" argument, got {}"#,
493 args.args[1]
494 );
495 };
496
497 let with_type = args
498 .args
499 .get(2)
500 .map(|x| x.data_type())
501 .unwrap_or(DataType::Utf8View);
502
503 let result = match arg0.data_type() {
504 DataType::Binary | DataType::LargeBinary | DataType::BinaryView => {
505 let arg0 = compute::cast(&arg0, &DataType::BinaryView)?;
506 let jsons = arg0.as_binary_view();
507
508 let mut builder = result_builder(len, &with_type)?;
509 jsonb_get(jsons, path, builder.as_mut())?;
510 builder.build()
511 }
512 DataType::Struct(_) => json_struct_get(&arg0, path, &with_type)?,
513 _ => {
514 return exec_err!("JSON_GET not supported argument type {}", arg0.data_type());
515 }
516 };
517
518 Ok(ColumnarValue::Array(result))
519 }
520}
521
/// Scalar function `json_get_object(json, path)`: returns the value found at
/// `path` when it is a JSON object, and null otherwise.
#[derive(Display, Debug)]
#[display("{}", Self::NAME.to_ascii_uppercase())]
pub(super) struct JsonGetObject {
    signature: Signature,
}

impl JsonGetObject {
    /// Name under which the function is exposed.
    const NAME: &'static str = "json_get_object";
}
532
533impl Default for JsonGetObject {
534 fn default() -> Self {
535 Self {
536 signature: helper::one_of_sigs2(
537 vec![
538 DataType::Binary,
539 DataType::LargeBinary,
540 DataType::BinaryView,
541 ],
542 vec![DataType::UInt8, DataType::LargeUtf8, DataType::Utf8View],
543 ),
544 }
545 }
546}
547
548impl Function for JsonGetObject {
549 fn name(&self) -> &str {
550 Self::NAME
551 }
552
553 fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
554 Ok(DataType::BinaryView)
555 }
556
557 fn signature(&self) -> &Signature {
558 &self.signature
559 }
560
561 fn invoke_with_args(
562 &self,
563 args: ScalarFunctionArgs,
564 ) -> datafusion_common::Result<ColumnarValue> {
565 let [arg0, arg1] = extract_args(self.name(), &args)?;
566 let arg0 = compute::cast(&arg0, &DataType::BinaryView)?;
567 let jsons = arg0.as_binary_view();
568 let arg1 = compute::cast(&arg1, &DataType::Utf8View)?;
569 let paths = arg1.as_string_view();
570
571 let len = jsons.len();
572 let mut builder = BinaryViewBuilder::with_capacity(len);
573
574 for i in 0..len {
575 let json = jsons.is_valid(i).then(|| jsons.value(i));
576 let path = paths.is_valid(i).then(|| paths.value(i));
577 let result = if let (Some(json), Some(path)) = (json, path) {
578 let result = jsonb::jsonpath::parse_json_path(path.as_bytes()).and_then(|path| {
579 let mut data = Vec::new();
580 let mut offset = Vec::new();
581 jsonb::get_by_path(json, path, &mut data, &mut offset)
582 .map(|()| jsonb::is_object(&data).then_some(data))
583 });
584 result.map_err(|e| DataFusionError::Execution(e.to_string()))?
585 } else {
586 None
587 };
588 builder.append_option(result);
589 }
590
591 Ok(ColumnarValue::Array(Arc::new(builder.finish())))
592 }
593}
594
595#[cfg(test)]
596mod tests {
597 use std::sync::Arc;
598
599 use arrow::array::{BooleanArray, Int64Array, StructArray};
600 use arrow_schema::{Field, Fields};
601 use datafusion_common::ScalarValue;
602 use datafusion_common::arrow::array::{BinaryArray, BinaryViewArray, StringArray};
603 use datafusion_common::arrow::datatypes::{Float64Type, Int64Type};
604 use datatypes::types::parse_string_to_jsonb;
605 use serde_json::json;
606
607 use super::*;
608
609 fn test_json_struct() -> ArrayRef {
625 let payload_fields = Fields::from(vec![
626 Field::new("code", DataType::Int64, true),
627 Field::new("success", DataType::Boolean, true),
628 Field::new("result", DataType::Binary, true),
629 ]);
630 Arc::new(StructArray::new(
631 vec![
632 Field::new("kind", DataType::Utf8, true),
633 Field::new("payload", DataType::Struct(payload_fields.clone()), true),
634 ]
635 .into(),
636 vec![
637 Arc::new(StringArray::from_iter([Some("foo")])) as ArrayRef,
638 Arc::new(StructArray::new(
639 payload_fields,
640 vec![
641 Arc::new(Int64Array::from_iter([Some(404)])) as ArrayRef,
642 Arc::new(BooleanArray::from_iter([Some(false)])),
643 Arc::new(BinaryArray::from_iter([Some(
644 json!({
645 "error": "not found",
646 "time_cost": 1.234
647 })
648 .to_string()
649 .as_bytes(),
650 )])),
651 ],
652 None,
653 )),
654 ],
655 None,
656 ))
657 }
658
659 #[test]
660 fn test_json_get_int() {
661 let json_get_int = JsonGetInt::default();
662
663 assert_eq!("json_get_int", json_get_int.name());
664 assert_eq!(
665 DataType::Int64,
666 json_get_int
667 .return_type(&[DataType::Binary, DataType::Utf8])
668 .unwrap()
669 );
670
671 let json_strings = [
672 r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
673 r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
674 r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
675 ];
676 let json_struct = test_json_struct();
677
678 let path_expects = vec![
679 ("$.a.b", Some(2)),
680 ("$.a", Some(4)),
681 ("$.c", None),
682 ("$.kind", None),
683 ("$.payload.code", Some(404)),
684 ("$.payload.success", None),
685 ("$.payload.result.time_cost", None),
686 ("$.payload.not-exists", None),
687 ("$.not-exists", None),
688 ("$", None),
689 ];
690
691 let mut jsons = json_strings
692 .iter()
693 .map(|s| {
694 let value = jsonb::parse_value(s.as_bytes()).unwrap();
695 Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
696 })
697 .collect::<Vec<_>>();
698 let json_struct_arrays =
699 std::iter::repeat_n(json_struct, path_expects.len() - jsons.len()).collect::<Vec<_>>();
700 jsons.extend(json_struct_arrays);
701
702 for i in 0..jsons.len() {
703 let json = &jsons[i];
704 let (path, expect) = path_expects[i];
705
706 let args = ScalarFunctionArgs {
707 args: vec![
708 ColumnarValue::Array(json.clone()),
709 ColumnarValue::Scalar(path.into()),
710 ],
711 arg_fields: vec![],
712 number_rows: 1,
713 return_field: Arc::new(Field::new("x", DataType::Int64, false)),
714 config_options: Arc::new(Default::default()),
715 };
716 let result = json_get_int
717 .invoke_with_args(args)
718 .and_then(|x| x.to_array(1))
719 .unwrap();
720
721 let result = result.as_primitive::<Int64Type>();
722 assert_eq!(1, result.len());
723 let actual = result.is_valid(0).then(|| result.value(0));
724 assert_eq!(actual, expect);
725 }
726 }
727
728 #[test]
729 fn test_json_get_float() {
730 let json_get_float = JsonGetFloat::default();
731
732 assert_eq!("json_get_float", json_get_float.name());
733 assert_eq!(
734 DataType::Float64,
735 json_get_float
736 .return_type(&[DataType::Binary, DataType::Utf8])
737 .unwrap()
738 );
739
740 let json_strings = [
741 r#"{"a": {"b": 2.1}, "b": 2.2, "c": 3.3}"#,
742 r#"{"a": 4.4, "b": {"c": 6.6}, "c": 6.6}"#,
743 r#"{"a": 7.7, "b": 8.8, "c": {"a": 7.7}}"#,
744 ];
745 let json_struct = test_json_struct();
746
747 let path_expects = vec![
748 ("$.a.b", Some(2.1)),
749 ("$.a", Some(4.4)),
750 ("$.c", None),
751 ("$.kind", None),
752 ("$.payload.code", None),
753 ("$.payload.success", None),
754 ("$.payload.result.time_cost", Some(1.234)),
755 ("$.payload.not-exists", None),
756 ("$.not-exists", None),
757 ("$", None),
758 ];
759
760 let mut jsons = json_strings
761 .iter()
762 .map(|s| {
763 let value = jsonb::parse_value(s.as_bytes()).unwrap();
764 Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
765 })
766 .collect::<Vec<_>>();
767 let json_struct_arrays =
768 std::iter::repeat_n(json_struct, path_expects.len() - jsons.len()).collect::<Vec<_>>();
769 jsons.extend(json_struct_arrays);
770
771 for i in 0..jsons.len() {
772 let json = &jsons[i];
773 let (path, expect) = path_expects[i];
774
775 let args = ScalarFunctionArgs {
776 args: vec![
777 ColumnarValue::Array(json.clone()),
778 ColumnarValue::Scalar(path.into()),
779 ],
780 arg_fields: vec![],
781 number_rows: 1,
782 return_field: Arc::new(Field::new("x", DataType::Float64, false)),
783 config_options: Arc::new(Default::default()),
784 };
785 let result = json_get_float
786 .invoke_with_args(args)
787 .and_then(|x| x.to_array(1))
788 .unwrap();
789
790 let result = result.as_primitive::<Float64Type>();
791 assert_eq!(1, result.len());
792 let actual = result.is_valid(0).then(|| result.value(0));
793 assert_eq!(actual, expect);
794 }
795 }
796
797 #[test]
798 fn test_json_get_bool() {
799 let json_get_bool = JsonGetBool::default();
800
801 assert_eq!("json_get_bool", json_get_bool.name());
802 assert_eq!(
803 DataType::Boolean,
804 json_get_bool
805 .return_type(&[DataType::Binary, DataType::Utf8])
806 .unwrap()
807 );
808
809 let json_strings = [
810 r#"{"a": {"b": true}, "b": false, "c": true}"#,
811 r#"{"a": false, "b": {"c": true}, "c": false}"#,
812 r#"{"a": true, "b": false, "c": {"a": true}}"#,
813 ];
814 let json_struct = test_json_struct();
815
816 let path_expects = vec![
817 ("$.a.b", Some(true)),
818 ("$.a", Some(false)),
819 ("$.c", None),
820 ("$.kind", None),
821 ("$.payload.code", None),
822 ("$.payload.success", Some(false)),
823 ("$.payload.result.time_cost", None),
824 ("$.payload.not-exists", None),
825 ("$.not-exists", None),
826 ("$", None),
827 ];
828
829 let mut jsons = json_strings
830 .iter()
831 .map(|s| {
832 let value = jsonb::parse_value(s.as_bytes()).unwrap();
833 Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
834 })
835 .collect::<Vec<_>>();
836 let json_struct_arrays =
837 std::iter::repeat_n(json_struct, path_expects.len() - jsons.len()).collect::<Vec<_>>();
838 jsons.extend(json_struct_arrays);
839
840 for i in 0..jsons.len() {
841 let json = &jsons[i];
842 let (path, expect) = path_expects[i];
843
844 let args = ScalarFunctionArgs {
845 args: vec![
846 ColumnarValue::Array(json.clone()),
847 ColumnarValue::Scalar(path.into()),
848 ],
849 arg_fields: vec![],
850 number_rows: 1,
851 return_field: Arc::new(Field::new("x", DataType::Boolean, false)),
852 config_options: Arc::new(Default::default()),
853 };
854 let result = json_get_bool
855 .invoke_with_args(args)
856 .and_then(|x| x.to_array(1))
857 .unwrap();
858
859 let result = result.as_boolean();
860 assert_eq!(1, result.len());
861 let actual = result.is_valid(0).then(|| result.value(0));
862 assert_eq!(actual, expect);
863 }
864 }
865
866 #[test]
867 fn test_json_get_string() {
868 let json_get_string = JsonGetString::default();
869
870 assert_eq!("json_get_string", json_get_string.name());
871 assert_eq!(
872 DataType::Utf8View,
873 json_get_string
874 .return_type(&[DataType::Binary, DataType::Utf8])
875 .unwrap()
876 );
877
878 let json_strings = [
879 r#"{"a": {"b": "a"}, "b": "b", "c": "c"}"#,
880 r#"{"a": "d", "b": {"c": "e"}, "c": "f"}"#,
881 r#"{"a": "g", "b": "h", "c": {"a": "g"}}"#,
882 ];
883 let json_struct = test_json_struct();
884
885 let paths = vec![
886 "$.a.b",
887 "$.a",
888 "",
889 "$.kind",
890 "$.payload.code",
891 "$.payload.result.time_cost",
892 "$.payload",
893 "$.payload.success",
894 "$.payload.result",
895 "$.payload.result.error",
896 "$.payload.result.not-exists",
897 "$.payload.not-exists",
898 "$.not-exists",
899 "$",
900 ];
901 let expects = [
902 Some("a"),
903 Some("d"),
904 None,
905 Some("foo"),
906 Some("404"),
907 Some("1.234"),
908 Some(
909 r#"{"code":404,"result":{"error":"not found","time_cost":1.234},"success":false}"#,
910 ),
911 Some("false"),
912 Some(r#"{"error":"not found","time_cost":1.234}"#),
913 Some("not found"),
914 None,
915 None,
916 None,
917 Some(
918 r#"{"kind":"foo","payload":{"code":404,"result":{"error":"not found","time_cost":1.234},"success":false}}"#,
919 ),
920 ];
921
922 let mut jsons = json_strings
923 .iter()
924 .map(|s| {
925 let value = jsonb::parse_value(s.as_bytes()).unwrap();
926 Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
927 })
928 .collect::<Vec<_>>();
929 let json_struct_arrays =
930 std::iter::repeat_n(json_struct, expects.len() - jsons.len()).collect::<Vec<_>>();
931 jsons.extend(json_struct_arrays);
932
933 for i in 0..jsons.len() {
934 let json = &jsons[i];
935 let path = paths[i];
936 let expect = expects[i];
937
938 let args = ScalarFunctionArgs {
939 args: vec![
940 ColumnarValue::Array(json.clone()),
941 ColumnarValue::Scalar(path.into()),
942 ],
943 arg_fields: vec![],
944 number_rows: 1,
945 return_field: Arc::new(Field::new("x", DataType::Utf8View, false)),
946 config_options: Arc::new(Default::default()),
947 };
948 let result = json_get_string
949 .invoke_with_args(args)
950 .and_then(|x| x.to_array(1))
951 .unwrap();
952
953 let result = result.as_string_view();
954 assert_eq!(1, result.len());
955 let actual = result.is_valid(0).then(|| result.value(0));
956 assert_eq!(actual, expect);
957 }
958 }
959
960 #[test]
961 fn test_json_get_object() -> Result<()> {
962 let udf = JsonGetObject::default();
963 assert_eq!("json_get_object", udf.name());
964 assert_eq!(
965 DataType::BinaryView,
966 udf.return_type(&[DataType::BinaryView, DataType::Utf8View])?
967 );
968
969 let json_value = parse_string_to_jsonb(r#"{"a": {"b": {"c": {"d": 1}}}}"#).unwrap();
970 let paths = vec!["$", "$.a", "$.a.b", "$.a.b.c", "$.a.b.c.d", "$.e", "$.a.e"];
971 let number_rows = paths.len();
972
973 let args = ScalarFunctionArgs {
974 args: vec![
975 ColumnarValue::Scalar(ScalarValue::Binary(Some(json_value))),
976 ColumnarValue::Array(Arc::new(StringArray::from_iter_values(paths))),
977 ],
978 arg_fields: vec![],
979 number_rows,
980 return_field: Arc::new(Field::new("x", DataType::Binary, false)),
981 config_options: Arc::new(Default::default()),
982 };
983 let result = udf
984 .invoke_with_args(args)
985 .and_then(|x| x.to_array(number_rows))?;
986 let result = result.as_binary_view();
987
988 let expected = &BinaryViewArray::from_iter(
989 vec![
990 Some(r#"{"a": {"b": {"c": {"d": 1}}}}"#),
991 Some(r#"{"b": {"c": {"d": 1}}}"#),
992 Some(r#"{"c": {"d": 1}}"#),
993 Some(r#"{"d": 1}"#),
994 None,
995 None,
996 None,
997 ]
998 .into_iter()
999 .map(|x| x.and_then(|s| parse_string_to_jsonb(s).ok())),
1000 );
1001 assert_eq!(result, expected);
1002 Ok(())
1003 }
1004
1005 #[test]
1006 fn test_json_get_with_type() {
1007 let json_get_with_type = JsonGetWithType::default();
1008
1009 assert_eq!("json_get", json_get_with_type.name());
1010
1011 let json_strings = [
1012 r#"{"a": {"b": "a"}, "b": "b", "c": "c"}"#,
1013 r#"{"a": "d", "b": {"c": "e"}, "c": "f"}"#,
1014 r#"{"a": "g", "b": "h", "c": {"a": "g"}}"#,
1015 ];
1016 let json_struct = test_json_struct();
1017
1018 let paths = vec![
1019 "$.a.b",
1020 "$.a",
1021 "",
1022 "$.kind",
1023 "$.payload.code",
1024 "$.payload.result.time_cost",
1025 "$.payload",
1026 "$.payload.success",
1027 "$.payload.result",
1028 "$.payload.result.error",
1029 "$.payload.result.not-exists",
1030 "$.payload.not-exists",
1031 "$.not-exists",
1032 "$",
1033 ];
1034 let expects = [
1035 Some("a"),
1036 Some("d"),
1037 None,
1038 Some("foo"),
1039 Some("404"),
1040 Some("1.234"),
1041 Some(
1042 r#"{"code":404,"result":{"error":"not found","time_cost":1.234},"success":false}"#,
1043 ),
1044 Some("false"),
1045 Some(r#"{"error":"not found","time_cost":1.234}"#),
1046 Some("not found"),
1047 None,
1048 None,
1049 None,
1050 Some(
1051 r#"{"kind":"foo","payload":{"code":404,"result":{"error":"not found","time_cost":1.234},"success":false}}"#,
1052 ),
1053 ];
1054
1055 let mut jsons = json_strings
1056 .iter()
1057 .map(|s| {
1058 let value = jsonb::parse_value(s.as_bytes()).unwrap();
1059 Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
1060 })
1061 .collect::<Vec<_>>();
1062 let json_struct_arrays =
1063 std::iter::repeat_n(json_struct, expects.len() - jsons.len()).collect::<Vec<_>>();
1064 jsons.extend(json_struct_arrays);
1065
1066 for i in 0..jsons.len() {
1067 let json = &jsons[i];
1068 let path = paths[i];
1069 let expect = expects[i];
1070
1071 let args = ScalarFunctionArgs {
1072 args: vec![
1073 ColumnarValue::Array(json.clone()),
1074 ColumnarValue::Scalar(path.into()),
1075 ColumnarValue::Scalar(ScalarValue::Utf8View(None)),
1076 ],
1077 arg_fields: vec![],
1078 number_rows: 1,
1079 return_field: Arc::new(Field::new("x", DataType::Utf8View, false)),
1080 config_options: Arc::new(Default::default()),
1081 };
1082 let result = json_get_with_type
1083 .invoke_with_args(args)
1084 .and_then(|x| x.to_array(1))
1085 .unwrap();
1086
1087 let result = result.as_string_view();
1088 assert_eq!(1, result.len());
1089 let actual = result.is_valid(0).then(|| result.value(0));
1090 assert_eq!(actual, expect);
1091 }
1092
1093 let json_strings = [
1094 r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
1095 r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
1096 r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
1097 ];
1098 let paths = ["$.a.b", "$.a", "$.c", "$.payload.code"];
1099 let expects = [Some(2), Some(4), None, Some(404)];
1100
1101 for (i, (path, expect)) in paths.iter().zip(expects.iter()).enumerate() {
1102 let json = if i < json_strings.len() {
1103 let value = jsonb::parse_value(json_strings[i].as_bytes()).unwrap();
1104 Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
1105 } else {
1106 test_json_struct()
1107 };
1108
1109 let args = ScalarFunctionArgs {
1110 args: vec![
1111 ColumnarValue::Array(json),
1112 ColumnarValue::Scalar((*path).into()),
1113 ColumnarValue::Scalar(ScalarValue::Int64(None)),
1114 ],
1115 arg_fields: vec![],
1116 number_rows: 1,
1117 return_field: Arc::new(Field::new("x", DataType::Int64, false)),
1118 config_options: Arc::new(Default::default()),
1119 };
1120 let result = json_get_with_type
1121 .invoke_with_args(args)
1122 .and_then(|x| x.to_array(1))
1123 .unwrap();
1124
1125 let result = result.as_primitive::<Int64Type>();
1126 assert_eq!(1, result.len());
1127 let actual = result.is_valid(0).then(|| result.value(0));
1128 assert_eq!(actual, *expect);
1129 }
1130
1131 let json_strings = [
1132 r#"{"a": {"b": 2.1}, "b": 2.2, "c": 3.3}"#,
1133 r#"{"a": 4.4, "b": {"c": 6.6}, "c": 6.6}"#,
1134 r#"{"a": 7.7, "b": 8.8, "c": {"a": 7.7}}"#,
1135 ];
1136 let paths = ["$.a.b", "$.a", "$.c", "$.payload.result.time_cost"];
1137 let expects = [Some(2.1), Some(4.4), None, Some(1.234)];
1138
1139 for (i, (path, expect)) in paths.iter().zip(expects.iter()).enumerate() {
1140 let json = if i < json_strings.len() {
1141 let value = jsonb::parse_value(json_strings[i].as_bytes()).unwrap();
1142 Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
1143 } else {
1144 test_json_struct()
1145 };
1146
1147 let args = ScalarFunctionArgs {
1148 args: vec![
1149 ColumnarValue::Array(json),
1150 ColumnarValue::Scalar((*path).into()),
1151 ColumnarValue::Scalar(ScalarValue::Float64(None)),
1152 ],
1153 arg_fields: vec![],
1154 number_rows: 1,
1155 return_field: Arc::new(Field::new("x", DataType::Float64, false)),
1156 config_options: Arc::new(Default::default()),
1157 };
1158 let result = json_get_with_type
1159 .invoke_with_args(args)
1160 .and_then(|x| x.to_array(1))
1161 .unwrap();
1162
1163 let result = result.as_primitive::<Float64Type>();
1164 assert_eq!(1, result.len());
1165 let actual = result.is_valid(0).then(|| result.value(0));
1166 assert_eq!(actual, *expect);
1167 }
1168
1169 let json_strings = [
1170 r#"{"a": {"b": true}, "b": false, "c": true}"#,
1171 r#"{"a": false, "b": {"c": true}, "c": false}"#,
1172 r#"{"a": true, "b": false, "c": {"a": true}}"#,
1173 ];
1174 let paths = ["$.a.b", "$.a", "$.c", "$.payload.success"];
1175 let expects = [Some(true), Some(false), None, Some(false)];
1176
1177 for (i, (path, expect)) in paths.iter().zip(expects.iter()).enumerate() {
1178 let json = if i < json_strings.len() {
1179 let value = jsonb::parse_value(json_strings[i].as_bytes()).unwrap();
1180 Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
1181 } else {
1182 test_json_struct()
1183 };
1184
1185 let args = ScalarFunctionArgs {
1186 args: vec![
1187 ColumnarValue::Array(json),
1188 ColumnarValue::Scalar((*path).into()),
1189 ColumnarValue::Scalar(ScalarValue::Boolean(None)),
1190 ],
1191 arg_fields: vec![],
1192 number_rows: 1,
1193 return_field: Arc::new(Field::new("x", DataType::Boolean, false)),
1194 config_options: Arc::new(Default::default()),
1195 };
1196 let result = json_get_with_type
1197 .invoke_with_args(args)
1198 .and_then(|x| x.to_array(1))
1199 .unwrap();
1200
1201 let result = result.as_boolean();
1202 assert_eq!(1, result.len());
1203 let actual = result.is_valid(0).then(|| result.value(0));
1204 assert_eq!(actual, *expect);
1205 }
1206 }
1207}