1use std::sync::Arc;
16
17use arrow::array::{ArrayRef, BinaryViewArray, new_null_array};
18use arrow::compute;
19use arrow::datatypes::Float64Type;
20use arrow_schema::Field;
21use datafusion_common::arrow::array::{
22 Array, AsArray, BinaryViewBuilder, BooleanBuilder, Float64Builder, Int64Builder,
23 StringViewBuilder,
24};
25use datafusion_common::arrow::datatypes::DataType;
26use datafusion_common::{DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err};
27use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
28use datatypes::arrow_array::{int_array_value_at_index, string_array_value_at_index};
29use datatypes::vectors::json::array::JsonArray;
30use derive_more::Display;
31use serde_json::Value;
32
33use crate::function::{Function, extract_args};
34use crate::helper;
35
36fn get_json_by_path(json: &[u8], path: &str) -> Option<Vec<u8>> {
37 let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
38 match json_path {
39 Ok(json_path) => {
40 let mut sub_jsonb = Vec::new();
41 let mut sub_offsets = Vec::new();
42 match jsonb::get_by_path(json, json_path, &mut sub_jsonb, &mut sub_offsets) {
43 Ok(_) => Some(sub_jsonb),
44 Err(_) => None,
45 }
46 }
47 _ => None,
48 }
49}
50
/// A single extracted JSON value, in one of the representations the result builders
/// know how to convert into their output type.
enum JsonResultValue<'a> {
    /// Bytes produced by a JSON path lookup on a JSONB value (see `jsonb_get`).
    Jsonb(Vec<u8>),
    /// A value addressed as row `usize` of an Arrow column.
    /// Not constructed anywhere in this file at the moment, hence the `expect(unused)`.
    #[expect(unused)]
    JsonStructByColumn(&'a ArrayRef, usize),
    /// A borrowed `serde_json` value, as produced while walking a struct-encoded JSON array.
    JsonStructByValue(&'a Value),
}
57
/// Type-erased builder for the output array of the `json_get*` functions.
///
/// One implementation exists per supported output type; `result_builder` picks the
/// implementation from the requested data type.
trait JsonGetResultBuilder {
    /// Converts `value` to the builder's output type and appends it as the next row.
    ///
    /// The implementations in this file append a null when the value cannot be
    /// represented in the output type.
    fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()>;

    /// Appends a null as the next row.
    fn append_null(&mut self);

    /// Finishes the underlying Arrow builder and returns the rows appended so far as an array.
    fn build(&mut self) -> ArrayRef;
}
65
66fn result_builder(
67 len: usize,
68 with_type: Option<&DataType>,
69) -> Result<Box<dyn JsonGetResultBuilder>> {
70 let builder = if let Some(t) = with_type {
71 match t {
72 DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
73 Box::new(StringResultBuilder(StringViewBuilder::with_capacity(len)))
74 as Box<dyn JsonGetResultBuilder>
75 }
76 DataType::Int64 => Box::new(IntResultBuilder(Int64Builder::with_capacity(len))),
77 DataType::Float64 => Box::new(FloatResultBuilder(Float64Builder::with_capacity(len))),
78 DataType::Boolean => Box::new(BoolResultBuilder(BooleanBuilder::with_capacity(len))),
79 t => {
80 return exec_err!("json_get with unknown type {t}");
81 }
82 }
83 } else {
84 Box::new(StringResultBuilder(StringViewBuilder::with_capacity(len)))
85 };
86 Ok(builder)
87}
88
89struct StringResultBuilder(StringViewBuilder);
91
92impl JsonGetResultBuilder for StringResultBuilder {
93 fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()> {
94 match value {
95 JsonResultValue::Jsonb(value) => self.0.append_option(jsonb::to_str(&value).ok()),
96 JsonResultValue::JsonStructByColumn(column, i) => {
97 if let Some(v) = string_array_value_at_index(column, i) {
98 self.0.append_value(v);
99 } else {
100 self.0
101 .append_value(arrow_cast::display::array_value_to_string(column, i)?);
102 }
103 }
104 JsonResultValue::JsonStructByValue(value) => {
105 if let Some(s) = value.as_str() {
106 self.0.append_value(s)
107 } else {
108 self.0.append_value(value.to_string())
109 }
110 }
111 }
112 Ok(())
113 }
114
115 fn append_null(&mut self) {
116 self.0.append_null();
117 }
118
119 fn build(&mut self) -> ArrayRef {
120 Arc::new(self.0.finish())
121 }
122}
123
124#[derive(Default, Display, Debug)]
125#[display("{}", Self::NAME.to_ascii_uppercase())]
126pub struct JsonGetString(JsonGetWithType);
127
128impl JsonGetString {
129 pub const NAME: &'static str = "json_get_string";
130}
131
132impl Function for JsonGetString {
133 fn name(&self) -> &str {
134 Self::NAME
135 }
136
137 fn return_type(&self, _: &[DataType]) -> Result<DataType> {
138 Ok(DataType::Utf8View)
139 }
140
141 fn signature(&self) -> &Signature {
142 &self.0.signature
143 }
144
145 fn invoke_with_args(&self, mut args: ScalarFunctionArgs) -> Result<ColumnarValue> {
146 args.args
147 .push(ColumnarValue::Scalar(ScalarValue::Utf8View(None)));
148 self.0.invoke_with_args(args)
149 }
150}
151
152struct IntResultBuilder(Int64Builder);
153
154impl JsonGetResultBuilder for IntResultBuilder {
155 fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()> {
156 match value {
157 JsonResultValue::Jsonb(value) => self.0.append_option(jsonb::to_i64(&value).ok()),
158 JsonResultValue::JsonStructByColumn(column, i) => {
159 self.0.append_option(int_array_value_at_index(column, i))
160 }
161 JsonResultValue::JsonStructByValue(value) => self.0.append_option(value.as_i64()),
162 }
163 Ok(())
164 }
165
166 fn append_null(&mut self) {
167 self.0.append_null();
168 }
169
170 fn build(&mut self) -> ArrayRef {
171 Arc::new(self.0.finish())
172 }
173}
174
175#[derive(Default, Display, Debug)]
176#[display("{}", Self::NAME.to_ascii_uppercase())]
177pub struct JsonGetInt(JsonGetWithType);
178
179impl JsonGetInt {
180 pub const NAME: &'static str = "json_get_int";
181}
182
183impl Function for JsonGetInt {
184 fn name(&self) -> &str {
185 Self::NAME
186 }
187
188 fn return_type(&self, _: &[DataType]) -> Result<DataType> {
189 Ok(DataType::Int64)
190 }
191
192 fn signature(&self) -> &Signature {
193 &self.0.signature
194 }
195
196 fn invoke_with_args(&self, mut args: ScalarFunctionArgs) -> Result<ColumnarValue> {
197 args.args
198 .push(ColumnarValue::Scalar(ScalarValue::Int64(None)));
199 self.0.invoke_with_args(args)
200 }
201}
202
203struct FloatResultBuilder(Float64Builder);
204
205impl JsonGetResultBuilder for FloatResultBuilder {
206 fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()> {
207 match value {
208 JsonResultValue::Jsonb(value) => self.0.append_option(jsonb::to_f64(&value).ok()),
209 JsonResultValue::JsonStructByColumn(column, i) => {
210 let result = if column.data_type() == &DataType::Float64 {
211 column
212 .as_primitive::<Float64Type>()
213 .is_valid(i)
214 .then(|| column.as_primitive::<Float64Type>().value(i))
215 } else {
216 None
217 };
218 self.0.append_option(result);
219 }
220 JsonResultValue::JsonStructByValue(value) => self.0.append_option(value.as_f64()),
221 }
222 Ok(())
223 }
224
225 fn append_null(&mut self) {
226 self.0.append_null();
227 }
228
229 fn build(&mut self) -> ArrayRef {
230 Arc::new(self.0.finish())
231 }
232}
233
234#[derive(Default, Display, Debug)]
235#[display("{}", Self::NAME.to_ascii_uppercase())]
236pub struct JsonGetFloat(JsonGetWithType);
237
238impl JsonGetFloat {
239 pub const NAME: &'static str = "json_get_float";
240}
241
242impl Function for JsonGetFloat {
243 fn name(&self) -> &str {
244 Self::NAME
245 }
246
247 fn return_type(&self, _: &[DataType]) -> Result<DataType> {
248 Ok(DataType::Float64)
249 }
250
251 fn signature(&self) -> &Signature {
252 &self.0.signature
253 }
254
255 fn invoke_with_args(&self, mut args: ScalarFunctionArgs) -> Result<ColumnarValue> {
256 args.args
257 .push(ColumnarValue::Scalar(ScalarValue::Float64(None)));
258 self.0.invoke_with_args(args)
259 }
260}
261
262struct BoolResultBuilder(BooleanBuilder);
263
264impl JsonGetResultBuilder for BoolResultBuilder {
265 fn append_value(&mut self, value: JsonResultValue<'_>) -> Result<()> {
266 match value {
267 JsonResultValue::Jsonb(value) => self.0.append_option(jsonb::to_bool(&value).ok()),
268 JsonResultValue::JsonStructByColumn(column, i) => {
269 let result = if column.data_type() == &DataType::Boolean {
270 column
271 .as_boolean()
272 .is_valid(i)
273 .then(|| column.as_boolean().value(i))
274 } else {
275 None
276 };
277 self.0.append_option(result);
278 }
279 JsonResultValue::JsonStructByValue(value) => self.0.append_option(value.as_bool()),
280 }
281 Ok(())
282 }
283
284 fn append_null(&mut self) {
285 self.0.append_null();
286 }
287
288 fn build(&mut self) -> ArrayRef {
289 Arc::new(self.0.finish())
290 }
291}
292
293#[derive(Default, Display, Debug)]
294#[display("{}", Self::NAME.to_ascii_uppercase())]
295pub struct JsonGetBool(JsonGetWithType);
296
297impl JsonGetBool {
298 pub const NAME: &'static str = "json_get_bool";
299}
300
301impl Function for JsonGetBool {
302 fn name(&self) -> &str {
303 Self::NAME
304 }
305
306 fn return_type(&self, _: &[DataType]) -> Result<DataType> {
307 Ok(DataType::Boolean)
308 }
309
310 fn signature(&self) -> &Signature {
311 &self.0.signature
312 }
313
314 fn invoke_with_args(&self, mut args: ScalarFunctionArgs) -> Result<ColumnarValue> {
315 args.args
316 .push(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
317 self.0.invoke_with_args(args)
318 }
319}
320
321fn jsonb_get(
322 jsons: &BinaryViewArray,
323 path: &str,
324 builder: &mut dyn JsonGetResultBuilder,
325) -> Result<()> {
326 let size = jsons.len();
327 for i in 0..size {
328 let json = jsons.is_valid(i).then(|| jsons.value(i));
329 let result = match json {
330 Some(json) => get_json_by_path(json, path),
331 _ => None,
332 };
333 if let Some(v) = result {
334 builder.append_value(JsonResultValue::Jsonb(v))?;
335 } else {
336 builder.append_null();
337 }
338 }
339 Ok(())
340}
341
342fn json_struct_get(array: &ArrayRef, path: &str, with_type: Option<&DataType>) -> Result<ArrayRef> {
343 let path = path.trim_start_matches("$");
344
345 let mut direct = true;
348 let mut current = array;
349 for segment in path.split(".").filter(|s| !s.is_empty()) {
350 if matches!(current.data_type(), DataType::Binary) {
351 direct = false;
352 break;
353 }
354
355 let Some(json) = current.as_struct_opt() else {
356 return exec_err!("unknown JSON array datatype: {}", current.data_type());
357 };
358 let Some(sub_json) = json.column_by_name(segment) else {
359 return Ok(new_null_array(
360 with_type.unwrap_or(&DataType::Utf8View),
361 array.len(),
362 ));
363 };
364 current = sub_json;
365 }
366
367 fn build_with<F>(
369 input: &ArrayRef,
370 with_type: Option<&DataType>,
371 value_mapper: F,
372 ) -> Result<ArrayRef>
373 where
374 for<'a> F: Fn(&'a Value) -> Option<&'a Value>,
375 {
376 let json_array = JsonArray::from(input);
377
378 let mut builder = result_builder(input.len(), with_type)?;
379 for i in 0..input.len() {
380 if input.is_null(i) {
381 builder.append_null();
382 continue;
383 }
384
385 let value = json_array
386 .try_get_value(i)
387 .map_err(|e| exec_datafusion_err!("{e}"))?;
388 let value = value_mapper(&value);
389
390 if let Some(value) = value {
391 builder.append_value(JsonResultValue::JsonStructByValue(value))?;
392 } else {
393 builder.append_null();
394 }
395 }
396 Ok(builder.build())
397 }
398
399 if direct {
400 let casted = if let Some(with_type) = with_type
401 && current.data_type() != with_type
402 {
403 match (current.data_type(), with_type) {
404 (DataType::Binary, _) => {
405 build_with(current, Some(with_type), |v| Some(v))?
408 }
409 (DataType::List(_) | DataType::Struct(_), with_type) if with_type.is_string() => {
410 build_with(current, Some(with_type), |v| Some(v))?
414 }
415 (_, with_type) if with_type.is_string() => {
416 arrow_cast::cast(current.as_ref(), with_type)?
419 }
420 _ => new_null_array(with_type, current.len()),
421 }
422 } else {
423 current.clone()
424 };
425 return Ok(casted);
426 }
427
428 let mut pointer = path.replace(".", "/");
431 if !pointer.starts_with("/") {
432 pointer = format!("/{}", pointer);
433 }
434 build_with(array, with_type, |value| value.pointer(&pointer))
435}
436
437#[derive(Debug, Display)]
441#[display("{}", Self::NAME.to_ascii_uppercase())]
442pub struct JsonGetWithType {
443 signature: Signature,
444}
445
446impl JsonGetWithType {
447 pub const NAME: &'static str = "json_get";
448}
449
450impl Default for JsonGetWithType {
451 fn default() -> Self {
452 Self {
453 signature: Signature::variadic_any(Volatility::Immutable),
454 }
455 }
456}
457
458impl Function for JsonGetWithType {
459 fn name(&self) -> &str {
460 Self::NAME
461 }
462
463 fn return_type(&self, _input_types: &[DataType]) -> datafusion_common::Result<DataType> {
464 Err(DataFusionError::Internal(
465 "This method isn't meant to be called".to_string(),
466 ))
467 }
468
469 fn return_field_from_args(
470 &self,
471 args: datafusion_expr::ReturnFieldArgs<'_>,
472 ) -> datafusion_common::Result<Arc<Field>> {
473 match args.scalar_arguments.get(2) {
474 Some(Some(v)) => {
475 let mut data_type = v.data_type();
476 if matches!(data_type, DataType::Utf8 | DataType::LargeUtf8) {
477 data_type = DataType::Utf8View;
478 }
479
480 Ok(Arc::new(Field::new(self.name(), data_type, true)))
481 }
482 _ => Ok(Arc::new(Field::new(self.name(), DataType::Utf8View, true))),
483 }
484 }
485
486 fn signature(&self) -> &Signature {
487 &self.signature
488 }
489
490 fn invoke_with_args(
491 &self,
492 args: ScalarFunctionArgs,
493 ) -> datafusion_common::Result<ColumnarValue> {
494 let args_len = args.args.len();
495 if args_len != 2 && args_len != 3 {
496 return exec_err!("json_get expects 2 or 3 arguments, got {args_len}");
497 }
498
499 let arg0 = args.args[0].to_array(args.number_rows)?;
500 let len = arg0.len();
501
502 let path = if let ColumnarValue::Scalar(path) = &args.args[1]
503 && let Some(Some(path)) = path.try_as_str()
504 {
505 path
506 } else {
507 return exec_err!(
508 r#"json_get expects a string literal "path" argument, got {}"#,
509 args.args[1]
510 );
511 };
512
513 let with_type = args.args.get(2).map(|x| x.data_type());
514 let result = match arg0.data_type() {
515 DataType::Binary | DataType::LargeBinary | DataType::BinaryView => {
516 let arg0 = compute::cast(&arg0, &DataType::BinaryView)?;
517 let jsons = arg0.as_binary_view();
518
519 let mut builder = result_builder(len, with_type.as_ref())?;
520 jsonb_get(jsons, path, builder.as_mut())?;
521 builder.build()
522 }
523 DataType::Struct(_) => json_struct_get(&arg0, path, with_type.as_ref())?,
524 _ => {
525 return exec_err!("JSON_GET not supported argument type {}", arg0.data_type());
526 }
527 };
528
529 Ok(ColumnarValue::Array(result))
530 }
531}
532
533#[derive(Display, Debug)]
535#[display("{}", Self::NAME.to_ascii_uppercase())]
536pub(super) struct JsonGetObject {
537 signature: Signature,
538}
539
540impl JsonGetObject {
541 const NAME: &'static str = "json_get_object";
542}
543
544impl Default for JsonGetObject {
545 fn default() -> Self {
546 Self {
547 signature: helper::one_of_sigs2(
548 vec![
549 DataType::Binary,
550 DataType::LargeBinary,
551 DataType::BinaryView,
552 ],
553 vec![DataType::UInt8, DataType::LargeUtf8, DataType::Utf8View],
554 ),
555 }
556 }
557}
558
559impl Function for JsonGetObject {
560 fn name(&self) -> &str {
561 Self::NAME
562 }
563
564 fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
565 Ok(DataType::BinaryView)
566 }
567
568 fn signature(&self) -> &Signature {
569 &self.signature
570 }
571
572 fn invoke_with_args(
573 &self,
574 args: ScalarFunctionArgs,
575 ) -> datafusion_common::Result<ColumnarValue> {
576 let [arg0, arg1] = extract_args(self.name(), &args)?;
577 let arg0 = compute::cast(&arg0, &DataType::BinaryView)?;
578 let jsons = arg0.as_binary_view();
579 let arg1 = compute::cast(&arg1, &DataType::Utf8View)?;
580 let paths = arg1.as_string_view();
581
582 let len = jsons.len();
583 let mut builder = BinaryViewBuilder::with_capacity(len);
584
585 for i in 0..len {
586 let json = jsons.is_valid(i).then(|| jsons.value(i));
587 let path = paths.is_valid(i).then(|| paths.value(i));
588 let result = if let (Some(json), Some(path)) = (json, path) {
589 let result = jsonb::jsonpath::parse_json_path(path.as_bytes()).and_then(|path| {
590 let mut data = Vec::new();
591 let mut offset = Vec::new();
592 jsonb::get_by_path(json, path, &mut data, &mut offset)
593 .map(|()| jsonb::is_object(&data).then_some(data))
594 });
595 result.map_err(|e| DataFusionError::Execution(e.to_string()))?
596 } else {
597 None
598 };
599 builder.append_option(result);
600 }
601
602 Ok(ColumnarValue::Array(Arc::new(builder.finish())))
603 }
604}
605
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::array::{BooleanArray, Int64Array, StructArray};
    use arrow_schema::{Field, Fields};
    use datafusion_common::ScalarValue;
    use datafusion_common::arrow::array::{BinaryArray, BinaryViewArray, StringArray};
    use datafusion_common::arrow::datatypes::{Float64Type, Int64Type};
    use datatypes::types::parse_string_to_jsonb;
    use serde_json::json;

    use super::*;

    /// JSONB fixtures whose leaves are strings.
    const STRING_JSONS: [&str; 3] = [
        r#"{"a": {"b": "a"}, "b": "b", "c": "c"}"#,
        r#"{"a": "d", "b": {"c": "e"}, "c": "f"}"#,
        r#"{"a": "g", "b": "h", "c": {"a": "g"}}"#,
    ];

    /// JSONB fixtures whose leaves are integers.
    const INT_JSONS: [&str; 3] = [
        r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
        r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
        r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
    ];

    /// JSONB fixtures whose leaves are floats.
    const FLOAT_JSONS: [&str; 3] = [
        r#"{"a": {"b": 2.1}, "b": 2.2, "c": 3.3}"#,
        r#"{"a": 4.4, "b": {"c": 6.6}, "c": 6.6}"#,
        r#"{"a": 7.7, "b": 8.8, "c": {"a": 7.7}}"#,
    ];

    /// JSONB fixtures whose leaves are booleans.
    const BOOL_JSONS: [&str; 3] = [
        r#"{"a": {"b": true}, "b": false, "c": true}"#,
        r#"{"a": false, "b": {"c": true}, "c": false}"#,
        r#"{"a": true, "b": false, "c": {"a": true}}"#,
    ];

    /// Builds a one-row struct-encoded JSON array:
    ///
    /// ```text
    /// kind: "foo" (Utf8)
    /// payload: struct
    ///   code: 404 (Int64)
    ///   success: false (Boolean)
    ///   result: {"error": "not found", "time_cost": 1.234} (JSON text stored as Binary)
    /// ```
    fn test_json_struct() -> ArrayRef {
        let payload_fields = Fields::from(vec![
            Field::new("code", DataType::Int64, true),
            Field::new("success", DataType::Boolean, true),
            Field::new("result", DataType::Binary, true),
        ]);

        let result_json = json!({
            "error": "not found",
            "time_cost": 1.234
        })
        .to_string();
        let payload = StructArray::new(
            payload_fields.clone(),
            vec![
                Arc::new(Int64Array::from_iter([Some(404)])) as ArrayRef,
                Arc::new(BooleanArray::from_iter([Some(false)])),
                Arc::new(BinaryArray::from_iter([Some(result_json.as_bytes())])),
            ],
            None,
        );

        let root_fields = Fields::from(vec![
            Field::new("kind", DataType::Utf8, true),
            Field::new("payload", DataType::Struct(payload_fields), true),
        ]);
        Arc::new(StructArray::new(
            root_fields,
            vec![
                Arc::new(StringArray::from_iter([Some("foo")])) as ArrayRef,
                Arc::new(payload),
            ],
            None,
        ))
    }

    /// Encodes `json` as JSONB and wraps it in a one-row binary array.
    fn jsonb_array(json: &str) -> ArrayRef {
        let value = jsonb::parse_value(json.as_bytes()).unwrap();
        Arc::new(BinaryArray::from_iter_values([value.to_vec()]))
    }

    /// Returns `total` one-row inputs: one JSONB array per entry of `json_strings`,
    /// followed by copies of the struct fixture to fill the remaining slots.
    fn fixture_inputs(json_strings: &[&str], total: usize) -> Vec<ArrayRef> {
        let mut inputs = json_strings
            .iter()
            .map(|s| jsonb_array(s))
            .collect::<Vec<_>>();
        let struct_count = total - inputs.len();
        inputs.extend(std::iter::repeat_n(test_json_struct(), struct_count));
        inputs
    }

    /// Invokes `function` on a single row with `(json, path[, with_type])` and returns the
    /// result as an array. Panics when the invocation fails.
    fn invoke_single_row<F: Function>(
        function: &F,
        json: &ArrayRef,
        path: &str,
        with_type: Option<ScalarValue>,
        return_type: DataType,
    ) -> ArrayRef {
        let mut call_args = vec![
            ColumnarValue::Array(json.clone()),
            ColumnarValue::Scalar(path.into()),
        ];
        call_args.extend(with_type.map(ColumnarValue::Scalar));

        let args = ScalarFunctionArgs {
            args: call_args,
            arg_fields: vec![],
            number_rows: 1,
            return_field: Arc::new(Field::new("x", return_type, false)),
            config_options: Arc::new(Default::default()),
        };
        function
            .invoke_with_args(args)
            .and_then(|x| x.to_array(1))
            .unwrap()
    }

    /// `(path, expected string)` pairs for the string-typed lookups. The first three
    /// target the JSONB fixtures, the rest target the struct fixture.
    fn string_cases() -> Vec<(&'static str, Option<&'static str>)> {
        vec![
            ("$.a.b", Some("a")),
            ("$.a", Some("d")),
            ("", None),
            ("$.kind", Some("foo")),
            ("$.payload.code", Some("404")),
            ("$.payload.result.time_cost", Some("1.234")),
            (
                "$.payload",
                Some(
                    r#"{"code":404,"result":{"error":"not found","time_cost":1.234},"success":false}"#,
                ),
            ),
            ("$.payload.success", Some("false")),
            (
                "$.payload.result",
                Some(r#"{"error":"not found","time_cost":1.234}"#),
            ),
            ("$.payload.result.error", Some("not found")),
            ("$.payload.result.not-exists", None),
            ("$.payload.not-exists", None),
            ("$.not-exists", None),
            (
                "$",
                Some(
                    r#"{"kind":"foo","payload":{"code":404,"result":{"error":"not found","time_cost":1.234},"success":false}}"#,
                ),
            ),
        ]
    }

    #[test]
    fn test_json_get_int() {
        let json_get_int = JsonGetInt::default();

        assert_eq!("json_get_int", json_get_int.name());
        assert_eq!(
            DataType::Int64,
            json_get_int
                .return_type(&[DataType::Binary, DataType::Utf8])
                .unwrap()
        );

        let cases: Vec<(&str, Option<i64>)> = vec![
            ("$.a.b", Some(2)),
            ("$.a", Some(4)),
            ("$.c", None),
            ("$.kind", None),
            ("$.payload.code", Some(404)),
            ("$.payload.success", None),
            ("$.payload.result.time_cost", None),
            ("$.payload.not-exists", None),
            ("$.not-exists", None),
            ("$", None),
        ];
        let inputs = fixture_inputs(&INT_JSONS, cases.len());

        for (json, (path, expect)) in inputs.iter().zip(cases) {
            let result = invoke_single_row(&json_get_int, json, path, None, DataType::Int64);

            let result = result.as_primitive::<Int64Type>();
            assert_eq!(1, result.len());
            let actual = result.is_valid(0).then(|| result.value(0));
            assert_eq!(actual, expect);
        }
    }

    #[test]
    fn test_json_get_float() {
        let json_get_float = JsonGetFloat::default();

        assert_eq!("json_get_float", json_get_float.name());
        assert_eq!(
            DataType::Float64,
            json_get_float
                .return_type(&[DataType::Binary, DataType::Utf8])
                .unwrap()
        );

        let cases: Vec<(&str, Option<f64>)> = vec![
            ("$.a.b", Some(2.1)),
            ("$.a", Some(4.4)),
            ("$.c", None),
            ("$.kind", None),
            ("$.payload.code", None),
            ("$.payload.success", None),
            ("$.payload.result.time_cost", Some(1.234)),
            ("$.payload.not-exists", None),
            ("$.not-exists", None),
            ("$", None),
        ];
        let inputs = fixture_inputs(&FLOAT_JSONS, cases.len());

        for (json, (path, expect)) in inputs.iter().zip(cases) {
            let result = invoke_single_row(&json_get_float, json, path, None, DataType::Float64);

            let result = result.as_primitive::<Float64Type>();
            assert_eq!(1, result.len());
            let actual = result.is_valid(0).then(|| result.value(0));
            assert_eq!(actual, expect);
        }
    }

    #[test]
    fn test_json_get_bool() {
        let json_get_bool = JsonGetBool::default();

        assert_eq!("json_get_bool", json_get_bool.name());
        assert_eq!(
            DataType::Boolean,
            json_get_bool
                .return_type(&[DataType::Binary, DataType::Utf8])
                .unwrap()
        );

        let cases: Vec<(&str, Option<bool>)> = vec![
            ("$.a.b", Some(true)),
            ("$.a", Some(false)),
            ("$.c", None),
            ("$.kind", None),
            ("$.payload.code", None),
            ("$.payload.success", Some(false)),
            ("$.payload.result.time_cost", None),
            ("$.payload.not-exists", None),
            ("$.not-exists", None),
            ("$", None),
        ];
        let inputs = fixture_inputs(&BOOL_JSONS, cases.len());

        for (json, (path, expect)) in inputs.iter().zip(cases) {
            let result = invoke_single_row(&json_get_bool, json, path, None, DataType::Boolean);

            let result = result.as_boolean();
            assert_eq!(1, result.len());
            let actual = result.is_valid(0).then(|| result.value(0));
            assert_eq!(actual, expect);
        }
    }

    #[test]
    fn test_json_get_string() {
        let json_get_string = JsonGetString::default();

        assert_eq!("json_get_string", json_get_string.name());
        assert_eq!(
            DataType::Utf8View,
            json_get_string
                .return_type(&[DataType::Binary, DataType::Utf8])
                .unwrap()
        );

        let cases = string_cases();
        let inputs = fixture_inputs(&STRING_JSONS, cases.len());

        for (json, (path, expect)) in inputs.iter().zip(cases) {
            let result = invoke_single_row(&json_get_string, json, path, None, DataType::Utf8View);

            let result = result.as_string_view();
            assert_eq!(1, result.len());
            let actual = result.is_valid(0).then(|| result.value(0));
            assert_eq!(actual, expect);
        }
    }

    #[test]
    fn test_json_get_object() -> Result<()> {
        let udf = JsonGetObject::default();
        assert_eq!("json_get_object", udf.name());
        assert_eq!(
            DataType::BinaryView,
            udf.return_type(&[DataType::BinaryView, DataType::Utf8View])?
        );

        let json_value = parse_string_to_jsonb(r#"{"a": {"b": {"c": {"d": 1}}}}"#).unwrap();
        // `(path, expected object)`; `None` where the path misses or hits a non-object.
        let cases = [
            ("$", Some(r#"{"a": {"b": {"c": {"d": 1}}}}"#)),
            ("$.a", Some(r#"{"b": {"c": {"d": 1}}}"#)),
            ("$.a.b", Some(r#"{"c": {"d": 1}}"#)),
            ("$.a.b.c", Some(r#"{"d": 1}"#)),
            ("$.a.b.c.d", None),
            ("$.e", None),
            ("$.a.e", None),
        ];
        let number_rows = cases.len();
        let paths = cases.iter().map(|(path, _)| *path);

        let args = ScalarFunctionArgs {
            args: vec![
                ColumnarValue::Scalar(ScalarValue::Binary(Some(json_value))),
                ColumnarValue::Array(Arc::new(StringArray::from_iter_values(paths))),
            ],
            arg_fields: vec![],
            number_rows,
            return_field: Arc::new(Field::new("x", DataType::Binary, false)),
            config_options: Arc::new(Default::default()),
        };
        let result = udf
            .invoke_with_args(args)
            .and_then(|x| x.to_array(number_rows))?;
        let result = result.as_binary_view();

        let expected = &BinaryViewArray::from_iter(
            cases
                .iter()
                .map(|(_, object)| object.and_then(|s| parse_string_to_jsonb(s).ok())),
        );
        assert_eq!(result, expected);
        Ok(())
    }

    #[test]
    fn test_json_get_with_type() {
        let json_get_with_type = JsonGetWithType::default();

        assert_eq!("json_get", json_get_with_type.name());

        // String-typed lookups.
        let cases = string_cases();
        let inputs = fixture_inputs(&STRING_JSONS, cases.len());
        for (json, (path, expect)) in inputs.iter().zip(cases) {
            let result = invoke_single_row(
                &json_get_with_type,
                json,
                path,
                Some(ScalarValue::Utf8View(None)),
                DataType::Utf8View,
            );

            let result = result.as_string_view();
            assert_eq!(1, result.len());
            let actual = result.is_valid(0).then(|| result.value(0));
            assert_eq!(actual, expect);
        }

        // Integer-typed lookups.
        let cases: Vec<(&str, Option<i64>)> = vec![
            ("$.a.b", Some(2)),
            ("$.a", Some(4)),
            ("$.c", None),
            ("$.payload.code", Some(404)),
        ];
        let inputs = fixture_inputs(&INT_JSONS, cases.len());
        for (json, (path, expect)) in inputs.iter().zip(cases) {
            let result = invoke_single_row(
                &json_get_with_type,
                json,
                path,
                Some(ScalarValue::Int64(None)),
                DataType::Int64,
            );

            let result = result.as_primitive::<Int64Type>();
            assert_eq!(1, result.len());
            let actual = result.is_valid(0).then(|| result.value(0));
            assert_eq!(actual, expect);
        }

        // Float-typed lookups.
        let cases: Vec<(&str, Option<f64>)> = vec![
            ("$.a.b", Some(2.1)),
            ("$.a", Some(4.4)),
            ("$.c", None),
            ("$.payload.result.time_cost", Some(1.234)),
        ];
        let inputs = fixture_inputs(&FLOAT_JSONS, cases.len());
        for (json, (path, expect)) in inputs.iter().zip(cases) {
            let result = invoke_single_row(
                &json_get_with_type,
                json,
                path,
                Some(ScalarValue::Float64(None)),
                DataType::Float64,
            );

            let result = result.as_primitive::<Float64Type>();
            assert_eq!(1, result.len());
            let actual = result.is_valid(0).then(|| result.value(0));
            assert_eq!(actual, expect);
        }

        // Boolean-typed lookups.
        let cases: Vec<(&str, Option<bool>)> = vec![
            ("$.a.b", Some(true)),
            ("$.a", Some(false)),
            ("$.c", None),
            ("$.payload.success", Some(false)),
        ];
        let inputs = fixture_inputs(&BOOL_JSONS, cases.len());
        for (json, (path, expect)) in inputs.iter().zip(cases) {
            let result = invoke_single_row(
                &json_get_with_type,
                json,
                path,
                Some(ScalarValue::Boolean(None)),
                DataType::Boolean,
            );

            let result = result.as_boolean();
            assert_eq!(1, result.len());
            let actual = result.is_valid(0).then(|| result.value(0));
            assert_eq!(actual, expect);
        }
    }
}