mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-25 23:49:58 +00:00
refactor: rewrite some UDFs to DataFusion style (final part) (#7023)
Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
@@ -96,6 +96,41 @@ pub fn get_string_from_params<'a>(
|
|||||||
Ok(s)
|
Ok(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
macro_rules! with_match_timestamp_types {
|
||||||
|
($data_type:expr, | $_t:tt $T:ident | $body:tt) => {{
|
||||||
|
macro_rules! __with_ty__ {
|
||||||
|
( $_t $T:ident ) => {
|
||||||
|
$body
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
use datafusion_common::DataFusionError;
|
||||||
|
use datafusion_common::arrow::datatypes::{
|
||||||
|
TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
|
||||||
|
TimestampSecondType,
|
||||||
|
};
|
||||||
|
|
||||||
|
match $data_type {
|
||||||
|
DataType::Timestamp(TimeUnit::Second, _) => Ok(__with_ty__! { TimestampSecondType }),
|
||||||
|
DataType::Timestamp(TimeUnit::Millisecond, _) => {
|
||||||
|
Ok(__with_ty__! { TimestampMillisecondType })
|
||||||
|
}
|
||||||
|
DataType::Timestamp(TimeUnit::Microsecond, _) => {
|
||||||
|
Ok(__with_ty__! { TimestampMicrosecondType })
|
||||||
|
}
|
||||||
|
DataType::Timestamp(TimeUnit::Nanosecond, _) => {
|
||||||
|
Ok(__with_ty__! { TimestampNanosecondType })
|
||||||
|
}
|
||||||
|
_ => Err(DataFusionError::Execution(format!(
|
||||||
|
"not expected data type: '{}'",
|
||||||
|
$data_type
|
||||||
|
))),
|
||||||
|
}
|
||||||
|
}};
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) use with_match_timestamp_types;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|||||||
@@ -13,17 +13,20 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use common_error::ext::BoxedError;
|
use common_error::ext::BoxedError;
|
||||||
use common_query::error::{self, InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
use common_query::error::{self, Result};
|
||||||
use datafusion_expr::Signature;
|
use common_time::{Date, Timestamp};
|
||||||
use datatypes::arrow::datatypes::{DataType, TimeUnit};
|
use datafusion_common::DataFusionError;
|
||||||
use datatypes::prelude::{ConcreteDataType, MutableVector, ScalarVectorBuilder};
|
use datafusion_common::arrow::array::{Array, AsArray, StringViewBuilder};
|
||||||
use datatypes::vectors::{StringVectorBuilder, VectorRef};
|
use datafusion_common::arrow::datatypes::{ArrowTimestampType, DataType, Date32Type, TimeUnit};
|
||||||
use snafu::{ResultExt, ensure};
|
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature};
|
||||||
|
use snafu::ResultExt;
|
||||||
|
|
||||||
use crate::function::{Function, FunctionContext};
|
use crate::function::{Function, extract_args, find_function_context};
|
||||||
use crate::helper;
|
use crate::helper;
|
||||||
|
use crate::helper::with_match_timestamp_types;
|
||||||
|
|
||||||
/// A function that formats timestamp/date/datetime into string by the format
|
/// A function that formats timestamp/date/datetime into string by the format
|
||||||
#[derive(Clone, Debug, Default)]
|
#[derive(Clone, Debug, Default)]
|
||||||
@@ -37,7 +40,7 @@ impl Function for DateFormatFunction {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
||||||
Ok(DataType::Utf8)
|
Ok(DataType::Utf8View)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
@@ -53,68 +56,65 @@ impl Function for DateFormatFunction {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval(&self, func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
fn invoke_with_args(
|
||||||
ensure!(
|
&self,
|
||||||
columns.len() == 2,
|
args: ScalarFunctionArgs,
|
||||||
InvalidFuncArgsSnafu {
|
) -> datafusion_common::Result<ColumnarValue> {
|
||||||
err_msg: format!(
|
let ctx = find_function_context(&args)?;
|
||||||
"The length of the args is not correct, expect 2, have: {}",
|
let timezone = &ctx.query_ctx.timezone();
|
||||||
columns.len()
|
|
||||||
),
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
let left = &columns[0];
|
let [left, arg1] = extract_args(self.name(), &args)?;
|
||||||
let formats = &columns[1];
|
let formats = arg1.as_string::<i32>();
|
||||||
|
|
||||||
let size = left.len();
|
let size = left.len();
|
||||||
let left_datatype = columns[0].data_type();
|
let left_datatype = left.data_type();
|
||||||
let mut results = StringVectorBuilder::with_capacity(size);
|
let mut builder = StringViewBuilder::with_capacity(size);
|
||||||
|
|
||||||
match left_datatype {
|
match left_datatype {
|
||||||
ConcreteDataType::Timestamp(_) => {
|
DataType::Timestamp(_, _) => {
|
||||||
for i in 0..size {
|
with_match_timestamp_types!(left_datatype, |$S| {
|
||||||
let ts = left.get(i).as_timestamp();
|
let array = left.as_primitive::<$S>();
|
||||||
let format = formats.get(i).as_string();
|
for (date, format) in array.iter().zip(formats.iter()) {
|
||||||
|
let result = match (date, format) {
|
||||||
let result = match (ts, format) {
|
(Some(date), Some(format)) => {
|
||||||
(Some(ts), Some(fmt)) => Some(
|
let ts = Timestamp::new(date, $S::UNIT.into());
|
||||||
ts.as_formatted_string(&fmt, Some(&func_ctx.query_ctx.timezone()))
|
let x = ts.as_formatted_string(&format, Some(timezone))
|
||||||
.map_err(BoxedError::new)
|
.map_err(|e| DataFusionError::Execution(format!(
|
||||||
.context(error::ExecuteSnafu)?,
|
"cannot format {ts:?} as '{format}': {e}"
|
||||||
),
|
)))?;
|
||||||
_ => None,
|
Some(x)
|
||||||
};
|
}
|
||||||
|
_ => None
|
||||||
results.push(result.as_deref());
|
};
|
||||||
}
|
builder.append_option(result.as_deref());
|
||||||
|
}
|
||||||
|
})?;
|
||||||
}
|
}
|
||||||
ConcreteDataType::Date(_) => {
|
DataType::Date32 => {
|
||||||
|
let left = left.as_primitive::<Date32Type>();
|
||||||
for i in 0..size {
|
for i in 0..size {
|
||||||
let date = left.get(i).as_date();
|
let date = left.is_valid(i).then(|| Date::from(left.value(i)));
|
||||||
let format = formats.get(i).as_string();
|
let format = formats.is_valid(i).then(|| formats.value(i));
|
||||||
|
|
||||||
let result = match (date, format) {
|
let result = match (date, format) {
|
||||||
(Some(date), Some(fmt)) => date
|
(Some(date), Some(fmt)) => date
|
||||||
.as_formatted_string(&fmt, Some(&func_ctx.query_ctx.timezone()))
|
.as_formatted_string(fmt, Some(timezone))
|
||||||
.map_err(BoxedError::new)
|
.map_err(BoxedError::new)
|
||||||
.context(error::ExecuteSnafu)?,
|
.context(error::ExecuteSnafu)?,
|
||||||
_ => None,
|
_ => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
results.push(result.as_deref());
|
builder.append_option(result.as_deref());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => {
|
x => {
|
||||||
return UnsupportedInputDataTypeSnafu {
|
return Err(DataFusionError::Execution(format!(
|
||||||
function: NAME,
|
"unsupported input data type {x}"
|
||||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
)));
|
||||||
}
|
|
||||||
.fail();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(results.to_vector())
|
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -128,28 +128,32 @@ impl fmt::Display for DateFormatFunction {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use arrow_schema::Field;
|
||||||
|
use datafusion_common::arrow::array::{Date32Array, StringArray, TimestampSecondArray};
|
||||||
|
use datafusion_common::config::ConfigOptions;
|
||||||
use datafusion_expr::{TypeSignature, Volatility};
|
use datafusion_expr::{TypeSignature, Volatility};
|
||||||
use datatypes::prelude::ScalarVector;
|
|
||||||
use datatypes::value::Value;
|
|
||||||
use datatypes::vectors::{DateVector, StringVector, TimestampSecondVector};
|
|
||||||
|
|
||||||
use super::{DateFormatFunction, *};
|
use super::{DateFormatFunction, *};
|
||||||
|
use crate::function::FunctionContext;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_date_format_misc() {
|
fn test_date_format_misc() {
|
||||||
let f = DateFormatFunction;
|
let f = DateFormatFunction;
|
||||||
assert_eq!("date_format", f.name());
|
assert_eq!("date_format", f.name());
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
DataType::Utf8,
|
DataType::Utf8View,
|
||||||
f.return_type(&[DataType::Timestamp(TimeUnit::Microsecond, None)])
|
f.return_type(&[DataType::Timestamp(TimeUnit::Microsecond, None)])
|
||||||
.unwrap()
|
.unwrap()
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
DataType::Utf8,
|
DataType::Utf8View,
|
||||||
f.return_type(&[DataType::Timestamp(TimeUnit::Second, None)])
|
f.return_type(&[DataType::Timestamp(TimeUnit::Second, None)])
|
||||||
.unwrap()
|
.unwrap()
|
||||||
);
|
);
|
||||||
assert_eq!(DataType::Utf8, f.return_type(&[DataType::Date32]).unwrap());
|
assert_eq!(
|
||||||
|
DataType::Utf8View,
|
||||||
|
f.return_type(&[DataType::Date32]).unwrap()
|
||||||
|
);
|
||||||
assert!(matches!(f.signature(),
|
assert!(matches!(f.signature(),
|
||||||
Signature {
|
Signature {
|
||||||
type_signature: TypeSignature::OneOf(sigs),
|
type_signature: TypeSignature::OneOf(sigs),
|
||||||
@@ -175,26 +179,29 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
];
|
];
|
||||||
|
|
||||||
let time_vector = TimestampSecondVector::from(times.clone());
|
let mut config_options = ConfigOptions::default();
|
||||||
let interval_vector = StringVector::from_vec(formats);
|
config_options.extensions.insert(FunctionContext::default());
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(time_vector), Arc::new(interval_vector)];
|
let config_options = Arc::new(config_options);
|
||||||
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
|
|
||||||
|
let args = ScalarFunctionArgs {
|
||||||
|
args: vec![
|
||||||
|
ColumnarValue::Array(Arc::new(TimestampSecondArray::from(times))),
|
||||||
|
ColumnarValue::Array(Arc::new(StringArray::from_iter_values(formats))),
|
||||||
|
],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows: 4,
|
||||||
|
return_field: Arc::new(Field::new("x", DataType::Utf8View, false)),
|
||||||
|
config_options,
|
||||||
|
};
|
||||||
|
let result = f
|
||||||
|
.invoke_with_args(args)
|
||||||
|
.and_then(|x| x.to_array(4))
|
||||||
|
.unwrap();
|
||||||
|
let vector = result.as_string_view();
|
||||||
|
|
||||||
assert_eq!(4, vector.len());
|
assert_eq!(4, vector.len());
|
||||||
for (i, _t) in times.iter().enumerate() {
|
for (actual, expect) in vector.iter().zip(results) {
|
||||||
let v = vector.get(i);
|
assert_eq!(actual, expect);
|
||||||
let result = results.get(i).unwrap();
|
|
||||||
|
|
||||||
if result.is_none() {
|
|
||||||
assert_eq!(Value::Null, v);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
match v {
|
|
||||||
Value::String(s) => {
|
|
||||||
assert_eq!(s.as_utf8(), result.unwrap());
|
|
||||||
}
|
|
||||||
_ => unreachable!(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -216,26 +223,29 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
];
|
];
|
||||||
|
|
||||||
let date_vector = DateVector::from(dates.clone());
|
let mut config_options = ConfigOptions::default();
|
||||||
let interval_vector = StringVector::from_vec(formats);
|
config_options.extensions.insert(FunctionContext::default());
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
|
let config_options = Arc::new(config_options);
|
||||||
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
|
|
||||||
|
let args = ScalarFunctionArgs {
|
||||||
|
args: vec![
|
||||||
|
ColumnarValue::Array(Arc::new(Date32Array::from(dates))),
|
||||||
|
ColumnarValue::Array(Arc::new(StringArray::from_iter_values(formats))),
|
||||||
|
],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows: 4,
|
||||||
|
return_field: Arc::new(Field::new("x", DataType::Utf8View, false)),
|
||||||
|
config_options,
|
||||||
|
};
|
||||||
|
let result = f
|
||||||
|
.invoke_with_args(args)
|
||||||
|
.and_then(|x| x.to_array(4))
|
||||||
|
.unwrap();
|
||||||
|
let vector = result.as_string_view();
|
||||||
|
|
||||||
assert_eq!(4, vector.len());
|
assert_eq!(4, vector.len());
|
||||||
for (i, _t) in dates.iter().enumerate() {
|
for (actual, expect) in vector.iter().zip(results) {
|
||||||
let v = vector.get(i);
|
assert_eq!(actual, expect);
|
||||||
let result = results.get(i).unwrap();
|
|
||||||
|
|
||||||
if result.is_none() {
|
|
||||||
assert_eq!(Value::Null, v);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
match v {
|
|
||||||
Value::String(s) => {
|
|
||||||
assert_eq!(s.as_utf8(), result.unwrap());
|
|
||||||
}
|
|
||||||
_ => unreachable!(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,20 +13,18 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
use std::fmt::{self, Display};
|
use std::fmt::{self, Display};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
use arrow::compute;
|
||||||
use datafusion_expr::{Signature, Volatility};
|
use common_query::error::Result;
|
||||||
use datatypes::arrow::datatypes::DataType;
|
use datafusion_common::arrow::array::{
|
||||||
use datatypes::data_type::ConcreteDataType;
|
Array, AsArray, BooleanBuilder, Float64Builder, Int64Builder, StringViewBuilder,
|
||||||
use datatypes::prelude::VectorRef;
|
|
||||||
use datatypes::scalars::ScalarVectorBuilder;
|
|
||||||
use datatypes::vectors::{
|
|
||||||
BooleanVectorBuilder, Float64VectorBuilder, Int64VectorBuilder, MutableVector,
|
|
||||||
StringVectorBuilder,
|
|
||||||
};
|
};
|
||||||
use snafu::ensure;
|
use datafusion_common::arrow::datatypes::DataType;
|
||||||
|
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature};
|
||||||
|
|
||||||
use crate::function::{Function, FunctionContext};
|
use crate::function::{Function, extract_args};
|
||||||
|
use crate::helper;
|
||||||
|
|
||||||
fn get_json_by_path(json: &[u8], path: &str) -> Option<Vec<u8>> {
|
fn get_json_by_path(json: &[u8], path: &str) -> Option<Vec<u8>> {
|
||||||
let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
|
let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
|
||||||
@@ -64,59 +62,40 @@ macro_rules! json_get {
|
|||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
|
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
|
||||||
Signature::exact(
|
helper::one_of_sigs2(
|
||||||
vec![DataType::Binary, DataType::Utf8],
|
vec![DataType::Binary, DataType::BinaryView],
|
||||||
Volatility::Immutable,
|
vec![DataType::Utf8, DataType::Utf8View],
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
fn invoke_with_args(
|
||||||
ensure!(
|
&self,
|
||||||
columns.len() == 2,
|
args: ScalarFunctionArgs,
|
||||||
InvalidFuncArgsSnafu {
|
) -> datafusion_common::Result<ColumnarValue> {
|
||||||
err_msg: format!(
|
let [arg0, arg1] = extract_args(self.name(), &args)?;
|
||||||
"The length of the args is not correct, expect exactly two, have: {}",
|
let arg0 = compute::cast(&arg0, &DataType::BinaryView)?;
|
||||||
columns.len()
|
let jsons = arg0.as_binary_view();
|
||||||
),
|
let arg1 = compute::cast(&arg1, &DataType::Utf8View)?;
|
||||||
}
|
let paths = arg1.as_string_view();
|
||||||
);
|
|
||||||
let jsons = &columns[0];
|
|
||||||
let paths = &columns[1];
|
|
||||||
|
|
||||||
let size = jsons.len();
|
let size = jsons.len();
|
||||||
let datatype = jsons.data_type();
|
let mut builder = [<$type Builder>]::with_capacity(size);
|
||||||
let mut results = [<$type VectorBuilder>]::with_capacity(size);
|
|
||||||
|
|
||||||
match datatype {
|
for i in 0..size {
|
||||||
// JSON data type uses binary vector
|
let json = jsons.is_valid(i).then(|| jsons.value(i));
|
||||||
ConcreteDataType::Binary(_) => {
|
let path = paths.is_valid(i).then(|| paths.value(i));
|
||||||
for i in 0..size {
|
let result = match (json, path) {
|
||||||
let json = jsons.get_ref(i);
|
(Some(json), Some(path)) => {
|
||||||
let path = paths.get_ref(i);
|
get_json_by_path(json, path)
|
||||||
|
.and_then(|json| { jsonb::[<to_ $rust_type>](&json).ok() })
|
||||||
let json = json.as_binary();
|
|
||||||
let path = path.as_string();
|
|
||||||
let result = match (json, path) {
|
|
||||||
(Ok(Some(json)), Ok(Some(path))) => {
|
|
||||||
get_json_by_path(json, path)
|
|
||||||
.and_then(|json| { jsonb::[<to_ $rust_type>](&json).ok() })
|
|
||||||
}
|
|
||||||
_ => None,
|
|
||||||
};
|
|
||||||
|
|
||||||
results.push(result);
|
|
||||||
}
|
}
|
||||||
}
|
_ => None,
|
||||||
_ => {
|
};
|
||||||
return UnsupportedInputDataTypeSnafu {
|
|
||||||
function: stringify!([<$name:snake>]),
|
builder.append_option(result);
|
||||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
|
||||||
}
|
|
||||||
.fail();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(results.to_vector())
|
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -160,63 +139,43 @@ impl Function for JsonGetString {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
||||||
Ok(DataType::Utf8)
|
Ok(DataType::Utf8View)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
|
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
|
||||||
Signature::exact(
|
helper::one_of_sigs2(
|
||||||
vec![DataType::Binary, DataType::Utf8],
|
vec![DataType::Binary, DataType::BinaryView],
|
||||||
Volatility::Immutable,
|
vec![DataType::Utf8, DataType::Utf8View],
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
fn invoke_with_args(
|
||||||
ensure!(
|
&self,
|
||||||
columns.len() == 2,
|
args: ScalarFunctionArgs,
|
||||||
InvalidFuncArgsSnafu {
|
) -> datafusion_common::Result<ColumnarValue> {
|
||||||
err_msg: format!(
|
let [arg0, arg1] = extract_args(self.name(), &args)?;
|
||||||
"The length of the args is not correct, expect exactly two, have: {}",
|
let arg0 = compute::cast(&arg0, &DataType::BinaryView)?;
|
||||||
columns.len()
|
let jsons = arg0.as_binary_view();
|
||||||
),
|
let arg1 = compute::cast(&arg1, &DataType::Utf8View)?;
|
||||||
}
|
let paths = arg1.as_string_view();
|
||||||
);
|
|
||||||
let jsons = &columns[0];
|
|
||||||
let paths = &columns[1];
|
|
||||||
|
|
||||||
let size = jsons.len();
|
let size = jsons.len();
|
||||||
let datatype = jsons.data_type();
|
let mut builder = StringViewBuilder::with_capacity(size);
|
||||||
let mut results = StringVectorBuilder::with_capacity(size);
|
|
||||||
|
|
||||||
match datatype {
|
for i in 0..size {
|
||||||
// JSON data type uses binary vector
|
let json = jsons.is_valid(i).then(|| jsons.value(i));
|
||||||
ConcreteDataType::Binary(_) => {
|
let path = paths.is_valid(i).then(|| paths.value(i));
|
||||||
for i in 0..size {
|
let result = match (json, path) {
|
||||||
let json = jsons.get_ref(i);
|
(Some(json), Some(path)) => {
|
||||||
let path = paths.get_ref(i);
|
get_json_by_path(json, path).and_then(|json| jsonb::to_str(&json).ok())
|
||||||
|
|
||||||
let json = json.as_binary();
|
|
||||||
let path = path.as_string();
|
|
||||||
let result = match (json, path) {
|
|
||||||
(Ok(Some(json)), Ok(Some(path))) => {
|
|
||||||
get_json_by_path(json, path).and_then(|json| jsonb::to_str(&json).ok())
|
|
||||||
}
|
|
||||||
_ => None,
|
|
||||||
};
|
|
||||||
|
|
||||||
results.push(result.as_deref());
|
|
||||||
}
|
}
|
||||||
}
|
_ => None,
|
||||||
_ => {
|
};
|
||||||
return UnsupportedInputDataTypeSnafu {
|
builder.append_option(result);
|
||||||
function: "json_get_string",
|
|
||||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
|
||||||
}
|
|
||||||
.fail();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(results.to_vector())
|
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -230,9 +189,9 @@ impl Display for JsonGetString {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use datafusion_expr::TypeSignature;
|
use arrow_schema::Field;
|
||||||
use datatypes::scalars::ScalarVector;
|
use datafusion_common::arrow::array::{BinaryArray, StringArray};
|
||||||
use datatypes::vectors::{BinaryVector, StringVector};
|
use datafusion_common::arrow::datatypes::{Float64Type, Int64Type};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
@@ -248,13 +207,6 @@ mod tests {
|
|||||||
.unwrap()
|
.unwrap()
|
||||||
);
|
);
|
||||||
|
|
||||||
assert!(matches!(json_get_int.signature(),
|
|
||||||
Signature {
|
|
||||||
type_signature: TypeSignature::Exact(valid_types),
|
|
||||||
volatility: Volatility::Immutable
|
|
||||||
} if valid_types == vec![DataType::Binary, DataType::Utf8]
|
|
||||||
));
|
|
||||||
|
|
||||||
let json_strings = [
|
let json_strings = [
|
||||||
r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
|
r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
|
||||||
r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
|
r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
|
||||||
@@ -271,17 +223,25 @@ mod tests {
|
|||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
let json_vector = BinaryVector::from_vec(jsonbs);
|
let args = ScalarFunctionArgs {
|
||||||
let path_vector = StringVector::from_vec(paths);
|
args: vec![
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(json_vector), Arc::new(path_vector)];
|
ColumnarValue::Array(Arc::new(BinaryArray::from_iter_values(jsonbs))),
|
||||||
let vector = json_get_int
|
ColumnarValue::Array(Arc::new(StringArray::from_iter_values(paths))),
|
||||||
.eval(&FunctionContext::default(), &args)
|
],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows: 3,
|
||||||
|
return_field: Arc::new(Field::new("x", DataType::Int64, false)),
|
||||||
|
config_options: Arc::new(Default::default()),
|
||||||
|
};
|
||||||
|
let result = json_get_int
|
||||||
|
.invoke_with_args(args)
|
||||||
|
.and_then(|x| x.to_array(3))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
let vector = result.as_primitive::<Int64Type>();
|
||||||
|
|
||||||
assert_eq!(3, vector.len());
|
assert_eq!(3, vector.len());
|
||||||
for (i, gt) in results.iter().enumerate() {
|
for (i, gt) in results.iter().enumerate() {
|
||||||
let result = vector.get_ref(i);
|
let result = vector.is_valid(i).then(|| vector.value(i));
|
||||||
let result = result.as_i64().unwrap();
|
|
||||||
assert_eq!(*gt, result);
|
assert_eq!(*gt, result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -298,13 +258,6 @@ mod tests {
|
|||||||
.unwrap()
|
.unwrap()
|
||||||
);
|
);
|
||||||
|
|
||||||
assert!(matches!(json_get_float.signature(),
|
|
||||||
Signature {
|
|
||||||
type_signature: TypeSignature::Exact(valid_types),
|
|
||||||
volatility: Volatility::Immutable
|
|
||||||
} if valid_types == vec![DataType::Binary, DataType::Utf8]
|
|
||||||
));
|
|
||||||
|
|
||||||
let json_strings = [
|
let json_strings = [
|
||||||
r#"{"a": {"b": 2.1}, "b": 2.2, "c": 3.3}"#,
|
r#"{"a": {"b": 2.1}, "b": 2.2, "c": 3.3}"#,
|
||||||
r#"{"a": 4.4, "b": {"c": 6.6}, "c": 6.6}"#,
|
r#"{"a": 4.4, "b": {"c": 6.6}, "c": 6.6}"#,
|
||||||
@@ -321,17 +274,25 @@ mod tests {
|
|||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
let json_vector = BinaryVector::from_vec(jsonbs);
|
let args = ScalarFunctionArgs {
|
||||||
let path_vector = StringVector::from_vec(paths);
|
args: vec![
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(json_vector), Arc::new(path_vector)];
|
ColumnarValue::Array(Arc::new(BinaryArray::from_iter_values(jsonbs))),
|
||||||
let vector = json_get_float
|
ColumnarValue::Array(Arc::new(StringArray::from_iter_values(paths))),
|
||||||
.eval(&FunctionContext::default(), &args)
|
],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows: 3,
|
||||||
|
return_field: Arc::new(Field::new("x", DataType::Float64, false)),
|
||||||
|
config_options: Arc::new(Default::default()),
|
||||||
|
};
|
||||||
|
let result = json_get_float
|
||||||
|
.invoke_with_args(args)
|
||||||
|
.and_then(|x| x.to_array(3))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
let vector = result.as_primitive::<Float64Type>();
|
||||||
|
|
||||||
assert_eq!(3, vector.len());
|
assert_eq!(3, vector.len());
|
||||||
for (i, gt) in results.iter().enumerate() {
|
for (i, gt) in results.iter().enumerate() {
|
||||||
let result = vector.get_ref(i);
|
let result = vector.is_valid(i).then(|| vector.value(i));
|
||||||
let result = result.as_f64().unwrap();
|
|
||||||
assert_eq!(*gt, result);
|
assert_eq!(*gt, result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -348,13 +309,6 @@ mod tests {
|
|||||||
.unwrap()
|
.unwrap()
|
||||||
);
|
);
|
||||||
|
|
||||||
assert!(matches!(json_get_bool.signature(),
|
|
||||||
Signature {
|
|
||||||
type_signature: TypeSignature::Exact(valid_types),
|
|
||||||
volatility: Volatility::Immutable
|
|
||||||
} if valid_types == vec![DataType::Binary, DataType::Utf8]
|
|
||||||
));
|
|
||||||
|
|
||||||
let json_strings = [
|
let json_strings = [
|
||||||
r#"{"a": {"b": true}, "b": false, "c": true}"#,
|
r#"{"a": {"b": true}, "b": false, "c": true}"#,
|
||||||
r#"{"a": false, "b": {"c": true}, "c": false}"#,
|
r#"{"a": false, "b": {"c": true}, "c": false}"#,
|
||||||
@@ -371,17 +325,25 @@ mod tests {
|
|||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
let json_vector = BinaryVector::from_vec(jsonbs);
|
let args = ScalarFunctionArgs {
|
||||||
let path_vector = StringVector::from_vec(paths);
|
args: vec![
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(json_vector), Arc::new(path_vector)];
|
ColumnarValue::Array(Arc::new(BinaryArray::from_iter_values(jsonbs))),
|
||||||
let vector = json_get_bool
|
ColumnarValue::Array(Arc::new(StringArray::from_iter_values(paths))),
|
||||||
.eval(&FunctionContext::default(), &args)
|
],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows: 3,
|
||||||
|
return_field: Arc::new(Field::new("x", DataType::Boolean, false)),
|
||||||
|
config_options: Arc::new(Default::default()),
|
||||||
|
};
|
||||||
|
let result = json_get_bool
|
||||||
|
.invoke_with_args(args)
|
||||||
|
.and_then(|x| x.to_array(3))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
let vector = result.as_boolean();
|
||||||
|
|
||||||
assert_eq!(3, vector.len());
|
assert_eq!(3, vector.len());
|
||||||
for (i, gt) in results.iter().enumerate() {
|
for (i, gt) in results.iter().enumerate() {
|
||||||
let result = vector.get_ref(i);
|
let result = vector.is_valid(i).then(|| vector.value(i));
|
||||||
let result = result.as_boolean().unwrap();
|
|
||||||
assert_eq!(*gt, result);
|
assert_eq!(*gt, result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -392,19 +354,12 @@ mod tests {
|
|||||||
|
|
||||||
assert_eq!("json_get_string", json_get_string.name());
|
assert_eq!("json_get_string", json_get_string.name());
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
DataType::Utf8,
|
DataType::Utf8View,
|
||||||
json_get_string
|
json_get_string
|
||||||
.return_type(&[DataType::Binary, DataType::Utf8])
|
.return_type(&[DataType::Binary, DataType::Utf8])
|
||||||
.unwrap()
|
.unwrap()
|
||||||
);
|
);
|
||||||
|
|
||||||
assert!(matches!(json_get_string.signature(),
|
|
||||||
Signature {
|
|
||||||
type_signature: TypeSignature::Exact(valid_types),
|
|
||||||
volatility: Volatility::Immutable
|
|
||||||
} if valid_types == vec![DataType::Binary, DataType::Utf8]
|
|
||||||
));
|
|
||||||
|
|
||||||
let json_strings = [
|
let json_strings = [
|
||||||
r#"{"a": {"b": "a"}, "b": "b", "c": "c"}"#,
|
r#"{"a": {"b": "a"}, "b": "b", "c": "c"}"#,
|
||||||
r#"{"a": "d", "b": {"c": "e"}, "c": "f"}"#,
|
r#"{"a": "d", "b": {"c": "e"}, "c": "f"}"#,
|
||||||
@@ -421,17 +376,25 @@ mod tests {
|
|||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
let json_vector = BinaryVector::from_vec(jsonbs);
|
let args = ScalarFunctionArgs {
|
||||||
let path_vector = StringVector::from_vec(paths);
|
args: vec![
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(json_vector), Arc::new(path_vector)];
|
ColumnarValue::Array(Arc::new(BinaryArray::from_iter_values(jsonbs))),
|
||||||
let vector = json_get_string
|
ColumnarValue::Array(Arc::new(StringArray::from_iter_values(paths))),
|
||||||
.eval(&FunctionContext::default(), &args)
|
],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows: 3,
|
||||||
|
return_field: Arc::new(Field::new("x", DataType::Utf8View, false)),
|
||||||
|
config_options: Arc::new(Default::default()),
|
||||||
|
};
|
||||||
|
let result = json_get_string
|
||||||
|
.invoke_with_args(args)
|
||||||
|
.and_then(|x| x.to_array(3))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
let vector = result.as_string_view();
|
||||||
|
|
||||||
assert_eq!(3, vector.len());
|
assert_eq!(3, vector.len());
|
||||||
for (i, gt) in results.iter().enumerate() {
|
for (i, gt) in results.iter().enumerate() {
|
||||||
let result = vector.get_ref(i);
|
let result = vector.is_valid(i).then(|| vector.value(i));
|
||||||
let result = result.as_string().unwrap();
|
|
||||||
assert_eq!(*gt, result);
|
assert_eq!(*gt, result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,17 +13,15 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
use std::fmt::{self, Display};
|
use std::fmt::{self, Display};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
use common_query::error::Result;
|
||||||
use datafusion_expr::{Signature, Volatility};
|
use datafusion_common::arrow::array::{Array, AsArray, BooleanBuilder};
|
||||||
use datatypes::arrow::datatypes::DataType;
|
use datafusion_common::arrow::compute;
|
||||||
use datatypes::data_type::ConcreteDataType;
|
use datafusion_common::arrow::datatypes::DataType;
|
||||||
use datatypes::prelude::VectorRef;
|
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
|
||||||
use datatypes::scalars::ScalarVectorBuilder;
|
|
||||||
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
|
|
||||||
use snafu::ensure;
|
|
||||||
|
|
||||||
use crate::function::{Function, FunctionContext};
|
use crate::function::{Function, extract_args};
|
||||||
|
|
||||||
/// Checks if the input is a JSON object of the given type.
|
/// Checks if the input is a JSON object of the given type.
|
||||||
macro_rules! json_is {
|
macro_rules! json_is {
|
||||||
@@ -43,50 +41,36 @@ macro_rules! json_is {
|
|||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
|
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
|
||||||
Signature::exact(vec![DataType::Binary], Volatility::Immutable)
|
Signature::uniform(
|
||||||
|
1,
|
||||||
|
vec![DataType::Binary, DataType::BinaryView],
|
||||||
|
Volatility::Immutable,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
fn invoke_with_args(
|
||||||
ensure!(
|
&self,
|
||||||
columns.len() == 1,
|
args: ScalarFunctionArgs,
|
||||||
InvalidFuncArgsSnafu {
|
) -> datafusion_common::Result<ColumnarValue> {
|
||||||
err_msg: format!(
|
let [arg0] = extract_args(self.name(), &args)?;
|
||||||
"The length of the args is not correct, expect exactly one, have: {}",
|
|
||||||
columns.len()
|
|
||||||
),
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
let jsons = &columns[0];
|
let arg0 = compute::cast(&arg0, &DataType::BinaryView)?;
|
||||||
|
let jsons = arg0.as_binary_view();
|
||||||
let size = jsons.len();
|
let size = jsons.len();
|
||||||
let datatype = jsons.data_type();
|
let mut builder = BooleanBuilder::with_capacity(size);
|
||||||
let mut results = BooleanVectorBuilder::with_capacity(size);
|
|
||||||
|
|
||||||
match datatype {
|
for i in 0..size {
|
||||||
// JSON data type uses binary vector
|
let json = jsons.is_valid(i).then(|| jsons.value(i));
|
||||||
ConcreteDataType::Binary(_) => {
|
let result = match json {
|
||||||
for i in 0..size {
|
Some(json) => {
|
||||||
let json = jsons.get_ref(i);
|
Some(jsonb::[<is_ $json_type>](json))
|
||||||
let json = json.as_binary();
|
|
||||||
let result = match json {
|
|
||||||
Ok(Some(json)) => {
|
|
||||||
Some(jsonb::[<is_ $json_type>](json))
|
|
||||||
}
|
|
||||||
_ => None,
|
|
||||||
};
|
|
||||||
results.push(result);
|
|
||||||
}
|
}
|
||||||
}
|
_ => None,
|
||||||
_ => {
|
};
|
||||||
return UnsupportedInputDataTypeSnafu {
|
builder.append_option(result);
|
||||||
function: stringify!([<$name:snake>]),
|
|
||||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
|
||||||
}
|
|
||||||
.fail();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(results.to_vector())
|
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -96,7 +80,7 @@ macro_rules! json_is {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
json_is!(JsonIsNull, null, "Checks if the input JSONB is null");
|
json_is!(JsonIsNull, null, "Checks if the input JSONB is null");
|
||||||
@@ -135,8 +119,8 @@ json_is!(
|
|||||||
mod tests {
|
mod tests {
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use datatypes::scalars::ScalarVector;
|
use arrow_schema::Field;
|
||||||
use datatypes::vectors::BinaryVector;
|
use datafusion_common::arrow::array::{AsArray, BinaryArray};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
@@ -166,7 +150,11 @@ mod tests {
|
|||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
func.signature(),
|
func.signature(),
|
||||||
Signature::exact(vec![DataType::Binary], Volatility::Immutable)
|
Signature::uniform(
|
||||||
|
1,
|
||||||
|
vec![DataType::Binary, DataType::BinaryView],
|
||||||
|
Volatility::Immutable
|
||||||
|
)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -195,16 +183,26 @@ mod tests {
|
|||||||
value.to_vec()
|
value.to_vec()
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
let json_vector = BinaryVector::from_vec(jsonbs);
|
let args = ScalarFunctionArgs {
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(json_vector)];
|
args: vec![ColumnarValue::Array(Arc::new(
|
||||||
|
BinaryArray::from_iter_values(jsonbs),
|
||||||
|
))],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows: 6,
|
||||||
|
return_field: Arc::new(Field::new("", DataType::Boolean, false)),
|
||||||
|
config_options: Arc::new(Default::default()),
|
||||||
|
};
|
||||||
|
|
||||||
for (func, expected_result) in json_is_functions.iter().zip(expected_results.iter()) {
|
for (func, expected_result) in json_is_functions.iter().zip(expected_results.iter()) {
|
||||||
let vector = func.eval(&FunctionContext::default(), &args).unwrap();
|
let result = func
|
||||||
|
.invoke_with_args(args.clone())
|
||||||
|
.and_then(|x| x.to_array(6))
|
||||||
|
.unwrap();
|
||||||
|
let vector = result.as_boolean();
|
||||||
assert_eq!(vector.len(), json_strings.len());
|
assert_eq!(vector.len(), json_strings.len());
|
||||||
|
|
||||||
for (i, expected) in expected_result.iter().enumerate() {
|
for (i, expected) in expected_result.iter().enumerate() {
|
||||||
let result = vector.get_ref(i);
|
let result = vector.value(i);
|
||||||
let result = result.as_boolean().unwrap().unwrap();
|
|
||||||
assert_eq!(result, *expected);
|
assert_eq!(result, *expected);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,17 +13,17 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
use std::fmt::{self, Display};
|
use std::fmt::{self, Display};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
use arrow::compute;
|
||||||
use datafusion_expr::{Signature, TypeSignature, Volatility};
|
use common_query::error::Result;
|
||||||
use datatypes::arrow::datatypes::DataType;
|
use datafusion_common::DataFusionError;
|
||||||
use datatypes::data_type::ConcreteDataType;
|
use datafusion_common::arrow::array::{Array, AsArray, BooleanBuilder};
|
||||||
use datatypes::prelude::VectorRef;
|
use datafusion_common::arrow::datatypes::DataType;
|
||||||
use datatypes::scalars::ScalarVectorBuilder;
|
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature};
|
||||||
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
|
|
||||||
use snafu::ensure;
|
|
||||||
|
|
||||||
use crate::function::{Function, FunctionContext};
|
use crate::function::{Function, extract_args};
|
||||||
|
use crate::helper;
|
||||||
|
|
||||||
/// Check if the given JSON data contains the given JSON path.
|
/// Check if the given JSON data contains the given JSON path.
|
||||||
#[derive(Clone, Debug, Default)]
|
#[derive(Clone, Debug, Default)]
|
||||||
@@ -42,48 +42,41 @@ impl Function for JsonPathExistsFunction {
|
|||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
|
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
|
||||||
Signature::one_of(
|
helper::one_of_sigs2(
|
||||||
vec![
|
vec![DataType::Binary, DataType::BinaryView, DataType::Null],
|
||||||
TypeSignature::Exact(vec![DataType::Binary, DataType::Utf8]),
|
vec![DataType::Utf8, DataType::Utf8View, DataType::Null],
|
||||||
TypeSignature::Exact(vec![DataType::Null, DataType::Utf8]),
|
|
||||||
TypeSignature::Exact(vec![DataType::Binary, DataType::Null]),
|
|
||||||
TypeSignature::Exact(vec![DataType::Null, DataType::Null]),
|
|
||||||
],
|
|
||||||
Volatility::Immutable,
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
fn invoke_with_args(
|
||||||
ensure!(
|
&self,
|
||||||
columns.len() == 2,
|
args: ScalarFunctionArgs,
|
||||||
InvalidFuncArgsSnafu {
|
) -> datafusion_common::Result<ColumnarValue> {
|
||||||
err_msg: format!(
|
let [jsons, paths] = extract_args(self.name(), &args)?;
|
||||||
"The length of the args is not correct, expect exactly two, have: {}",
|
|
||||||
columns.len()
|
|
||||||
),
|
|
||||||
}
|
|
||||||
);
|
|
||||||
let jsons = &columns[0];
|
|
||||||
let paths = &columns[1];
|
|
||||||
|
|
||||||
let size = jsons.len();
|
let size = jsons.len();
|
||||||
let mut results = BooleanVectorBuilder::with_capacity(size);
|
let mut builder = BooleanBuilder::with_capacity(size);
|
||||||
|
|
||||||
match (jsons.data_type(), paths.data_type()) {
|
match (jsons.data_type(), paths.data_type()) {
|
||||||
(ConcreteDataType::Binary(_), ConcreteDataType::String(_)) => {
|
(DataType::Null, _) | (_, DataType::Null) => builder.append_nulls(size),
|
||||||
|
_ => {
|
||||||
|
let jsons = compute::cast(&jsons, &DataType::BinaryView)?;
|
||||||
|
let jsons = jsons.as_binary_view();
|
||||||
|
let paths = compute::cast(&paths, &DataType::Utf8View)?;
|
||||||
|
let paths = paths.as_string_view();
|
||||||
for i in 0..size {
|
for i in 0..size {
|
||||||
let result = match (jsons.get_ref(i).as_binary(), paths.get_ref(i).as_string())
|
let json = jsons.is_valid(i).then(|| jsons.value(i));
|
||||||
{
|
let path = paths.is_valid(i).then(|| paths.value(i));
|
||||||
(Ok(Some(json)), Ok(Some(path))) => {
|
let result = match (json, path) {
|
||||||
|
(Some(json), Some(path)) => {
|
||||||
// Get `JsonPath`.
|
// Get `JsonPath`.
|
||||||
let json_path = match jsonb::jsonpath::parse_json_path(path.as_bytes())
|
let json_path = match jsonb::jsonpath::parse_json_path(path.as_bytes())
|
||||||
{
|
{
|
||||||
Ok(json_path) => json_path,
|
Ok(json_path) => json_path,
|
||||||
Err(_) => {
|
Err(e) => {
|
||||||
return InvalidFuncArgsSnafu {
|
return Err(DataFusionError::Execution(format!(
|
||||||
err_msg: format!("Illegal json path: {:?}", path),
|
"invalid json path '{path}': {e}"
|
||||||
}
|
)));
|
||||||
.fail();
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
jsonb::path_exists(json, json_path).ok()
|
jsonb::path_exists(json, json_path).ok()
|
||||||
@@ -91,25 +84,12 @@ impl Function for JsonPathExistsFunction {
|
|||||||
_ => None,
|
_ => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
results.push(result);
|
builder.append_option(result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Any null args existence causes the result to be NULL.
|
|
||||||
(ConcreteDataType::Null(_), ConcreteDataType::String(_)) => results.push_nulls(size),
|
|
||||||
(ConcreteDataType::Binary(_), ConcreteDataType::Null(_)) => results.push_nulls(size),
|
|
||||||
(ConcreteDataType::Null(_), ConcreteDataType::Null(_)) => results.push_nulls(size),
|
|
||||||
|
|
||||||
_ => {
|
|
||||||
return UnsupportedInputDataTypeSnafu {
|
|
||||||
function: NAME,
|
|
||||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
|
||||||
}
|
|
||||||
.fail();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(results.to_vector())
|
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -123,8 +103,8 @@ impl Display for JsonPathExistsFunction {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use datatypes::prelude::ScalarVector;
|
use arrow_schema::Field;
|
||||||
use datatypes::vectors::{BinaryVector, NullVector, StringVector};
|
use datafusion_common::arrow::array::{BinaryArray, NullArray, StringArray};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
@@ -138,31 +118,6 @@ mod tests {
|
|||||||
json_path_exists.return_type(&[DataType::Binary]).unwrap()
|
json_path_exists.return_type(&[DataType::Binary]).unwrap()
|
||||||
);
|
);
|
||||||
|
|
||||||
assert!(matches!(json_path_exists.signature(),
|
|
||||||
Signature {
|
|
||||||
type_signature: TypeSignature::OneOf(valid_types),
|
|
||||||
volatility: Volatility::Immutable
|
|
||||||
} if valid_types ==
|
|
||||||
vec![
|
|
||||||
TypeSignature::Exact(vec![
|
|
||||||
DataType::Binary,
|
|
||||||
DataType::Utf8,
|
|
||||||
]),
|
|
||||||
TypeSignature::Exact(vec![
|
|
||||||
DataType::Null,
|
|
||||||
DataType::Utf8,
|
|
||||||
]),
|
|
||||||
TypeSignature::Exact(vec![
|
|
||||||
DataType::Binary,
|
|
||||||
DataType::Null,
|
|
||||||
]),
|
|
||||||
TypeSignature::Exact(vec![
|
|
||||||
DataType::Null,
|
|
||||||
DataType::Null,
|
|
||||||
]),
|
|
||||||
],
|
|
||||||
));
|
|
||||||
|
|
||||||
let json_strings = [
|
let json_strings = [
|
||||||
r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
|
r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
|
||||||
r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
|
r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
|
||||||
@@ -186,51 +141,83 @@ mod tests {
|
|||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
let json_vector = BinaryVector::from_vec(jsonbs);
|
let args = ScalarFunctionArgs {
|
||||||
let path_vector = StringVector::from_vec(paths);
|
args: vec![
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(json_vector), Arc::new(path_vector)];
|
ColumnarValue::Array(Arc::new(BinaryArray::from_iter_values(jsonbs))),
|
||||||
let vector = json_path_exists
|
ColumnarValue::Array(Arc::new(StringArray::from_iter_values(paths))),
|
||||||
.eval(&FunctionContext::default(), &args)
|
],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows: 8,
|
||||||
|
return_field: Arc::new(Field::new("x", DataType::Boolean, false)),
|
||||||
|
config_options: Arc::new(Default::default()),
|
||||||
|
};
|
||||||
|
let result = json_path_exists
|
||||||
|
.invoke_with_args(args)
|
||||||
|
.and_then(|x| x.to_array(8))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
let vector = result.as_boolean();
|
||||||
|
|
||||||
// Test for non-nulls.
|
// Test for non-nulls.
|
||||||
assert_eq!(8, vector.len());
|
assert_eq!(8, vector.len());
|
||||||
for (i, real) in expected.iter().enumerate() {
|
for (i, real) in expected.iter().enumerate() {
|
||||||
let result = vector.get_ref(i);
|
let val = vector.value(i);
|
||||||
assert!(!result.is_null());
|
|
||||||
let val = result.as_boolean().unwrap().unwrap();
|
|
||||||
assert_eq!(val, *real);
|
assert_eq!(val, *real);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test for path error.
|
// Test for path error.
|
||||||
let json_bytes = jsonb::parse_value("{}".as_bytes()).unwrap().to_vec();
|
let json_bytes = jsonb::parse_value("{}".as_bytes()).unwrap().to_vec();
|
||||||
let json = BinaryVector::from_vec(vec![json_bytes]);
|
let illegal_path = "$..a";
|
||||||
let illegal_path = StringVector::from_vec(vec!["$..a"]);
|
|
||||||
|
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(json), Arc::new(illegal_path)];
|
let args = ScalarFunctionArgs {
|
||||||
let err = json_path_exists.eval(&FunctionContext::default(), &args);
|
args: vec![
|
||||||
|
ColumnarValue::Array(Arc::new(BinaryArray::from_iter_values(vec![json_bytes]))),
|
||||||
|
ColumnarValue::Array(Arc::new(StringArray::from_iter_values(vec![illegal_path]))),
|
||||||
|
],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows: 1,
|
||||||
|
return_field: Arc::new(Field::new("x", DataType::Boolean, false)),
|
||||||
|
config_options: Arc::new(Default::default()),
|
||||||
|
};
|
||||||
|
let err = json_path_exists.invoke_with_args(args);
|
||||||
assert!(err.is_err());
|
assert!(err.is_err());
|
||||||
|
|
||||||
// Test for nulls.
|
// Test for nulls.
|
||||||
let json_bytes = jsonb::parse_value("{}".as_bytes()).unwrap().to_vec();
|
let json_bytes = jsonb::parse_value("{}".as_bytes()).unwrap().to_vec();
|
||||||
let json = BinaryVector::from_vec(vec![json_bytes]);
|
let json = Arc::new(BinaryArray::from_iter_values(vec![json_bytes]));
|
||||||
let null_json = NullVector::new(1);
|
let null_json = Arc::new(NullArray::new(1));
|
||||||
|
|
||||||
let path = StringVector::from_vec(vec!["$.a"]);
|
let path = Arc::new(StringArray::from_iter_values(vec!["$.a"]));
|
||||||
let null_path = NullVector::new(1);
|
let null_path = Arc::new(NullArray::new(1));
|
||||||
|
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(null_json), Arc::new(path)];
|
let args = ScalarFunctionArgs {
|
||||||
let result1 = json_path_exists
|
args: vec![ColumnarValue::Array(null_json), ColumnarValue::Array(path)],
|
||||||
.eval(&FunctionContext::default(), &args)
|
arg_fields: vec![],
|
||||||
|
number_rows: 1,
|
||||||
|
return_field: Arc::new(Field::new("x", DataType::Boolean, false)),
|
||||||
|
config_options: Arc::new(Default::default()),
|
||||||
|
};
|
||||||
|
let result = json_path_exists
|
||||||
|
.invoke_with_args(args)
|
||||||
|
.and_then(|x| x.to_array(1))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(json), Arc::new(null_path)];
|
let result1 = result.as_boolean();
|
||||||
let result2 = json_path_exists
|
|
||||||
.eval(&FunctionContext::default(), &args)
|
let args = ScalarFunctionArgs {
|
||||||
|
args: vec![ColumnarValue::Array(json), ColumnarValue::Array(null_path)],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows: 1,
|
||||||
|
return_field: Arc::new(Field::new("x", DataType::Boolean, false)),
|
||||||
|
config_options: Arc::new(Default::default()),
|
||||||
|
};
|
||||||
|
let result = json_path_exists
|
||||||
|
.invoke_with_args(args)
|
||||||
|
.and_then(|x| x.to_array(1))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
let result2 = result.as_boolean();
|
||||||
|
|
||||||
assert_eq!(result1.len(), 1);
|
assert_eq!(result1.len(), 1);
|
||||||
assert!(result1.get_ref(0).is_null());
|
assert!(result1.is_null(0));
|
||||||
assert_eq!(result2.len(), 1);
|
assert_eq!(result2.len(), 1);
|
||||||
assert!(result2.get_ref(0).is_null());
|
assert!(result2.is_null(0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,17 +13,16 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
use std::fmt::{self, Display};
|
use std::fmt::{self, Display};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
use arrow::compute;
|
||||||
use datafusion_expr::{Signature, Volatility};
|
use common_query::error::Result;
|
||||||
use datatypes::arrow::datatypes::DataType;
|
use datafusion_common::arrow::array::{Array, AsArray, BooleanBuilder};
|
||||||
use datatypes::data_type::ConcreteDataType;
|
use datafusion_common::arrow::datatypes::DataType;
|
||||||
use datatypes::prelude::VectorRef;
|
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature};
|
||||||
use datatypes::scalars::ScalarVectorBuilder;
|
|
||||||
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
|
|
||||||
use snafu::ensure;
|
|
||||||
|
|
||||||
use crate::function::{Function, FunctionContext};
|
use crate::function::{Function, extract_args};
|
||||||
|
use crate::helper;
|
||||||
|
|
||||||
/// Check if the given JSON data match the given JSON path's predicate.
|
/// Check if the given JSON data match the given JSON path's predicate.
|
||||||
#[derive(Clone, Debug, Default)]
|
#[derive(Clone, Debug, Default)]
|
||||||
@@ -42,66 +41,47 @@ impl Function for JsonPathMatchFunction {
|
|||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
|
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
|
||||||
Signature::exact(
|
helper::one_of_sigs2(
|
||||||
vec![DataType::Binary, DataType::Utf8],
|
vec![DataType::Binary, DataType::BinaryView],
|
||||||
Volatility::Immutable,
|
vec![DataType::Utf8, DataType::Utf8View],
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
fn invoke_with_args(
|
||||||
ensure!(
|
&self,
|
||||||
columns.len() == 2,
|
args: ScalarFunctionArgs,
|
||||||
InvalidFuncArgsSnafu {
|
) -> datafusion_common::Result<ColumnarValue> {
|
||||||
err_msg: format!(
|
let [arg0, arg1] = extract_args(self.name(), &args)?;
|
||||||
"The length of the args is not correct, expect exactly two, have: {}",
|
let arg0 = compute::cast(&arg0, &DataType::BinaryView)?;
|
||||||
columns.len()
|
let jsons = arg0.as_binary_view();
|
||||||
),
|
let arg1 = compute::cast(&arg1, &DataType::Utf8View)?;
|
||||||
}
|
let paths = arg1.as_string_view();
|
||||||
);
|
|
||||||
let jsons = &columns[0];
|
|
||||||
let paths = &columns[1];
|
|
||||||
|
|
||||||
let size = jsons.len();
|
let size = jsons.len();
|
||||||
let mut results = BooleanVectorBuilder::with_capacity(size);
|
let mut builder = BooleanBuilder::with_capacity(size);
|
||||||
|
|
||||||
for i in 0..size {
|
for i in 0..size {
|
||||||
let json = jsons.get_ref(i);
|
let json = jsons.is_valid(i).then(|| jsons.value(i));
|
||||||
let path = paths.get_ref(i);
|
let path = paths.is_valid(i).then(|| paths.value(i));
|
||||||
|
|
||||||
match json.data_type() {
|
let result = match (json, path) {
|
||||||
// JSON data type uses binary vector
|
(Some(json), Some(path)) => {
|
||||||
ConcreteDataType::Binary(_) => {
|
if !jsonb::is_null(json) {
|
||||||
let json = json.as_binary();
|
let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
|
||||||
let path = path.as_string();
|
match json_path {
|
||||||
let result = match (json, path) {
|
Ok(json_path) => jsonb::path_match(json, json_path).ok(),
|
||||||
(Ok(Some(json)), Ok(Some(path))) => {
|
Err(_) => None,
|
||||||
if !jsonb::is_null(json) {
|
|
||||||
let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
|
|
||||||
match json_path {
|
|
||||||
Ok(json_path) => jsonb::path_match(json, json_path).ok(),
|
|
||||||
Err(_) => None,
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
_ => None,
|
} else {
|
||||||
};
|
None
|
||||||
|
|
||||||
results.push(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
_ => {
|
|
||||||
return UnsupportedInputDataTypeSnafu {
|
|
||||||
function: NAME,
|
|
||||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
|
||||||
}
|
}
|
||||||
.fail();
|
|
||||||
}
|
}
|
||||||
}
|
_ => None,
|
||||||
|
};
|
||||||
|
builder.append_option(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(results.to_vector())
|
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -115,8 +95,8 @@ impl Display for JsonPathMatchFunction {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use datafusion_expr::TypeSignature;
|
use arrow_schema::Field;
|
||||||
use datatypes::vectors::{BinaryVector, StringVector};
|
use datafusion_common::arrow::array::{BinaryArray, StringArray};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
@@ -130,13 +110,6 @@ mod tests {
|
|||||||
json_path_match.return_type(&[DataType::Binary]).unwrap()
|
json_path_match.return_type(&[DataType::Binary]).unwrap()
|
||||||
);
|
);
|
||||||
|
|
||||||
assert!(matches!(json_path_match.signature(),
|
|
||||||
Signature {
|
|
||||||
type_signature: TypeSignature::Exact(valid_types),
|
|
||||||
volatility: Volatility::Immutable
|
|
||||||
} if valid_types == vec![DataType::Binary, DataType::Utf8],
|
|
||||||
));
|
|
||||||
|
|
||||||
let json_strings = [
|
let json_strings = [
|
||||||
Some(r#"{"a": {"b": 2}, "b": 2, "c": 3}"#.to_string()),
|
Some(r#"{"a": {"b": 2}, "b": 2, "c": 3}"#.to_string()),
|
||||||
Some(r#"{"a": 1, "b": [1,2,3]}"#.to_string()),
|
Some(r#"{"a": 1, "b": [1,2,3]}"#.to_string()),
|
||||||
@@ -172,27 +145,25 @@ mod tests {
|
|||||||
.map(|s| s.map(|json| jsonb::parse_value(json.as_bytes()).unwrap().to_vec()))
|
.map(|s| s.map(|json| jsonb::parse_value(json.as_bytes()).unwrap().to_vec()))
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
let json_vector = BinaryVector::from(jsonbs);
|
let args = ScalarFunctionArgs {
|
||||||
let path_vector = StringVector::from(paths);
|
args: vec![
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(json_vector), Arc::new(path_vector)];
|
ColumnarValue::Array(Arc::new(BinaryArray::from_iter(jsonbs))),
|
||||||
let vector = json_path_match
|
ColumnarValue::Array(Arc::new(StringArray::from_iter(paths))),
|
||||||
.eval(&FunctionContext::default(), &args)
|
],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows: 7,
|
||||||
|
return_field: Arc::new(Field::new("x", DataType::Boolean, false)),
|
||||||
|
config_options: Arc::new(Default::default()),
|
||||||
|
};
|
||||||
|
let result = json_path_match
|
||||||
|
.invoke_with_args(args)
|
||||||
|
.and_then(|x| x.to_array(7))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
let vector = result.as_boolean();
|
||||||
|
|
||||||
assert_eq!(7, vector.len());
|
assert_eq!(7, vector.len());
|
||||||
for (i, expected) in results.iter().enumerate() {
|
for (actual, expected) in vector.iter().zip(results) {
|
||||||
let result = vector.get_ref(i);
|
assert_eq!(actual, expected);
|
||||||
|
|
||||||
match expected {
|
|
||||||
Some(expected_value) => {
|
|
||||||
assert!(!result.is_null());
|
|
||||||
let result_value = result.as_boolean().unwrap().unwrap();
|
|
||||||
assert_eq!(*expected_value, result_value);
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
assert!(result.is_null());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,17 +13,15 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
use std::fmt::{self, Display};
|
use std::fmt::{self, Display};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
use common_query::error::Result;
|
||||||
use datafusion_expr::{Signature, Volatility};
|
use datafusion_common::DataFusionError;
|
||||||
use datatypes::arrow::datatypes::DataType;
|
use datafusion_common::arrow::array::{Array, AsArray, StringViewBuilder};
|
||||||
use datatypes::data_type::ConcreteDataType;
|
use datafusion_common::arrow::datatypes::DataType;
|
||||||
use datatypes::prelude::VectorRef;
|
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
|
||||||
use datatypes::scalars::ScalarVectorBuilder;
|
|
||||||
use datatypes::vectors::{MutableVector, StringVectorBuilder};
|
|
||||||
use snafu::ensure;
|
|
||||||
|
|
||||||
use crate::function::{Function, FunctionContext};
|
use crate::function::{Function, extract_args};
|
||||||
|
|
||||||
/// Converts the `JSONB` into `String`. It's useful for displaying JSONB content.
|
/// Converts the `JSONB` into `String`. It's useful for displaying JSONB content.
|
||||||
#[derive(Clone, Debug, Default)]
|
#[derive(Clone, Debug, Default)]
|
||||||
@@ -37,7 +35,7 @@ impl Function for JsonToStringFunction {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
||||||
Ok(DataType::Utf8)
|
Ok(DataType::Utf8View)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
@@ -45,58 +43,27 @@ impl Function for JsonToStringFunction {
|
|||||||
Signature::exact(vec![DataType::Binary], Volatility::Immutable)
|
Signature::exact(vec![DataType::Binary], Volatility::Immutable)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
fn invoke_with_args(
|
||||||
ensure!(
|
&self,
|
||||||
columns.len() == 1,
|
args: ScalarFunctionArgs,
|
||||||
InvalidFuncArgsSnafu {
|
) -> datafusion_common::Result<ColumnarValue> {
|
||||||
err_msg: format!(
|
let [arg0] = extract_args(self.name(), &args)?;
|
||||||
"The length of the args is not correct, expect exactly one, have: {}",
|
let jsons = arg0.as_binary::<i32>();
|
||||||
columns.len()
|
|
||||||
),
|
|
||||||
}
|
|
||||||
);
|
|
||||||
let jsons = &columns[0];
|
|
||||||
|
|
||||||
let size = jsons.len();
|
let size = jsons.len();
|
||||||
let datatype = jsons.data_type();
|
let mut builder = StringViewBuilder::with_capacity(size);
|
||||||
let mut results = StringVectorBuilder::with_capacity(size);
|
|
||||||
|
|
||||||
match datatype {
|
for i in 0..size {
|
||||||
// JSON data type uses binary vector
|
let json = jsons.is_valid(i).then(|| jsons.value(i));
|
||||||
ConcreteDataType::Binary(_) => {
|
let result = json
|
||||||
for i in 0..size {
|
.map(|json| jsonb::from_slice(json).map(|x| x.to_string()))
|
||||||
let json = jsons.get_ref(i);
|
.transpose()
|
||||||
|
.map_err(|e| DataFusionError::Execution(format!("invalid json binary: {e}")))?;
|
||||||
|
|
||||||
let json = json.as_binary();
|
builder.append_option(result.as_deref());
|
||||||
let result = match json {
|
|
||||||
Ok(Some(json)) => match jsonb::from_slice(json) {
|
|
||||||
Ok(json) => {
|
|
||||||
let json = json.to_string();
|
|
||||||
Some(json)
|
|
||||||
}
|
|
||||||
Err(_) => {
|
|
||||||
return InvalidFuncArgsSnafu {
|
|
||||||
err_msg: format!("Illegal json binary: {:?}", json),
|
|
||||||
}
|
|
||||||
.fail();
|
|
||||||
}
|
|
||||||
},
|
|
||||||
_ => None,
|
|
||||||
};
|
|
||||||
|
|
||||||
results.push(result.as_deref());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
return UnsupportedInputDataTypeSnafu {
|
|
||||||
function: NAME,
|
|
||||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
|
||||||
}
|
|
||||||
.fail();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(results.to_vector())
|
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -110,9 +77,9 @@ impl Display for JsonToStringFunction {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use arrow_schema::Field;
|
||||||
|
use datafusion_common::arrow::array::BinaryArray;
|
||||||
use datafusion_expr::TypeSignature;
|
use datafusion_expr::TypeSignature;
|
||||||
use datatypes::scalars::ScalarVector;
|
|
||||||
use datatypes::vectors::BinaryVector;
|
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
@@ -122,7 +89,7 @@ mod tests {
|
|||||||
|
|
||||||
assert_eq!("json_to_string", json_to_string.name());
|
assert_eq!("json_to_string", json_to_string.name());
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
DataType::Utf8,
|
DataType::Utf8View,
|
||||||
json_to_string.return_type(&[DataType::Binary]).unwrap()
|
json_to_string.return_type(&[DataType::Binary]).unwrap()
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -147,24 +114,39 @@ mod tests {
|
|||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
let json_vector = BinaryVector::from_vec(jsonbs);
|
let args = ScalarFunctionArgs {
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(json_vector)];
|
args: vec![ColumnarValue::Array(Arc::new(
|
||||||
let vector = json_to_string
|
BinaryArray::from_iter_values(jsonbs),
|
||||||
.eval(&FunctionContext::default(), &args)
|
))],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows: 3,
|
||||||
|
return_field: Arc::new(Field::new("x", DataType::Utf8View, false)),
|
||||||
|
config_options: Arc::new(Default::default()),
|
||||||
|
};
|
||||||
|
let result = json_to_string
|
||||||
|
.invoke_with_args(args)
|
||||||
|
.and_then(|x| x.to_array(1))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
let vector = result.as_string_view();
|
||||||
|
|
||||||
assert_eq!(3, vector.len());
|
assert_eq!(3, vector.len());
|
||||||
for (i, gt) in json_strings.iter().enumerate() {
|
for (i, gt) in json_strings.iter().enumerate() {
|
||||||
let result = vector.get_ref(i);
|
let result = vector.value(i);
|
||||||
let result = result.as_string().unwrap().unwrap();
|
|
||||||
// remove whitespaces
|
// remove whitespaces
|
||||||
assert_eq!(gt.replace(" ", ""), result);
|
assert_eq!(gt.replace(" ", ""), result);
|
||||||
}
|
}
|
||||||
|
|
||||||
let invalid_jsonb = vec![b"invalid json"];
|
let invalid_jsonb = vec![b"invalid json"];
|
||||||
let invalid_json_vector = BinaryVector::from_vec(invalid_jsonb);
|
let args = ScalarFunctionArgs {
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(invalid_json_vector)];
|
args: vec![ColumnarValue::Array(Arc::new(
|
||||||
let vector = json_to_string.eval(&FunctionContext::default(), &args);
|
BinaryArray::from_iter_values(invalid_jsonb),
|
||||||
|
))],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows: 1,
|
||||||
|
return_field: Arc::new(Field::new("x", DataType::Utf8View, false)),
|
||||||
|
config_options: Arc::new(Default::default()),
|
||||||
|
};
|
||||||
|
let vector = json_to_string.invoke_with_args(args);
|
||||||
assert!(vector.is_err());
|
assert!(vector.is_err());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,17 +13,16 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
use std::fmt::{self, Display};
|
use std::fmt::{self, Display};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
use common_query::error::Result;
|
||||||
use datafusion::arrow::datatypes::DataType;
|
use datafusion_common::DataFusionError;
|
||||||
use datafusion_expr::{Signature, Volatility};
|
use datafusion_common::arrow::array::{Array, AsArray, BinaryViewBuilder};
|
||||||
use datatypes::data_type::ConcreteDataType;
|
use datafusion_common::arrow::compute;
|
||||||
use datatypes::prelude::VectorRef;
|
use datafusion_common::arrow::datatypes::DataType;
|
||||||
use datatypes::scalars::ScalarVectorBuilder;
|
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
|
||||||
use datatypes::vectors::{BinaryVectorBuilder, MutableVector};
|
|
||||||
use snafu::ensure;
|
|
||||||
|
|
||||||
use crate::function::{Function, FunctionContext};
|
use crate::function::{Function, extract_args};
|
||||||
|
|
||||||
/// Parses the `String` into `JSONB`.
|
/// Parses the `String` into `JSONB`.
|
||||||
#[derive(Clone, Debug, Default)]
|
#[derive(Clone, Debug, Default)]
|
||||||
@@ -37,64 +36,37 @@ impl Function for ParseJsonFunction {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
||||||
Ok(DataType::Binary)
|
Ok(DataType::BinaryView)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
Signature::string(1, Volatility::Immutable)
|
Signature::string(1, Volatility::Immutable)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
fn invoke_with_args(
|
||||||
ensure!(
|
&self,
|
||||||
columns.len() == 1,
|
args: ScalarFunctionArgs,
|
||||||
InvalidFuncArgsSnafu {
|
) -> datafusion_common::Result<ColumnarValue> {
|
||||||
err_msg: format!(
|
let [arg0] = extract_args(self.name(), &args)?;
|
||||||
"The length of the args is not correct, expect exactly one, have: {}",
|
let arg0 = compute::cast(&arg0, &DataType::Utf8View)?;
|
||||||
columns.len()
|
let json_strings = arg0.as_string_view();
|
||||||
),
|
|
||||||
}
|
|
||||||
);
|
|
||||||
let json_strings = &columns[0];
|
|
||||||
|
|
||||||
let size = json_strings.len();
|
let size = json_strings.len();
|
||||||
let datatype = json_strings.data_type();
|
let mut builder = BinaryViewBuilder::with_capacity(size);
|
||||||
let mut results = BinaryVectorBuilder::with_capacity(size);
|
|
||||||
|
|
||||||
match datatype {
|
for i in 0..size {
|
||||||
ConcreteDataType::String(_) => {
|
let s = json_strings.is_valid(i).then(|| json_strings.value(i));
|
||||||
for i in 0..size {
|
let result = s
|
||||||
let json_string = json_strings.get_ref(i);
|
.map(|s| {
|
||||||
|
jsonb::parse_value(s.as_bytes())
|
||||||
let json_string = json_string.as_string();
|
.map(|x| x.to_vec())
|
||||||
let result = match json_string {
|
.map_err(|e| DataFusionError::Execution(format!("cannot parse '{s}': {e}")))
|
||||||
Ok(Some(json_string)) => match jsonb::parse_value(json_string.as_bytes()) {
|
})
|
||||||
Ok(json) => Some(json.to_vec()),
|
.transpose()?;
|
||||||
Err(_) => {
|
builder.append_option(result.as_deref());
|
||||||
return InvalidFuncArgsSnafu {
|
|
||||||
err_msg: format!(
|
|
||||||
"Cannot convert the string to json, have: {}",
|
|
||||||
json_string
|
|
||||||
),
|
|
||||||
}
|
|
||||||
.fail();
|
|
||||||
}
|
|
||||||
},
|
|
||||||
_ => None,
|
|
||||||
};
|
|
||||||
|
|
||||||
results.push(result.as_deref());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
return UnsupportedInputDataTypeSnafu {
|
|
||||||
function: NAME,
|
|
||||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
|
||||||
}
|
|
||||||
.fail();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(results.to_vector())
|
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -108,8 +80,8 @@ impl Display for ParseJsonFunction {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use datatypes::scalars::ScalarVector;
|
use arrow_schema::Field;
|
||||||
use datatypes::vectors::StringVector;
|
use datafusion_common::arrow::array::StringViewArray;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
@@ -119,7 +91,7 @@ mod tests {
|
|||||||
|
|
||||||
assert_eq!("parse_json", parse_json.name());
|
assert_eq!("parse_json", parse_json.name());
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
DataType::Binary,
|
DataType::BinaryView,
|
||||||
parse_json.return_type(&[DataType::Binary]).unwrap()
|
parse_json.return_type(&[DataType::Binary]).unwrap()
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -137,14 +109,24 @@ mod tests {
|
|||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
let json_string_vector = StringVector::from_vec(json_strings.to_vec());
|
let args = ScalarFunctionArgs {
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(json_string_vector)];
|
args: vec![ColumnarValue::Array(Arc::new(
|
||||||
let vector = parse_json.eval(&FunctionContext::default(), &args).unwrap();
|
StringViewArray::from_iter_values(json_strings),
|
||||||
|
))],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows: 3,
|
||||||
|
return_field: Arc::new(Field::new("x", DataType::BinaryView, false)),
|
||||||
|
config_options: Arc::new(Default::default()),
|
||||||
|
};
|
||||||
|
let result = parse_json
|
||||||
|
.invoke_with_args(args)
|
||||||
|
.and_then(|x| x.to_array(3))
|
||||||
|
.unwrap();
|
||||||
|
let vector = result.as_binary_view();
|
||||||
|
|
||||||
assert_eq!(3, vector.len());
|
assert_eq!(3, vector.len());
|
||||||
for (i, gt) in jsonbs.iter().enumerate() {
|
for (i, gt) in jsonbs.iter().enumerate() {
|
||||||
let result = vector.get_ref(i);
|
let result = vector.value(i);
|
||||||
let result = result.as_binary().unwrap().unwrap();
|
|
||||||
assert_eq!(gt, result);
|
assert_eq!(gt, result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,18 +13,17 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::iter::repeat_n;
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use common_query::error::{InvalidFuncArgsSnafu, Result};
|
use common_query::error::Result;
|
||||||
use datafusion::arrow::datatypes::DataType;
|
use datafusion_common::arrow::array::{Array, AsArray, BooleanArray, BooleanBuilder};
|
||||||
use datafusion_expr::{Signature, Volatility};
|
use datafusion_common::arrow::compute;
|
||||||
use datatypes::scalars::ScalarVectorBuilder;
|
use datafusion_common::arrow::datatypes::DataType;
|
||||||
use datatypes::vectors::{BooleanVector, BooleanVectorBuilder, MutableVector, VectorRef};
|
use datafusion_common::{DataFusionError, ScalarValue};
|
||||||
|
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
|
||||||
use memchr::memmem;
|
use memchr::memmem;
|
||||||
use snafu::ensure;
|
|
||||||
|
|
||||||
use crate::function::{Function, FunctionContext};
|
use crate::function::Function;
|
||||||
use crate::function_registry::FunctionRegistry;
|
use crate::function_registry::FunctionRegistry;
|
||||||
|
|
||||||
/// Exact term/phrase matching function for text columns.
|
/// Exact term/phrase matching function for text columns.
|
||||||
@@ -100,64 +99,94 @@ impl Function for MatchesTermFunction {
|
|||||||
Signature::string(2, Volatility::Immutable)
|
Signature::string(2, Volatility::Immutable)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
fn invoke_with_args(
|
||||||
ensure!(
|
&self,
|
||||||
columns.len() == 2,
|
args: ScalarFunctionArgs,
|
||||||
InvalidFuncArgsSnafu {
|
) -> datafusion_common::Result<ColumnarValue> {
|
||||||
err_msg: format!(
|
let [arg0, arg1] = datafusion_common::utils::take_function_args(self.name(), &args.args)?;
|
||||||
"The length of the args is not correct, expect exactly 2, have: {}",
|
|
||||||
columns.len()
|
|
||||||
),
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
let text_column = &columns[0];
|
fn as_str(v: &ScalarValue) -> Option<&str> {
|
||||||
if text_column.is_empty() {
|
match v {
|
||||||
return Ok(Arc::new(BooleanVector::from(Vec::<bool>::with_capacity(0))));
|
ScalarValue::Utf8View(Some(x))
|
||||||
|
| ScalarValue::Utf8(Some(x))
|
||||||
|
| ScalarValue::LargeUtf8(Some(x)) => Some(x.as_str()),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let term_column = &columns[1];
|
if let (ColumnarValue::Scalar(text), ColumnarValue::Scalar(term)) = (arg0, arg1) {
|
||||||
let compiled_finder = if term_column.is_const() {
|
let text = as_str(text);
|
||||||
let term = term_column.get_ref(0).as_string().unwrap();
|
let term = as_str(term);
|
||||||
match term {
|
let result = match (text, term) {
|
||||||
None => {
|
(Some(text), Some(term)) => Some(MatchesTermFinder::new(term).find(text)),
|
||||||
return Ok(Arc::new(BooleanVector::from_iter(repeat_n(
|
_ => None,
|
||||||
None,
|
};
|
||||||
text_column.len(),
|
return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(result)));
|
||||||
))));
|
|
||||||
}
|
|
||||||
Some(term) => Some(MatchesTermFinder::new(term)),
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let len = text_column.len();
|
let v = match (arg0, arg1) {
|
||||||
let mut result = BooleanVectorBuilder::with_capacity(len);
|
(ColumnarValue::Scalar(_), ColumnarValue::Scalar(_)) => {
|
||||||
for i in 0..len {
|
// Unreachable because we have checked this case above and returned if matched.
|
||||||
let text = text_column.get_ref(i).as_string().unwrap();
|
unreachable!()
|
||||||
let Some(text) = text else {
|
}
|
||||||
result.push_null();
|
(ColumnarValue::Scalar(text), ColumnarValue::Array(terms)) => {
|
||||||
continue;
|
let text = as_str(text);
|
||||||
};
|
if let Some(text) = text {
|
||||||
|
let terms = compute::cast(terms, &DataType::Utf8View)?;
|
||||||
|
let terms = terms.as_string_view();
|
||||||
|
|
||||||
let contains = match &compiled_finder {
|
let mut builder = BooleanBuilder::with_capacity(terms.len());
|
||||||
Some(finder) => finder.find(text),
|
terms.iter().for_each(|term| {
|
||||||
None => {
|
builder.append_option(term.map(|x| MatchesTermFinder::new(x).find(text)))
|
||||||
let term = match term_column.get_ref(i).as_string().unwrap() {
|
});
|
||||||
None => {
|
ColumnarValue::Array(Arc::new(builder.finish()))
|
||||||
result.push_null();
|
} else {
|
||||||
continue;
|
ColumnarValue::Array(Arc::new(BooleanArray::new_null(terms.len())))
|
||||||
}
|
|
||||||
Some(term) => term,
|
|
||||||
};
|
|
||||||
MatchesTermFinder::new(term).find(text)
|
|
||||||
}
|
}
|
||||||
};
|
}
|
||||||
result.push(Some(contains));
|
(ColumnarValue::Array(texts), ColumnarValue::Scalar(term)) => {
|
||||||
}
|
let term = as_str(term);
|
||||||
|
if let Some(term) = term {
|
||||||
|
let finder = MatchesTermFinder::new(term);
|
||||||
|
|
||||||
Ok(result.to_vector())
|
let texts = compute::cast(texts, &DataType::Utf8View)?;
|
||||||
|
let texts = texts.as_string_view();
|
||||||
|
|
||||||
|
let mut builder = BooleanBuilder::with_capacity(texts.len());
|
||||||
|
texts
|
||||||
|
.iter()
|
||||||
|
.for_each(|text| builder.append_option(text.map(|x| finder.find(x))));
|
||||||
|
ColumnarValue::Array(Arc::new(builder.finish()))
|
||||||
|
} else {
|
||||||
|
ColumnarValue::Array(Arc::new(BooleanArray::new_null(texts.len())))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(ColumnarValue::Array(texts), ColumnarValue::Array(terms)) => {
|
||||||
|
let terms = compute::cast(terms, &DataType::Utf8View)?;
|
||||||
|
let terms = terms.as_string_view();
|
||||||
|
let texts = compute::cast(texts, &DataType::Utf8View)?;
|
||||||
|
let texts = texts.as_string_view();
|
||||||
|
|
||||||
|
let len = texts.len();
|
||||||
|
if terms.len() != len {
|
||||||
|
return Err(DataFusionError::Internal(format!(
|
||||||
|
"input arrays have different lengths: {len}, {}",
|
||||||
|
terms.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut builder = BooleanBuilder::with_capacity(len);
|
||||||
|
for (text, term) in texts.iter().zip(terms.iter()) {
|
||||||
|
let result = match (text, term) {
|
||||||
|
(Some(text), Some(term)) => Some(MatchesTermFinder::new(term).find(text)),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
builder.append_option(result);
|
||||||
|
}
|
||||||
|
ColumnarValue::Array(Arc::new(builder.finish()))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok(v)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -16,14 +16,13 @@ use std::fmt;
|
|||||||
|
|
||||||
use common_query::error::{self, Result};
|
use common_query::error::{self, Result};
|
||||||
use datafusion::arrow::compute::kernels::numeric;
|
use datafusion::arrow::compute::kernels::numeric;
|
||||||
|
use datafusion_common::arrow::compute::kernels::cast;
|
||||||
|
use datafusion_common::arrow::datatypes::DataType;
|
||||||
use datafusion_expr::type_coercion::aggregates::NUMERICS;
|
use datafusion_expr::type_coercion::aggregates::NUMERICS;
|
||||||
use datafusion_expr::{Signature, Volatility};
|
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
|
||||||
use datatypes::arrow::compute::kernels::cast;
|
|
||||||
use datatypes::arrow::datatypes::DataType;
|
|
||||||
use datatypes::vectors::{Helper, VectorRef};
|
|
||||||
use snafu::ResultExt;
|
use snafu::ResultExt;
|
||||||
|
|
||||||
use crate::function::{Function, FunctionContext};
|
use crate::function::{Function, extract_args};
|
||||||
|
|
||||||
/// generates rates from a sequence of adjacent data points.
|
/// generates rates from a sequence of adjacent data points.
|
||||||
#[derive(Clone, Debug, Default)]
|
#[derive(Clone, Debug, Default)]
|
||||||
@@ -48,12 +47,14 @@ impl Function for RateFunction {
|
|||||||
Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable)
|
Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
fn invoke_with_args(
|
||||||
let val = &columns[0].to_arrow_array();
|
&self,
|
||||||
|
args: ScalarFunctionArgs,
|
||||||
|
) -> datafusion_common::Result<ColumnarValue> {
|
||||||
|
let [val, ts] = extract_args(self.name(), &args)?;
|
||||||
let val_0 = val.slice(0, val.len() - 1);
|
let val_0 = val.slice(0, val.len() - 1);
|
||||||
let val_1 = val.slice(1, val.len() - 1);
|
let val_1 = val.slice(1, val.len() - 1);
|
||||||
let dv = numeric::sub(&val_1, &val_0).context(error::ArrowComputeSnafu)?;
|
let dv = numeric::sub(&val_1, &val_0).context(error::ArrowComputeSnafu)?;
|
||||||
let ts = &columns[1].to_arrow_array();
|
|
||||||
let ts_0 = ts.slice(0, ts.len() - 1);
|
let ts_0 = ts.slice(0, ts.len() - 1);
|
||||||
let ts_1 = ts.slice(1, ts.len() - 1);
|
let ts_1 = ts.slice(1, ts.len() - 1);
|
||||||
let dt = numeric::sub(&ts_1, &ts_0).context(error::ArrowComputeSnafu)?;
|
let dt = numeric::sub(&ts_1, &ts_0).context(error::ArrowComputeSnafu)?;
|
||||||
@@ -65,9 +66,8 @@ impl Function for RateFunction {
|
|||||||
typ: DataType::Float64,
|
typ: DataType::Float64,
|
||||||
})?;
|
})?;
|
||||||
let rate = numeric::div(&dv, &dt).context(error::ArrowComputeSnafu)?;
|
let rate = numeric::div(&dv, &dt).context(error::ArrowComputeSnafu)?;
|
||||||
let v = Helper::try_into_vector(&rate).context(error::FromArrowArraySnafu)?;
|
|
||||||
|
|
||||||
Ok(v)
|
Ok(ColumnarValue::Array(rate))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -75,8 +75,10 @@ impl Function for RateFunction {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use arrow_schema::Field;
|
||||||
|
use datafusion_common::arrow::array::{AsArray, Float32Array, Float64Array, Int64Array};
|
||||||
|
use datafusion_common::arrow::datatypes::Float64Type;
|
||||||
use datafusion_expr::TypeSignature;
|
use datafusion_expr::TypeSignature;
|
||||||
use datatypes::vectors::{Float32Vector, Float64Vector, Int64Vector};
|
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
#[test]
|
#[test]
|
||||||
@@ -93,12 +95,22 @@ mod tests {
|
|||||||
let values = vec![1.0, 3.0, 6.0];
|
let values = vec![1.0, 3.0, 6.0];
|
||||||
let ts = vec![0, 1, 2];
|
let ts = vec![0, 1, 2];
|
||||||
|
|
||||||
let args: Vec<VectorRef> = vec![
|
let args = ScalarFunctionArgs {
|
||||||
Arc::new(Float32Vector::from_vec(values)),
|
args: vec![
|
||||||
Arc::new(Int64Vector::from_vec(ts)),
|
ColumnarValue::Array(Arc::new(Float32Array::from(values))),
|
||||||
];
|
ColumnarValue::Array(Arc::new(Int64Array::from(ts))),
|
||||||
let vector = rate.eval(&FunctionContext::default(), &args).unwrap();
|
],
|
||||||
let expect: VectorRef = Arc::new(Float64Vector::from_vec(vec![2.0, 3.0]));
|
arg_fields: vec![],
|
||||||
|
number_rows: 3,
|
||||||
|
return_field: Arc::new(Field::new("x", DataType::Float64, false)),
|
||||||
|
config_options: Arc::new(Default::default()),
|
||||||
|
};
|
||||||
|
let result = rate
|
||||||
|
.invoke_with_args(args)
|
||||||
|
.and_then(|x| x.to_array(2))
|
||||||
|
.unwrap();
|
||||||
|
let vector = result.as_primitive::<Float64Type>();
|
||||||
|
let expect = &Float64Array::from(vec![2.0, 3.0]);
|
||||||
assert_eq!(expect, vector);
|
assert_eq!(expect, vector);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,15 +15,18 @@
|
|||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
use common_query::error::Result;
|
||||||
use common_time::{Date, Timestamp};
|
use common_time::{Date, Timestamp};
|
||||||
use datafusion_expr::{Signature, Volatility};
|
use datafusion_common::DataFusionError;
|
||||||
use datatypes::arrow::datatypes::{DataType, TimeUnit};
|
use datafusion_common::arrow::array::{
|
||||||
use datatypes::prelude::ConcreteDataType;
|
Array, ArrayRef, AsArray, Date32Array, Int64Array, Int64Builder,
|
||||||
use datatypes::vectors::{Int64Vector, VectorRef};
|
};
|
||||||
use snafu::ensure;
|
use datafusion_common::arrow::compute;
|
||||||
|
use datafusion_common::arrow::datatypes::{ArrowTimestampType, DataType, Date32Type, TimeUnit};
|
||||||
|
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
|
||||||
|
|
||||||
use crate::function::{Function, FunctionContext};
|
use crate::function::{Function, FunctionContext, extract_args, find_function_context};
|
||||||
|
use crate::helper::with_match_timestamp_types;
|
||||||
|
|
||||||
/// A function to convert the column into the unix timestamp in seconds.
|
/// A function to convert the column into the unix timestamp in seconds.
|
||||||
#[derive(Clone, Debug, Default)]
|
#[derive(Clone, Debug, Default)]
|
||||||
@@ -44,15 +47,23 @@ fn convert_to_seconds(arg: &str, func_ctx: &FunctionContext) -> Option<i64> {
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
fn convert_timestamps_to_seconds(vector: &VectorRef) -> Vec<Option<i64>> {
|
fn convert_timestamps_to_seconds(array: &ArrayRef) -> datafusion_common::Result<Vec<Option<i64>>> {
|
||||||
(0..vector.len())
|
with_match_timestamp_types!(array.data_type(), |$S| {
|
||||||
.map(|i| vector.get(i).as_timestamp().map(|ts| ts.split().0))
|
let array = array.as_primitive::<$S>();
|
||||||
.collect::<Vec<Option<i64>>>()
|
array
|
||||||
|
.iter()
|
||||||
|
.map(|x| x.map(|i| Timestamp::new(i, $S::UNIT.into()).split().0))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn convert_dates_to_seconds(vector: &VectorRef) -> Vec<Option<i64>> {
|
fn convert_dates_to_seconds(vector: &Date32Array) -> Vec<Option<i64>> {
|
||||||
(0..vector.len())
|
(0..vector.len())
|
||||||
.map(|i| vector.get(i).as_date().map(|dt| dt.to_secs()))
|
.map(|i| {
|
||||||
|
vector
|
||||||
|
.is_valid(i)
|
||||||
|
.then(|| Date::from(vector.value(i)).to_secs())
|
||||||
|
})
|
||||||
.collect::<Vec<Option<i64>>>()
|
.collect::<Vec<Option<i64>>>()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -82,43 +93,43 @@ impl Function for ToUnixtimeFunction {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval(&self, ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
fn invoke_with_args(
|
||||||
ensure!(
|
&self,
|
||||||
columns.len() == 1,
|
args: ScalarFunctionArgs,
|
||||||
InvalidFuncArgsSnafu {
|
) -> datafusion_common::Result<ColumnarValue> {
|
||||||
err_msg: format!(
|
let ctx = find_function_context(&args)?;
|
||||||
"The length of the args is not correct, expect exactly one, have: {}",
|
let [arg0] = extract_args(self.name(), &args)?;
|
||||||
columns.len()
|
let result: ArrayRef = match arg0.data_type() {
|
||||||
),
|
DataType::Utf8 => {
|
||||||
|
let arg0 = arg0.as_string::<i32>();
|
||||||
|
let mut builder = Int64Builder::with_capacity(arg0.len());
|
||||||
|
for i in 0..arg0.len() {
|
||||||
|
builder.append_option(
|
||||||
|
arg0.is_valid(i)
|
||||||
|
.then(|| convert_to_seconds(arg0.value(i), ctx))
|
||||||
|
.flatten(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Arc::new(builder.finish())
|
||||||
}
|
}
|
||||||
);
|
DataType::Int64 | DataType::Int32 => compute::cast(&arg0, &DataType::Int64)?,
|
||||||
|
DataType::Date32 => {
|
||||||
let vector = &columns[0];
|
let vector = arg0.as_primitive::<Date32Type>();
|
||||||
|
|
||||||
match columns[0].data_type() {
|
|
||||||
ConcreteDataType::String(_) => Ok(Arc::new(Int64Vector::from(
|
|
||||||
(0..vector.len())
|
|
||||||
.map(|i| convert_to_seconds(&vector.get(i).to_string(), ctx))
|
|
||||||
.collect::<Vec<_>>(),
|
|
||||||
))),
|
|
||||||
ConcreteDataType::Int64(_) | ConcreteDataType::Int32(_) => {
|
|
||||||
// Safety: cast always successfully at here
|
|
||||||
Ok(vector.cast(&ConcreteDataType::int64_datatype()).unwrap())
|
|
||||||
}
|
|
||||||
ConcreteDataType::Date(_) => {
|
|
||||||
let seconds = convert_dates_to_seconds(vector);
|
let seconds = convert_dates_to_seconds(vector);
|
||||||
Ok(Arc::new(Int64Vector::from(seconds)))
|
Arc::new(Int64Array::from(seconds))
|
||||||
}
|
}
|
||||||
ConcreteDataType::Timestamp(_) => {
|
DataType::Timestamp(_, _) => {
|
||||||
let seconds = convert_timestamps_to_seconds(vector);
|
let seconds = convert_timestamps_to_seconds(&arg0)?;
|
||||||
Ok(Arc::new(Int64Vector::from(seconds)))
|
Arc::new(Int64Array::from(seconds))
|
||||||
}
|
}
|
||||||
_ => UnsupportedInputDataTypeSnafu {
|
x => {
|
||||||
function: NAME,
|
return Err(DataFusionError::Execution(format!(
|
||||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
"{}: unsupported input data type {x}",
|
||||||
|
self.name()
|
||||||
|
)));
|
||||||
}
|
}
|
||||||
.fail(),
|
};
|
||||||
}
|
Ok(ColumnarValue::Array(result))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -130,14 +141,41 @@ impl fmt::Display for ToUnixtimeFunction {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use datafusion_expr::TypeSignature;
|
use arrow_schema::Field;
|
||||||
use datatypes::value::Value;
|
use datafusion_common::arrow::array::{
|
||||||
use datatypes::vectors::{
|
StringArray, TimestampMillisecondArray, TimestampSecondArray,
|
||||||
DateVector, StringVector, TimestampMillisecondVector, TimestampSecondVector,
|
|
||||||
};
|
};
|
||||||
|
use datafusion_common::arrow::datatypes::Int64Type;
|
||||||
|
use datafusion_common::config::ConfigOptions;
|
||||||
|
use datafusion_expr::TypeSignature;
|
||||||
|
|
||||||
use super::{ToUnixtimeFunction, *};
|
use super::{ToUnixtimeFunction, *};
|
||||||
|
|
||||||
|
fn test_invoke(arg0: ArrayRef, expects: &[Option<i64>]) {
|
||||||
|
let mut config_options = ConfigOptions::default();
|
||||||
|
config_options.extensions.insert(FunctionContext::default());
|
||||||
|
let config_options = Arc::new(config_options);
|
||||||
|
|
||||||
|
let number_rows = arg0.len();
|
||||||
|
let args = ScalarFunctionArgs {
|
||||||
|
args: vec![ColumnarValue::Array(arg0)],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows,
|
||||||
|
return_field: Arc::new(Field::new("", DataType::Int64, true)),
|
||||||
|
config_options,
|
||||||
|
};
|
||||||
|
let result = ToUnixtimeFunction
|
||||||
|
.invoke_with_args(args)
|
||||||
|
.and_then(|x| x.to_array(number_rows))
|
||||||
|
.unwrap();
|
||||||
|
let result = result.as_primitive::<Int64Type>();
|
||||||
|
|
||||||
|
assert_eq!(result.len(), expects.len());
|
||||||
|
for (actual, expect) in result.iter().zip(expects) {
|
||||||
|
assert_eq!(&actual, expect);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_string_to_unixtime() {
|
fn test_string_to_unixtime() {
|
||||||
let f = ToUnixtimeFunction;
|
let f = ToUnixtimeFunction;
|
||||||
@@ -167,115 +205,36 @@ mod tests {
|
|||||||
Some("invalid_time_stamp"),
|
Some("invalid_time_stamp"),
|
||||||
];
|
];
|
||||||
let results = [Some(1677652502), None, Some(1656633600), None];
|
let results = [Some(1677652502), None, Some(1656633600), None];
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(StringVector::from(times.clone()))];
|
let arg0 = Arc::new(StringArray::from(times));
|
||||||
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
|
test_invoke(arg0, &results);
|
||||||
assert_eq!(4, vector.len());
|
|
||||||
for (i, _t) in times.iter().enumerate() {
|
|
||||||
let v = vector.get(i);
|
|
||||||
if i == 1 || i == 3 {
|
|
||||||
assert_eq!(Value::Null, v);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
match v {
|
|
||||||
Value::Int64(ts) => {
|
|
||||||
assert_eq!(ts, (*results.get(i).unwrap()).unwrap());
|
|
||||||
}
|
|
||||||
_ => unreachable!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_int_to_unixtime() {
|
fn test_int_to_unixtime() {
|
||||||
let f = ToUnixtimeFunction;
|
|
||||||
|
|
||||||
let times = vec![Some(3_i64), None, Some(5_i64), None];
|
let times = vec![Some(3_i64), None, Some(5_i64), None];
|
||||||
let results = [Some(3), None, Some(5), None];
|
let results = [Some(3), None, Some(5), None];
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(Int64Vector::from(times.clone()))];
|
let arg0 = Arc::new(Int64Array::from(times));
|
||||||
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
|
test_invoke(arg0, &results);
|
||||||
assert_eq!(4, vector.len());
|
|
||||||
for (i, _t) in times.iter().enumerate() {
|
|
||||||
let v = vector.get(i);
|
|
||||||
if i == 1 || i == 3 {
|
|
||||||
assert_eq!(Value::Null, v);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
match v {
|
|
||||||
Value::Int64(ts) => {
|
|
||||||
assert_eq!(ts, (*results.get(i).unwrap()).unwrap());
|
|
||||||
}
|
|
||||||
_ => unreachable!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_date_to_unixtime() {
|
fn test_date_to_unixtime() {
|
||||||
let f = ToUnixtimeFunction;
|
|
||||||
|
|
||||||
let times = vec![Some(123), None, Some(42), None];
|
let times = vec![Some(123), None, Some(42), None];
|
||||||
let results = [Some(10627200), None, Some(3628800), None];
|
let results = [Some(10627200), None, Some(3628800), None];
|
||||||
let date_vector = DateVector::from(times.clone());
|
let arg0 = Arc::new(Date32Array::from(times));
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(date_vector)];
|
test_invoke(arg0, &results);
|
||||||
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
|
|
||||||
assert_eq!(4, vector.len());
|
|
||||||
for (i, _t) in times.iter().enumerate() {
|
|
||||||
let v = vector.get(i);
|
|
||||||
if i == 1 || i == 3 {
|
|
||||||
assert_eq!(Value::Null, v);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
match v {
|
|
||||||
Value::Int64(ts) => {
|
|
||||||
assert_eq!(ts, (*results.get(i).unwrap()).unwrap());
|
|
||||||
}
|
|
||||||
_ => unreachable!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_timestamp_to_unixtime() {
|
fn test_timestamp_to_unixtime() {
|
||||||
let f = ToUnixtimeFunction;
|
|
||||||
|
|
||||||
let times = vec![Some(123), None, Some(42), None];
|
let times = vec![Some(123), None, Some(42), None];
|
||||||
let results = [Some(123), None, Some(42), None];
|
let results = [Some(123), None, Some(42), None];
|
||||||
let ts_vector = TimestampSecondVector::from(times.clone());
|
let arg0 = Arc::new(TimestampSecondArray::from(times));
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(ts_vector)];
|
test_invoke(arg0, &results);
|
||||||
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
|
|
||||||
assert_eq!(4, vector.len());
|
|
||||||
for (i, _t) in times.iter().enumerate() {
|
|
||||||
let v = vector.get(i);
|
|
||||||
if i == 1 || i == 3 {
|
|
||||||
assert_eq!(Value::Null, v);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
match v {
|
|
||||||
Value::Int64(ts) => {
|
|
||||||
assert_eq!(ts, (*results.get(i).unwrap()).unwrap());
|
|
||||||
}
|
|
||||||
_ => unreachable!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let times = vec![Some(123000), None, Some(42000), None];
|
let times = vec![Some(123000), None, Some(42000), None];
|
||||||
let results = [Some(123), None, Some(42), None];
|
let results = [Some(123), None, Some(42), None];
|
||||||
let ts_vector = TimestampMillisecondVector::from(times.clone());
|
let arg0 = Arc::new(TimestampMillisecondArray::from(times));
|
||||||
let args: Vec<VectorRef> = vec![Arc::new(ts_vector)];
|
test_invoke(arg0, &results);
|
||||||
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
|
|
||||||
assert_eq!(4, vector.len());
|
|
||||||
for (i, _t) in times.iter().enumerate() {
|
|
||||||
let v = vector.get(i);
|
|
||||||
if i == 1 || i == 3 {
|
|
||||||
assert_eq!(Value::Null, v);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
match v {
|
|
||||||
Value::Int64(ts) => {
|
|
||||||
assert_eq!(ts, (*results.get(i).unwrap()).unwrap());
|
|
||||||
}
|
|
||||||
_ => unreachable!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,16 +13,17 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
use std::fmt::Display;
|
use std::fmt::Display;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use common_query::error::{InvalidFuncArgsSnafu, InvalidVectorStringSnafu, Result};
|
use common_query::error::{InvalidVectorStringSnafu, Result};
|
||||||
use datafusion::arrow::datatypes::DataType;
|
use datafusion_common::arrow::array::{Array, AsArray, BinaryViewBuilder};
|
||||||
use datafusion_expr::{Signature, Volatility};
|
use datafusion_common::arrow::compute;
|
||||||
use datatypes::scalars::ScalarVectorBuilder;
|
use datafusion_common::arrow::datatypes::DataType;
|
||||||
|
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
|
||||||
use datatypes::types::parse_string_to_vector_type_value;
|
use datatypes::types::parse_string_to_vector_type_value;
|
||||||
use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef};
|
use snafu::ResultExt;
|
||||||
use snafu::{ResultExt, ensure};
|
|
||||||
|
|
||||||
use crate::function::{Function, FunctionContext};
|
use crate::function::{Function, extract_args};
|
||||||
|
|
||||||
const NAME: &str = "parse_vec";
|
const NAME: &str = "parse_vec";
|
||||||
|
|
||||||
@@ -35,40 +36,36 @@ impl Function for ParseVectorFunction {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
||||||
Ok(DataType::Binary)
|
Ok(DataType::BinaryView)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
Signature::string(1, Volatility::Immutable)
|
Signature::string(1, Volatility::Immutable)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
fn invoke_with_args(
|
||||||
ensure!(
|
&self,
|
||||||
columns.len() == 1,
|
args: ScalarFunctionArgs,
|
||||||
InvalidFuncArgsSnafu {
|
) -> datafusion_common::Result<ColumnarValue> {
|
||||||
err_msg: format!(
|
let [arg0] = extract_args(self.name(), &args)?;
|
||||||
"The length of the args is not correct, expect exactly one, have: {}",
|
let arg0 = compute::cast(&arg0, &DataType::Utf8View)?;
|
||||||
columns.len()
|
let column = arg0.as_string_view();
|
||||||
),
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
let column = &columns[0];
|
|
||||||
let size = column.len();
|
let size = column.len();
|
||||||
|
|
||||||
let mut result = BinaryVectorBuilder::with_capacity(size);
|
let mut builder = BinaryViewBuilder::with_capacity(size);
|
||||||
for i in 0..size {
|
for i in 0..size {
|
||||||
let value = column.get(i).as_string();
|
let value = column.is_valid(i).then(|| column.value(i));
|
||||||
if let Some(value) = value {
|
if let Some(value) = value {
|
||||||
let res = parse_string_to_vector_type_value(&value, None)
|
let result = parse_string_to_vector_type_value(value, None)
|
||||||
.context(InvalidVectorStringSnafu { vec_str: &value })?;
|
.context(InvalidVectorStringSnafu { vec_str: value })?;
|
||||||
result.push(Some(&res));
|
builder.append_value(result);
|
||||||
} else {
|
} else {
|
||||||
result.push_null();
|
builder.append_null();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(result.to_vector())
|
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -82,9 +79,9 @@ impl Display for ParseVectorFunction {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use arrow_schema::Field;
|
||||||
use common_base::bytes::Bytes;
|
use common_base::bytes::Bytes;
|
||||||
use datatypes::value::Value;
|
use datafusion_common::arrow::array::StringViewArray;
|
||||||
use datatypes::vectors::StringVector;
|
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
@@ -92,66 +89,84 @@ mod tests {
|
|||||||
fn test_parse_vector() {
|
fn test_parse_vector() {
|
||||||
let func = ParseVectorFunction;
|
let func = ParseVectorFunction;
|
||||||
|
|
||||||
let input = Arc::new(StringVector::from(vec![
|
let arg0 = Arc::new(StringViewArray::from_iter([
|
||||||
Some("[1.0,2.0,3.0]".to_string()),
|
Some("[1.0,2.0,3.0]".to_string()),
|
||||||
Some("[4.0,5.0,6.0]".to_string()),
|
Some("[4.0,5.0,6.0]".to_string()),
|
||||||
None,
|
None,
|
||||||
]));
|
]));
|
||||||
|
let args = ScalarFunctionArgs {
|
||||||
|
args: vec![ColumnarValue::Array(arg0)],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows: 3,
|
||||||
|
return_field: Arc::new(Field::new("", DataType::BinaryView, false)),
|
||||||
|
config_options: Arc::new(Default::default()),
|
||||||
|
};
|
||||||
|
|
||||||
let result = func.eval(&FunctionContext::default(), &[input]).unwrap();
|
let result = func
|
||||||
|
.invoke_with_args(args)
|
||||||
|
.and_then(|x| x.to_array(3))
|
||||||
|
.unwrap();
|
||||||
|
let result = result.as_binary_view();
|
||||||
|
|
||||||
let result = result.as_ref();
|
|
||||||
assert_eq!(result.len(), 3);
|
assert_eq!(result.len(), 3);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
result.get(0),
|
result.value(0),
|
||||||
Value::Binary(Bytes::from(
|
&Bytes::from(
|
||||||
[1.0f32, 2.0, 3.0]
|
[1.0f32, 2.0, 3.0]
|
||||||
.iter()
|
.iter()
|
||||||
.flat_map(|e| e.to_le_bytes())
|
.flat_map(|e| e.to_le_bytes())
|
||||||
.collect::<Vec<u8>>()
|
.collect::<Vec<u8>>()
|
||||||
))
|
)
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
result.get(1),
|
result.value(1),
|
||||||
Value::Binary(Bytes::from(
|
&Bytes::from(
|
||||||
[4.0f32, 5.0, 6.0]
|
[4.0f32, 5.0, 6.0]
|
||||||
.iter()
|
.iter()
|
||||||
.flat_map(|e| e.to_le_bytes())
|
.flat_map(|e| e.to_le_bytes())
|
||||||
.collect::<Vec<u8>>()
|
.collect::<Vec<u8>>()
|
||||||
))
|
)
|
||||||
);
|
);
|
||||||
assert!(result.get(2).is_null());
|
assert!(result.is_null(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_parse_vector_error() {
|
fn test_parse_vector_error() {
|
||||||
let func = ParseVectorFunction;
|
let func = ParseVectorFunction;
|
||||||
|
|
||||||
let input = Arc::new(StringVector::from(vec![
|
let inputs = [
|
||||||
Some("[1.0,2.0,3.0]".to_string()),
|
StringViewArray::from_iter([
|
||||||
Some("[4.0,5.0,6.0]".to_string()),
|
Some("[1.0,2.0,3.0]".to_string()),
|
||||||
Some("[7.0,8.0,9.0".to_string()),
|
Some("[4.0,5.0,6.0]".to_string()),
|
||||||
]));
|
Some("[7.0,8.0,9.0".to_string()),
|
||||||
|
]),
|
||||||
|
StringViewArray::from_iter([
|
||||||
|
Some("[1.0,2.0,3.0]".to_string()),
|
||||||
|
Some("[4.0,5.0,6.0]".to_string()),
|
||||||
|
Some("7.0,8.0,9.0]".to_string()),
|
||||||
|
]),
|
||||||
|
StringViewArray::from_iter([
|
||||||
|
Some("[1.0,2.0,3.0]".to_string()),
|
||||||
|
Some("[4.0,5.0,6.0]".to_string()),
|
||||||
|
Some("[7.0,hello,9.0]".to_string()),
|
||||||
|
]),
|
||||||
|
];
|
||||||
|
let expected = [
|
||||||
|
"External error: Invalid vector string: [7.0,8.0,9.0",
|
||||||
|
"External error: Invalid vector string: 7.0,8.0,9.0]",
|
||||||
|
"External error: Invalid vector string: [7.0,hello,9.0]",
|
||||||
|
];
|
||||||
|
|
||||||
let result = func.eval(&FunctionContext::default(), &[input]);
|
for (input, expected) in inputs.into_iter().zip(expected.into_iter()) {
|
||||||
assert!(result.is_err());
|
let args = ScalarFunctionArgs {
|
||||||
|
args: vec![ColumnarValue::Array(Arc::new(input))],
|
||||||
let input = Arc::new(StringVector::from(vec![
|
arg_fields: vec![],
|
||||||
Some("[1.0,2.0,3.0]".to_string()),
|
number_rows: 3,
|
||||||
Some("[4.0,5.0,6.0]".to_string()),
|
return_field: Arc::new(Field::new("", DataType::BinaryView, false)),
|
||||||
Some("7.0,8.0,9.0]".to_string()),
|
config_options: Arc::new(Default::default()),
|
||||||
]));
|
};
|
||||||
|
let result = func.invoke_with_args(args);
|
||||||
let result = func.eval(&FunctionContext::default(), &[input]);
|
assert_eq!(result.unwrap_err().to_string(), expected);
|
||||||
assert!(result.is_err());
|
}
|
||||||
|
|
||||||
let input = Arc::new(StringVector::from(vec![
|
|
||||||
Some("[1.0,2.0,3.0]".to_string()),
|
|
||||||
Some("[4.0,5.0,6.0]".to_string()),
|
|
||||||
Some("[7.0,hello,9.0]".to_string()),
|
|
||||||
]));
|
|
||||||
|
|
||||||
let result = func.eval(&FunctionContext::default(), &[input]);
|
|
||||||
assert!(result.is_err());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,18 +13,18 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
use std::fmt::Display;
|
use std::fmt::Display;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use common_query::error::{InvalidFuncArgsSnafu, Result};
|
use common_query::error::Result;
|
||||||
use datafusion::arrow::datatypes::DataType;
|
use datafusion_common::DataFusionError;
|
||||||
|
use datafusion_common::arrow::array::{Array, AsArray, StringViewBuilder};
|
||||||
|
use datafusion_common::arrow::compute;
|
||||||
|
use datafusion_common::arrow::datatypes::DataType;
|
||||||
use datafusion_expr::type_coercion::aggregates::BINARYS;
|
use datafusion_expr::type_coercion::aggregates::BINARYS;
|
||||||
use datafusion_expr::{Signature, TypeSignature, Volatility};
|
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, TypeSignature, Volatility};
|
||||||
use datatypes::scalars::ScalarVectorBuilder;
|
|
||||||
use datatypes::types::vector_type_value_to_string;
|
use datatypes::types::vector_type_value_to_string;
|
||||||
use datatypes::value::Value;
|
|
||||||
use datatypes::vectors::{MutableVector, StringVectorBuilder, VectorRef};
|
|
||||||
use snafu::ensure;
|
|
||||||
|
|
||||||
use crate::function::{Function, FunctionContext};
|
use crate::function::{Function, extract_args};
|
||||||
|
|
||||||
const NAME: &str = "vec_to_string";
|
const NAME: &str = "vec_to_string";
|
||||||
|
|
||||||
@@ -37,7 +37,7 @@ impl Function for VectorToStringFunction {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
||||||
Ok(DataType::Utf8)
|
Ok(DataType::Utf8View)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
@@ -50,51 +50,40 @@ impl Function for VectorToStringFunction {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
fn invoke_with_args(
|
||||||
ensure!(
|
&self,
|
||||||
columns.len() == 1,
|
args: ScalarFunctionArgs,
|
||||||
InvalidFuncArgsSnafu {
|
) -> datafusion_common::Result<ColumnarValue> {
|
||||||
err_msg: format!(
|
let [arg0] = extract_args(self.name(), &args)?;
|
||||||
"The length of the args is not correct, expect exactly one, have: {}",
|
let arg0 = compute::cast(&arg0, &DataType::BinaryView)?;
|
||||||
columns.len()
|
let column = arg0.as_binary_view();
|
||||||
),
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
let column = &columns[0];
|
|
||||||
let size = column.len();
|
let size = column.len();
|
||||||
|
|
||||||
let mut result = StringVectorBuilder::with_capacity(size);
|
let mut builder = StringViewBuilder::with_capacity(size);
|
||||||
for i in 0..size {
|
for i in 0..size {
|
||||||
let value = column.get(i);
|
let value = column.is_valid(i).then(|| column.value(i));
|
||||||
match value {
|
match value {
|
||||||
Value::Binary(bytes) => {
|
Some(bytes) => {
|
||||||
let len = bytes.len();
|
let len = bytes.len();
|
||||||
if len % std::mem::size_of::<f32>() != 0 {
|
if len % std::mem::size_of::<f32>() != 0 {
|
||||||
return InvalidFuncArgsSnafu {
|
return Err(DataFusionError::Execution(format!(
|
||||||
err_msg: format!("Invalid binary length of vector: {}", len),
|
"Invalid binary length of vector: {len}"
|
||||||
}
|
)));
|
||||||
.fail();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let dim = len / std::mem::size_of::<f32>();
|
let dim = len / std::mem::size_of::<f32>();
|
||||||
// Safety: `dim` is calculated from the length of `bytes` and is guaranteed to be valid
|
// Safety: `dim` is calculated from the length of `bytes` and is guaranteed to be valid
|
||||||
let res = vector_type_value_to_string(&bytes, dim as _).unwrap();
|
let result = vector_type_value_to_string(bytes, dim as _).unwrap();
|
||||||
result.push(Some(&res));
|
builder.append_value(result);
|
||||||
}
|
}
|
||||||
Value::Null => {
|
None => {
|
||||||
result.push_null();
|
builder.append_null();
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
return InvalidFuncArgsSnafu {
|
|
||||||
err_msg: format!("Invalid value type: {:?}", value.data_type()),
|
|
||||||
}
|
|
||||||
.fail();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(result.to_vector())
|
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -106,8 +95,8 @@ impl Display for VectorToStringFunction {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use datatypes::value::Value;
|
use arrow_schema::Field;
|
||||||
use datatypes::vectors::BinaryVectorBuilder;
|
use datafusion_common::arrow::array::BinaryViewBuilder;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
@@ -115,29 +104,39 @@ mod tests {
|
|||||||
fn test_vector_to_string() {
|
fn test_vector_to_string() {
|
||||||
let func = VectorToStringFunction;
|
let func = VectorToStringFunction;
|
||||||
|
|
||||||
let mut builder = BinaryVectorBuilder::with_capacity(3);
|
let mut builder = BinaryViewBuilder::with_capacity(3);
|
||||||
builder.push(Some(
|
builder.append_option(Some(
|
||||||
[1.0f32, 2.0, 3.0]
|
[1.0f32, 2.0, 3.0]
|
||||||
.iter()
|
.iter()
|
||||||
.flat_map(|e| e.to_le_bytes())
|
.flat_map(|e| e.to_le_bytes())
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
.as_slice(),
|
.as_slice(),
|
||||||
));
|
));
|
||||||
builder.push(Some(
|
builder.append_option(Some(
|
||||||
[4.0f32, 5.0, 6.0]
|
[4.0f32, 5.0, 6.0]
|
||||||
.iter()
|
.iter()
|
||||||
.flat_map(|e| e.to_le_bytes())
|
.flat_map(|e| e.to_le_bytes())
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
.as_slice(),
|
.as_slice(),
|
||||||
));
|
));
|
||||||
builder.push_null();
|
builder.append_null();
|
||||||
let vector = builder.to_vector();
|
let args = ScalarFunctionArgs {
|
||||||
|
args: vec![ColumnarValue::Array(Arc::new(builder.finish()))],
|
||||||
|
arg_fields: vec![],
|
||||||
|
number_rows: 3,
|
||||||
|
return_field: Arc::new(Field::new("", DataType::Utf8View, false)),
|
||||||
|
config_options: Arc::new(Default::default()),
|
||||||
|
};
|
||||||
|
|
||||||
let result = func.eval(&FunctionContext::default(), &[vector]).unwrap();
|
let result = func
|
||||||
|
.invoke_with_args(args)
|
||||||
|
.and_then(|x| x.to_array(3))
|
||||||
|
.unwrap();
|
||||||
|
let result = result.as_string_view();
|
||||||
|
|
||||||
assert_eq!(result.len(), 3);
|
assert_eq!(result.len(), 3);
|
||||||
assert_eq!(result.get(0), Value::String("[1,2,3]".to_string().into()));
|
assert_eq!(result.value(0), "[1,2,3]");
|
||||||
assert_eq!(result.get(1), Value::String("[4,5,6]".to_string().into()));
|
assert_eq!(result.value(1), "[4,5,6]");
|
||||||
assert_eq!(result.get(2), Value::Null);
|
assert!(result.is_null(2));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -46,8 +46,18 @@ macro_rules! define_distance_function {
|
|||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
helper::one_of_sigs2(
|
helper::one_of_sigs2(
|
||||||
vec![DataType::Utf8, DataType::Binary],
|
vec![
|
||||||
vec![DataType::Utf8, DataType::Binary],
|
DataType::Utf8,
|
||||||
|
DataType::Utf8View,
|
||||||
|
DataType::Binary,
|
||||||
|
DataType::BinaryView,
|
||||||
|
],
|
||||||
|
vec![
|
||||||
|
DataType::Utf8,
|
||||||
|
DataType::Utf8View,
|
||||||
|
DataType::Binary,
|
||||||
|
DataType::BinaryView,
|
||||||
|
],
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -57,6 +57,7 @@ impl Function for ElemProductFunction {
|
|||||||
vec![
|
vec![
|
||||||
TypeSignature::Uniform(1, STRINGS.to_vec()),
|
TypeSignature::Uniform(1, STRINGS.to_vec()),
|
||||||
TypeSignature::Uniform(1, BINARYS.to_vec()),
|
TypeSignature::Uniform(1, BINARYS.to_vec()),
|
||||||
|
TypeSignature::Uniform(1, vec![DataType::BinaryView]),
|
||||||
],
|
],
|
||||||
Volatility::Immutable,
|
Volatility::Immutable,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ impl Function for ElemSumFunction {
|
|||||||
vec![
|
vec![
|
||||||
TypeSignature::Uniform(1, STRINGS.to_vec()),
|
TypeSignature::Uniform(1, STRINGS.to_vec()),
|
||||||
TypeSignature::Uniform(1, BINARYS.to_vec()),
|
TypeSignature::Uniform(1, BINARYS.to_vec()),
|
||||||
|
TypeSignature::Uniform(1, vec![DataType::BinaryView]),
|
||||||
],
|
],
|
||||||
Volatility::Immutable,
|
Volatility::Immutable,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -20,7 +20,9 @@ use datafusion_common::ScalarValue;
|
|||||||
/// Convert a string or binary literal to a vector literal.
|
/// Convert a string or binary literal to a vector literal.
|
||||||
pub fn as_veclit(arg: &ScalarValue) -> Result<Option<Cow<'_, [f32]>>> {
|
pub fn as_veclit(arg: &ScalarValue) -> Result<Option<Cow<'_, [f32]>>> {
|
||||||
match arg {
|
match arg {
|
||||||
ScalarValue::Binary(b) => b.as_ref().map(|x| binlit_as_veclit(x)).transpose(),
|
ScalarValue::Binary(b) | ScalarValue::BinaryView(b) => {
|
||||||
|
b.as_ref().map(|x| binlit_as_veclit(x)).transpose()
|
||||||
|
}
|
||||||
ScalarValue::Utf8(s) | ScalarValue::Utf8View(s) => s
|
ScalarValue::Utf8(s) | ScalarValue::Utf8View(s) => s
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(|x| parse_veclit_from_strlit(x).map(Cow::Owned))
|
.map(|x| parse_veclit_from_strlit(x).map(Cow::Owned))
|
||||||
|
|||||||
@@ -65,7 +65,7 @@ impl Function for ScalarAddFunction {
|
|||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
helper::one_of_sigs2(
|
helper::one_of_sigs2(
|
||||||
vec![DataType::Float64],
|
vec![DataType::Float64],
|
||||||
vec![DataType::Utf8, DataType::Binary],
|
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -65,7 +65,12 @@ impl Function for ScalarMulFunction {
|
|||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
helper::one_of_sigs2(
|
helper::one_of_sigs2(
|
||||||
vec![DataType::Float64],
|
vec![DataType::Float64],
|
||||||
vec![DataType::Utf8, DataType::Binary],
|
vec![
|
||||||
|
DataType::Utf8,
|
||||||
|
DataType::Utf8View,
|
||||||
|
DataType::Binary,
|
||||||
|
DataType::BinaryView,
|
||||||
|
],
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -56,8 +56,8 @@ impl Function for VectorAddFunction {
|
|||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
helper::one_of_sigs2(
|
helper::one_of_sigs2(
|
||||||
vec![DataType::Utf8, DataType::Binary],
|
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
|
||||||
vec![DataType::Utf8, DataType::Binary],
|
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -57,8 +57,8 @@ impl Function for VectorDivFunction {
|
|||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
helper::one_of_sigs2(
|
helper::one_of_sigs2(
|
||||||
vec![DataType::Utf8, DataType::Binary],
|
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
|
||||||
vec![DataType::Utf8, DataType::Binary],
|
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -57,8 +57,8 @@ impl Function for VectorMulFunction {
|
|||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
helper::one_of_sigs2(
|
helper::one_of_sigs2(
|
||||||
vec![DataType::Utf8, DataType::Binary],
|
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
|
||||||
vec![DataType::Utf8, DataType::Binary],
|
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -60,6 +60,7 @@ impl Function for VectorNormFunction {
|
|||||||
vec![
|
vec![
|
||||||
TypeSignature::Uniform(1, STRINGS.to_vec()),
|
TypeSignature::Uniform(1, STRINGS.to_vec()),
|
||||||
TypeSignature::Uniform(1, BINARYS.to_vec()),
|
TypeSignature::Uniform(1, BINARYS.to_vec()),
|
||||||
|
TypeSignature::Uniform(1, vec![DataType::BinaryView]),
|
||||||
],
|
],
|
||||||
Volatility::Immutable,
|
Volatility::Immutable,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -56,8 +56,8 @@ impl Function for VectorSubFunction {
|
|||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
helper::one_of_sigs2(
|
helper::one_of_sigs2(
|
||||||
vec![DataType::Utf8, DataType::Binary],
|
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
|
||||||
vec![DataType::Utf8, DataType::Binary],
|
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -13,14 +13,13 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
use common_query::error::Result;
|
use common_query::error::Result;
|
||||||
use datafusion::arrow::datatypes::DataType;
|
use datafusion::arrow::datatypes::DataType;
|
||||||
use datafusion_expr::{Signature, Volatility};
|
use datafusion_common::ScalarValue;
|
||||||
use datatypes::vectors::{StringVector, VectorRef};
|
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
|
||||||
|
|
||||||
use crate::function::{Function, FunctionContext};
|
use crate::function::Function;
|
||||||
|
|
||||||
#[derive(Clone, Debug, Default)]
|
#[derive(Clone, Debug, Default)]
|
||||||
pub(crate) struct PGVersionFunction;
|
pub(crate) struct PGVersionFunction;
|
||||||
@@ -37,18 +36,17 @@ impl Function for PGVersionFunction {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
||||||
Ok(DataType::Utf8)
|
Ok(DataType::Utf8View)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
Signature::exact(vec![], Volatility::Immutable)
|
Signature::exact(vec![], Volatility::Immutable)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval(&self, _func_ctx: &FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
|
fn invoke_with_args(&self, _: ScalarFunctionArgs) -> datafusion_common::Result<ColumnarValue> {
|
||||||
let result = StringVector::from(vec![format!(
|
Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(Some(format!(
|
||||||
"PostgreSQL 16.3 GreptimeDB {}",
|
"PostgreSQL 16.3 GreptimeDB {}",
|
||||||
common_version::version()
|
common_version::version()
|
||||||
)]);
|
)))))
|
||||||
Ok(Arc::new(result))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -44,13 +44,6 @@ pub enum Error {
|
|||||||
source: DataTypeError,
|
source: DataTypeError,
|
||||||
},
|
},
|
||||||
|
|
||||||
#[snafu(display("Failed to cast arrow array into vector"))]
|
|
||||||
FromArrowArray {
|
|
||||||
#[snafu(implicit)]
|
|
||||||
location: Location,
|
|
||||||
source: DataTypeError,
|
|
||||||
},
|
|
||||||
|
|
||||||
#[snafu(display("Failed to cast arrow array into vector: {:?}", data_type))]
|
#[snafu(display("Failed to cast arrow array into vector: {:?}", data_type))]
|
||||||
IntoVector {
|
IntoVector {
|
||||||
#[snafu(implicit)]
|
#[snafu(implicit)]
|
||||||
@@ -225,7 +218,6 @@ impl ErrorExt for Error {
|
|||||||
Error::InvalidInputType { source, .. }
|
Error::InvalidInputType { source, .. }
|
||||||
| Error::IntoVector { source, .. }
|
| Error::IntoVector { source, .. }
|
||||||
| Error::FromScalarValue { source, .. }
|
| Error::FromScalarValue { source, .. }
|
||||||
| Error::FromArrowArray { source, .. }
|
|
||||||
| Error::InvalidVectorString { source, .. } => source.status_code(),
|
| Error::InvalidVectorString { source, .. } => source.status_code(),
|
||||||
|
|
||||||
Error::MissingTableMutationHandler { .. }
|
Error::MissingTableMutationHandler { .. }
|
||||||
|
|||||||
@@ -579,6 +579,12 @@ impl QueryEngine for DatafusionQueryEngine {
|
|||||||
state: self.engine_state().function_state(),
|
state: self.engine_state().function_state(),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let config_options = state.config_options().clone();
|
||||||
|
let _ = state
|
||||||
|
.execution_props_mut()
|
||||||
|
.config_options
|
||||||
|
.insert(config_options);
|
||||||
|
|
||||||
QueryEngineContext::new(state, query_ctx)
|
QueryEngineContext::new(state, query_ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -119,11 +119,11 @@ Affected Rows: 25
|
|||||||
|
|
||||||
INSERT INTO jsons VALUES(parse_json('{"a":1, "b":2, "c":3'), 4);
|
INSERT INTO jsons VALUES(parse_json('{"a":1, "b":2, "c":3'), 4);
|
||||||
|
|
||||||
Error: 3001(EngineExecuteQuery), Invalid function args: Cannot convert the string to json, have: {"a":1, "b":2, "c":3
|
Error: 3001(EngineExecuteQuery), Execution error: cannot parse '{"a":1, "b":2, "c":3': EOF while parsing a value, pos 20
|
||||||
|
|
||||||
INSERT INTO jsons VALUES(parse_json('Morning my friends, have a nice day :)'), 5);
|
INSERT INTO jsons VALUES(parse_json('Morning my friends, have a nice day :)'), 5);
|
||||||
|
|
||||||
Error: 3001(EngineExecuteQuery), Invalid function args: Cannot convert the string to json, have: Morning my friends, have a nice day :)
|
Error: 3001(EngineExecuteQuery), Execution error: cannot parse 'Morning my friends, have a nice day :)': expected value, pos 1
|
||||||
|
|
||||||
SELECT json_to_string(j), t FROM jsons;
|
SELECT json_to_string(j), t FROM jsons;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user