refactor: make Function trait a simple shim of DataFusion UDF (#7036)

Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
LFC
2025-09-29 17:07:39 +08:00
committed by GitHub
parent aa05b3b993
commit 5b13fba65b
69 changed files with 1818 additions and 1205 deletions

View File

@@ -925,7 +925,7 @@ async fn test_udaf_correct_eval_result() {
let percent = ScalarValue::Float64(Some(0.5)).to_array().unwrap();
let percent = ColumnarValue::Array(percent);
let state = ColumnarValue::Array(arr);
let udd_calc = UddSketchCalcFunction;
let udd_calc = UddSketchCalcFunction::default();
let res = udd_calc
.invoke_with_args(ScalarFunctionArgs {
args: vec![percent, state],
@@ -965,7 +965,7 @@ async fn test_udaf_correct_eval_result() {
expected_fn: Some(|arr| {
let number_rows = arr.len();
let state = ColumnarValue::Array(arr);
let hll_calc = HllCalcFunction;
let hll_calc = HllCalcFunction::default();
let res = hll_calc
.invoke_with_args(ScalarFunctionArgs {
args: vec![state],

View File

@@ -17,18 +17,13 @@ use std::fmt;
use std::fmt::{Debug, Formatter};
use std::sync::Arc;
use common_error::ext::{BoxedError, PlainError};
use common_error::status_code::StatusCode;
use common_query::error::{ExecuteSnafu, Result};
use datafusion::arrow::datatypes::DataType;
use datafusion::logical_expr::ColumnarValue;
use datafusion_common::DataFusionError;
use datafusion_common::arrow::array::ArrayRef;
use datafusion_common::config::{ConfigEntry, ConfigExtension, ExtensionOptions};
use datafusion_expr::{ScalarFunctionArgs, Signature};
use datatypes::vectors::VectorRef;
use session::context::{QueryContextBuilder, QueryContextRef};
use snafu::ResultExt;
use crate::state::FunctionState;
@@ -108,31 +103,15 @@ pub trait Function: fmt::Display + Sync + Send {
fn name(&self) -> &str;
/// The returned data type of function execution.
fn return_type(&self, input_types: &[DataType]) -> Result<DataType>;
fn return_type(&self, input_types: &[DataType]) -> datafusion_common::Result<DataType>;
/// The signature of function.
fn signature(&self) -> Signature;
fn signature(&self) -> &Signature;
fn invoke_with_args(
&self,
args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
// TODO(LFC): Remove default implementation once all UDFs have implemented this function.
let _ = args;
Err(datafusion_common::DataFusionError::NotImplemented(
"invoke_with_args".to_string(),
))
}
/// Evaluate the function, e.g. run/execute the function.
/// TODO(LFC): Remove `eval` when all UDFs are rewritten to `invoke_with_args`
fn eval(&self, _: &FunctionContext, _: &[VectorRef]) -> Result<VectorRef> {
Err(BoxedError::new(PlainError::new(
"unsupported".to_string(),
StatusCode::Unsupported,
)))
.context(ExecuteSnafu)
}
) -> datafusion_common::Result<ColumnarValue>;
fn aliases(&self) -> &[String] {
&[]

View File

@@ -52,9 +52,7 @@ impl From<ScalarUDF> for ScalarFunctionFactory {
impl From<FunctionRef> for ScalarFunctionFactory {
fn from(func: FunctionRef) -> Self {
let name = func.name().to_string();
let func = Arc::new(move |ctx: FunctionContext| {
create_udf(func.clone(), ctx.query_ctx, ctx.state)
});
let func = Arc::new(move |_| create_udf(func.clone()));
Self {
name,
factory: func,

View File

@@ -190,7 +190,7 @@ mod tests {
assert!(registry.get_function("test_and").is_none());
assert!(registry.scalar_functions().is_empty());
registry.register_scalar(TestAndFunction);
registry.register_scalar(TestAndFunction::default());
let _ = registry.get_function("test_and").unwrap();
assert_eq!(1, registry.scalar_functions().len());
}

View File

@@ -26,8 +26,8 @@ pub(crate) struct DateFunction;
impl DateFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register_scalar(DateAddFunction);
registry.register_scalar(DateSubFunction);
registry.register_scalar(DateFormatFunction);
registry.register_scalar(DateAddFunction::default());
registry.register_scalar(DateSubFunction::default());
registry.register_scalar(DateFormatFunction::default());
}
}

View File

@@ -14,7 +14,7 @@
use std::fmt;
use common_query::error::{ArrowComputeSnafu, Result};
use common_query::error::ArrowComputeSnafu;
use datafusion::logical_expr::ColumnarValue;
use datafusion_expr::{ScalarFunctionArgs, Signature};
use datatypes::arrow::compute::kernels::numeric;
@@ -27,8 +27,31 @@ use crate::helper;
/// A function that adds an interval value to a Timestamp or Date and returns the result.
/// The implementation of datetime type is based on Date64 which is incorrect so this function
/// doesn't support the datetime type.
#[derive(Clone, Debug, Default)]
pub struct DateAddFunction;
#[derive(Clone, Debug)]
pub(crate) struct DateAddFunction {
signature: Signature,
}
impl Default for DateAddFunction {
    /// Creates the function with its signature built once and stored in the
    /// `signature` field.
    fn default() -> Self {
        // First list handed to `helper::one_of_sigs2`: Date32 plus timezone-less
        // timestamps in every time unit.
        // NOTE(review): presumably these are the accepted first-argument types —
        // confirm against `helper::one_of_sigs2`.
        let temporal_types = vec![
            DataType::Date32,
            DataType::Timestamp(TimeUnit::Second, None),
            DataType::Timestamp(TimeUnit::Millisecond, None),
            DataType::Timestamp(TimeUnit::Microsecond, None),
            DataType::Timestamp(TimeUnit::Nanosecond, None),
        ];
        // Second list handed to `helper::one_of_sigs2`: the three interval units.
        let interval_types = vec![
            DataType::Interval(IntervalUnit::MonthDayNano),
            DataType::Interval(IntervalUnit::YearMonth),
            DataType::Interval(IntervalUnit::DayTime),
        ];
        let signature = helper::one_of_sigs2(temporal_types, interval_types);
        Self { signature }
    }
}
const NAME: &str = "date_add";
@@ -37,25 +60,12 @@ impl Function for DateAddFunction {
NAME
}
fn return_type(&self, input_types: &[DataType]) -> Result<DataType> {
fn return_type(&self, input_types: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(input_types[0].clone())
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![
DataType::Date32,
DataType::Timestamp(TimeUnit::Second, None),
DataType::Timestamp(TimeUnit::Millisecond, None),
DataType::Timestamp(TimeUnit::Microsecond, None),
DataType::Timestamp(TimeUnit::Nanosecond, None),
],
vec![
DataType::Interval(IntervalUnit::MonthDayNano),
DataType::Interval(IntervalUnit::YearMonth),
DataType::Interval(IntervalUnit::DayTime),
],
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -92,7 +102,7 @@ mod tests {
#[test]
fn test_date_add_misc() {
let f = DateAddFunction;
let f = DateAddFunction::default();
assert_eq!("date_add", f.name());
assert_eq!(
DataType::Timestamp(TimeUnit::Microsecond, None),
@@ -121,7 +131,7 @@ mod tests {
#[test]
fn test_timestamp_date_add() {
let f = DateAddFunction;
let f = DateAddFunction::default();
let times = vec![Some(123), None, Some(42), None];
// Intervals in milliseconds
@@ -170,7 +180,7 @@ mod tests {
#[test]
fn test_date_date_add() {
let f = DateAddFunction;
let f = DateAddFunction::default();
let dates = vec![Some(123), None, Some(42), None];
// Intervals in months

View File

@@ -16,7 +16,7 @@ use std::fmt;
use std::sync::Arc;
use common_error::ext::BoxedError;
use common_query::error::{self, Result};
use common_query::error;
use common_time::{Date, Timestamp};
use datafusion_common::DataFusionError;
use datafusion_common::arrow::array::{Array, AsArray, StringViewBuilder};
@@ -29,31 +29,39 @@ use crate::helper;
use crate::helper::with_match_timestamp_types;
/// A function that formats timestamp/date/datetime into string by the format
#[derive(Clone, Debug, Default)]
pub struct DateFormatFunction;
#[derive(Clone, Debug)]
pub(crate) struct DateFormatFunction {
signature: Signature,
}
const NAME: &str = "date_format";
impl Default for DateFormatFunction {
    /// Creates the function with its signature built once and stored in the
    /// `signature` field.
    fn default() -> Self {
        // First list handed to `helper::one_of_sigs2`: Date32 plus timezone-less
        // timestamps in every time unit.
        // NOTE(review): presumably these are the accepted first-argument types —
        // confirm against `helper::one_of_sigs2`.
        let temporal_types = vec![
            DataType::Date32,
            DataType::Timestamp(TimeUnit::Second, None),
            DataType::Timestamp(TimeUnit::Millisecond, None),
            DataType::Timestamp(TimeUnit::Microsecond, None),
            DataType::Timestamp(TimeUnit::Nanosecond, None),
        ];
        // Second list: a single UTF-8 string type.
        let format_types = vec![DataType::Utf8];
        let signature = helper::one_of_sigs2(temporal_types, format_types);
        Self { signature }
    }
}
impl Function for DateFormatFunction {
fn name(&self) -> &str {
NAME
"date_format"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![
DataType::Date32,
DataType::Timestamp(TimeUnit::Second, None),
DataType::Timestamp(TimeUnit::Millisecond, None),
DataType::Timestamp(TimeUnit::Microsecond, None),
DataType::Timestamp(TimeUnit::Nanosecond, None),
],
vec![DataType::Utf8],
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -138,7 +146,7 @@ mod tests {
#[test]
fn test_date_format_misc() {
let f = DateFormatFunction;
let f = DateFormatFunction::default();
assert_eq!("date_format", f.name());
assert_eq!(
DataType::Utf8View,
@@ -163,7 +171,7 @@ mod tests {
#[test]
fn test_timestamp_date_format() {
let f = DateFormatFunction;
let f = DateFormatFunction::default();
let times = vec![Some(123), None, Some(42), None];
let formats = vec![
@@ -207,7 +215,7 @@ mod tests {
#[test]
fn test_date_date_format() {
let f = DateFormatFunction;
let f = DateFormatFunction::default();
let dates = vec![Some(123), None, Some(42), None];
let formats = vec![

View File

@@ -14,7 +14,7 @@
use std::fmt;
use common_query::error::{ArrowComputeSnafu, Result};
use common_query::error::ArrowComputeSnafu;
use datafusion::logical_expr::ColumnarValue;
use datafusion_expr::{ScalarFunctionArgs, Signature};
use datatypes::arrow::compute::kernels::numeric;
@@ -27,35 +27,43 @@ use crate::helper;
/// A function that subtracts an interval value from a Timestamp or Date and returns the result.
/// The implementation of datetime type is based on Date64 which is incorrect so this function
/// doesn't support the datetime type.
#[derive(Clone, Debug, Default)]
pub struct DateSubFunction;
#[derive(Clone, Debug)]
pub(crate) struct DateSubFunction {
signature: Signature,
}
const NAME: &str = "date_sub";
impl Default for DateSubFunction {
    /// Creates the function with its signature built once and stored in the
    /// `signature` field.
    fn default() -> Self {
        // First list handed to `helper::one_of_sigs2`: Date32 plus timezone-less
        // timestamps in every time unit.
        // NOTE(review): presumably these are the accepted first-argument types —
        // confirm against `helper::one_of_sigs2`.
        let temporal_types = vec![
            DataType::Date32,
            DataType::Timestamp(TimeUnit::Second, None),
            DataType::Timestamp(TimeUnit::Millisecond, None),
            DataType::Timestamp(TimeUnit::Microsecond, None),
            DataType::Timestamp(TimeUnit::Nanosecond, None),
        ];
        // Second list handed to `helper::one_of_sigs2`: the three interval units.
        let interval_types = vec![
            DataType::Interval(IntervalUnit::MonthDayNano),
            DataType::Interval(IntervalUnit::YearMonth),
            DataType::Interval(IntervalUnit::DayTime),
        ];
        let signature = helper::one_of_sigs2(temporal_types, interval_types);
        Self { signature }
    }
}
impl Function for DateSubFunction {
fn name(&self) -> &str {
NAME
"date_sub"
}
fn return_type(&self, input_types: &[DataType]) -> Result<DataType> {
fn return_type(&self, input_types: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(input_types[0].clone())
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![
DataType::Date32,
DataType::Timestamp(TimeUnit::Second, None),
DataType::Timestamp(TimeUnit::Millisecond, None),
DataType::Timestamp(TimeUnit::Microsecond, None),
DataType::Timestamp(TimeUnit::Nanosecond, None),
],
vec![
DataType::Interval(IntervalUnit::MonthDayNano),
DataType::Interval(IntervalUnit::YearMonth),
DataType::Interval(IntervalUnit::DayTime),
],
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -92,7 +100,7 @@ mod tests {
#[test]
fn test_date_sub_misc() {
let f = DateSubFunction;
let f = DateSubFunction::default();
assert_eq!("date_sub", f.name());
assert_eq!(
DataType::Timestamp(TimeUnit::Microsecond, None),
@@ -121,7 +129,7 @@ mod tests {
#[test]
fn test_timestamp_date_sub() {
let f = DateSubFunction;
let f = DateSubFunction::default();
let times = vec![Some(123), None, Some(42), None];
// Intervals in milliseconds
@@ -170,7 +178,7 @@ mod tests {
#[test]
fn test_date_date_sub() {
let f = DateSubFunction;
let f = DateSubFunction::default();
let days_per_month = 30;
let dates = vec![

View File

@@ -28,6 +28,6 @@ pub(crate) struct ExpressionFunction;
impl ExpressionFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register_scalar(IsNullFunction);
registry.register_scalar(IsNullFunction::default());
}
}

View File

@@ -16,7 +16,6 @@ use std::fmt;
use std::fmt::Display;
use std::sync::Arc;
use common_query::error::Result;
use datafusion::arrow::compute::is_null;
use datafusion::arrow::datatypes::DataType;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
@@ -26,8 +25,18 @@ use crate::function::{Function, extract_args};
const NAME: &str = "isnull";
/// The function to check whether an expression is NULL
#[derive(Clone, Debug, Default)]
pub struct IsNullFunction;
#[derive(Clone, Debug)]
pub(crate) struct IsNullFunction {
signature: Signature,
}
impl Default for IsNullFunction {
    /// Creates the function with a stored signature of
    /// `Signature::any(1, Volatility::Immutable)`, i.e. one argument of any type.
    fn default() -> Self {
        let signature = Signature::any(1, Volatility::Immutable);
        Self { signature }
    }
}
impl Display for IsNullFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@@ -40,12 +49,12 @@ impl Function for IsNullFunction {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Boolean)
}
fn signature(&self) -> Signature {
Signature::any(1, Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -65,21 +74,13 @@ mod tests {
use arrow_schema::Field;
use datafusion_common::arrow::array::{AsArray, BooleanArray, Float32Array};
use datafusion_expr::TypeSignature;
use super::*;
#[test]
fn test_is_null_function() {
let is_null = IsNullFunction;
let is_null = IsNullFunction::default();
assert_eq!("isnull", is_null.name());
assert_eq!(DataType::Boolean, is_null.return_type(&[]).unwrap());
assert_eq!(
is_null.signature(),
Signature {
type_signature: TypeSignature::Any(1),
volatility: Volatility::Immutable
}
);
let values = vec![None, Some(3.0), None];
let result = is_null

View File

@@ -27,57 +27,57 @@ pub(crate) struct GeoFunctions;
impl GeoFunctions {
pub fn register(registry: &FunctionRegistry) {
// geohash
registry.register_scalar(geohash::GeohashFunction);
registry.register_scalar(geohash::GeohashNeighboursFunction);
registry.register_scalar(geohash::GeohashFunction::default());
registry.register_scalar(geohash::GeohashNeighboursFunction::default());
// h3 index
registry.register_scalar(h3::H3LatLngToCell);
registry.register_scalar(h3::H3LatLngToCellString);
registry.register_scalar(h3::H3LatLngToCell::default());
registry.register_scalar(h3::H3LatLngToCellString::default());
// h3 index inspection
registry.register_scalar(h3::H3CellBase);
registry.register_scalar(h3::H3CellIsPentagon);
registry.register_scalar(h3::H3StringToCell);
registry.register_scalar(h3::H3CellToString);
registry.register_scalar(h3::H3CellCenterLatLng);
registry.register_scalar(h3::H3CellResolution);
registry.register_scalar(h3::H3CellBase::default());
registry.register_scalar(h3::H3CellIsPentagon::default());
registry.register_scalar(h3::H3StringToCell::default());
registry.register_scalar(h3::H3CellToString::default());
registry.register_scalar(h3::H3CellCenterLatLng::default());
registry.register_scalar(h3::H3CellResolution::default());
// h3 hierarchical grid
registry.register_scalar(h3::H3CellCenterChild);
registry.register_scalar(h3::H3CellParent);
registry.register_scalar(h3::H3CellToChildren);
registry.register_scalar(h3::H3CellToChildrenSize);
registry.register_scalar(h3::H3CellToChildPos);
registry.register_scalar(h3::H3ChildPosToCell);
registry.register_scalar(h3::H3CellContains);
registry.register_scalar(h3::H3CellCenterChild::default());
registry.register_scalar(h3::H3CellParent::default());
registry.register_scalar(h3::H3CellToChildren::default());
registry.register_scalar(h3::H3CellToChildrenSize::default());
registry.register_scalar(h3::H3CellToChildPos::default());
registry.register_scalar(h3::H3ChildPosToCell::default());
registry.register_scalar(h3::H3CellContains::default());
// h3 grid traversal
registry.register_scalar(h3::H3GridDisk);
registry.register_scalar(h3::H3GridDiskDistances);
registry.register_scalar(h3::H3GridDistance);
registry.register_scalar(h3::H3GridPathCells);
registry.register_scalar(h3::H3GridDisk::default());
registry.register_scalar(h3::H3GridDiskDistances::default());
registry.register_scalar(h3::H3GridDistance::default());
registry.register_scalar(h3::H3GridPathCells::default());
// h3 measurement
registry.register_scalar(h3::H3CellDistanceSphereKm);
registry.register_scalar(h3::H3CellDistanceEuclideanDegree);
registry.register_scalar(h3::H3CellDistanceSphereKm::default());
registry.register_scalar(h3::H3CellDistanceEuclideanDegree::default());
// s2
registry.register_scalar(s2::S2LatLngToCell);
registry.register_scalar(s2::S2CellLevel);
registry.register_scalar(s2::S2CellToToken);
registry.register_scalar(s2::S2CellParent);
registry.register_scalar(s2::S2LatLngToCell::default());
registry.register_scalar(s2::S2CellLevel::default());
registry.register_scalar(s2::S2CellToToken::default());
registry.register_scalar(s2::S2CellParent::default());
// spatial data type
registry.register_scalar(wkt::LatLngToPointWkt);
registry.register_scalar(wkt::LatLngToPointWkt::default());
// spatial relation
registry.register_scalar(relation::STContains);
registry.register_scalar(relation::STWithin);
registry.register_scalar(relation::STIntersects);
registry.register_scalar(relation::STContains::default());
registry.register_scalar(relation::STWithin::default());
registry.register_scalar(relation::STIntersects::default());
// spatial measure
registry.register_scalar(measure::STDistance);
registry.register_scalar(measure::STDistanceSphere);
registry.register_scalar(measure::STArea);
registry.register_scalar(measure::STDistance::default());
registry.register_scalar(measure::STDistanceSphere::default());
registry.register_scalar(measure::STArea::default());
}
}

View File

@@ -17,7 +17,7 @@ use std::sync::Arc;
use common_error::ext::{BoxedError, PlainError};
use common_error::status_code::StatusCode;
use common_query::error::{self, Result};
use common_query::error;
use datafusion::arrow::array::{Array, AsArray, ListBuilder, StringViewBuilder};
use datafusion::arrow::datatypes::{DataType, Field, Float64Type, UInt8Type};
use datafusion::logical_expr::ColumnarValue;
@@ -40,23 +40,13 @@ fn ensure_resolution_usize(v: u8) -> datafusion_common::Result<usize> {
}
/// Function that returns the geohash string for a given geospatial coordinate.
#[derive(Clone, Debug, Default)]
pub struct GeohashFunction;
impl GeohashFunction {
const NAME: &'static str = "geohash";
#[derive(Clone, Debug)]
pub(crate) struct GeohashFunction {
signature: Signature,
}
impl Function for GeohashFunction {
fn name(&self) -> &str {
Self::NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
Ok(DataType::Utf8)
}
fn signature(&self) -> Signature {
impl Default for GeohashFunction {
fn default() -> Self {
let mut signatures = Vec::new();
for coord_type in &[DataType::Float32, DataType::Float64] {
for resolution_type in INTEGERS {
@@ -70,7 +60,27 @@ impl Function for GeohashFunction {
]));
}
}
Signature::one_of(signatures, Volatility::Stable)
Self {
signature: Signature::one_of(signatures, Volatility::Stable),
}
}
}
impl GeohashFunction {
/// Name of this function; returned by its `Function::name` implementation.
const NAME: &'static str = "geohash";
}
impl Function for GeohashFunction {
fn name(&self) -> &str {
Self::NAME
}
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8)
}
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -127,27 +137,13 @@ impl fmt::Display for GeohashFunction {
}
/// Function that returns the list of neighbouring geohash strings for a given geospatial coordinate.
#[derive(Clone, Debug, Default)]
pub struct GeohashNeighboursFunction;
impl GeohashNeighboursFunction {
const NAME: &'static str = "geohash_neighbours";
#[derive(Clone, Debug)]
pub(crate) struct GeohashNeighboursFunction {
signature: Signature,
}
impl Function for GeohashNeighboursFunction {
fn name(&self) -> &str {
GeohashNeighboursFunction::NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
Ok(DataType::List(Arc::new(Field::new(
"item",
DataType::Utf8View,
false,
))))
}
fn signature(&self) -> Signature {
impl Default for GeohashNeighboursFunction {
fn default() -> Self {
let mut signatures = Vec::new();
for coord_type in &[DataType::Float32, DataType::Float64] {
for resolution_type in INTEGERS {
@@ -161,7 +157,31 @@ impl Function for GeohashNeighboursFunction {
]));
}
}
Signature::one_of(signatures, Volatility::Stable)
Self {
signature: Signature::one_of(signatures, Volatility::Stable),
}
}
}
impl GeohashNeighboursFunction {
/// Name of this function; returned by its `Function::name` implementation.
const NAME: &'static str = "geohash_neighbours";
}
impl Function for GeohashNeighboursFunction {
fn name(&self) -> &str {
GeohashNeighboursFunction::NAME
}
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::List(Arc::new(Field::new(
"item",
DataType::Utf8View,
false,
))))
}
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(

View File

@@ -17,7 +17,7 @@ use std::sync::{Arc, LazyLock};
use common_error::ext::{BoxedError, PlainError};
use common_error::status_code::StatusCode;
use common_query::error::{self, Result};
use common_query::error;
use datafusion::arrow::array::{
Array, AsArray, BooleanBuilder, Float64Builder, Int32Builder, ListBuilder, StringViewArray,
StringViewBuilder, UInt8Builder, UInt64Builder,
@@ -51,20 +51,14 @@ static POSITION_TYPES: &[DataType] = INTEGERS;
/// Function that returns [h3] encoding cellid for a given geospatial coordinate.
///
/// [h3]: https://h3geo.org/
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3LatLngToCell;
pub(crate) struct H3LatLngToCell {
signature: Signature,
}
impl Function for H3LatLngToCell {
fn name(&self) -> &str {
"h3_latlng_to_cell"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
Ok(DataType::UInt64)
}
fn signature(&self) -> Signature {
impl Default for H3LatLngToCell {
fn default() -> Self {
let mut signatures = Vec::new();
for coord_type in COORDINATE_TYPES.as_slice() {
for resolution_type in RESOLUTION_TYPES {
@@ -78,7 +72,23 @@ impl Function for H3LatLngToCell {
]));
}
}
Signature::one_of(signatures, Volatility::Stable)
Self {
signature: Signature::one_of(signatures, Volatility::Stable),
}
}
}
impl Function for H3LatLngToCell {
fn name(&self) -> &str {
"h3_latlng_to_cell"
}
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt64)
}
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -132,20 +142,14 @@ impl Function for H3LatLngToCell {
/// geospatial coordinate.
///
/// [h3]: https://h3geo.org/
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3LatLngToCellString;
pub(crate) struct H3LatLngToCellString {
signature: Signature,
}
impl Function for H3LatLngToCellString {
fn name(&self) -> &str {
"h3_latlng_to_cell_string"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
impl Default for H3LatLngToCellString {
fn default() -> Self {
let mut signatures = Vec::new();
for coord_type in COORDINATE_TYPES.as_slice() {
for resolution_type in RESOLUTION_TYPES {
@@ -159,7 +163,23 @@ impl Function for H3LatLngToCellString {
]));
}
}
Signature::one_of(signatures, Volatility::Stable)
Self {
signature: Signature::one_of(signatures, Volatility::Stable),
}
}
}
impl Function for H3LatLngToCellString {
fn name(&self) -> &str {
"h3_latlng_to_cell_string"
}
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -210,21 +230,31 @@ impl Function for H3LatLngToCellString {
}
/// Function that converts cell id to its string form
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3CellToString;
pub(crate) struct H3CellToString {
signature: Signature,
}
impl Default for H3CellToString {
    /// Creates the function, storing the signature produced by `signature_of_cell`.
    fn default() -> Self {
        let signature = signature_of_cell();
        Self { signature }
    }
}
impl Function for H3CellToString {
fn name(&self) -> &str {
"h3_cell_to_string"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
signature_of_cell()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -248,21 +278,31 @@ impl Function for H3CellToString {
}
/// Function that converts cell string id to uint64 number
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3StringToCell;
pub(crate) struct H3StringToCell {
signature: Signature,
}
impl Default for H3StringToCell {
    /// Creates the function with a stored signature of
    /// `Signature::string(1, Volatility::Stable)`, i.e. one string argument.
    fn default() -> Self {
        let signature = Signature::string(1, Volatility::Stable);
        Self { signature }
    }
}
impl Function for H3StringToCell {
fn name(&self) -> &str {
"h3_string_to_cell"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt64)
}
fn signature(&self) -> Signature {
Signature::string(1, Volatility::Stable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -294,16 +334,26 @@ impl Function for H3StringToCell {
}
/// Function that returns centroid latitude and longitude of given cell id
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3CellCenterLatLng;
pub(crate) struct H3CellCenterLatLng {
signature: Signature,
}
impl Default for H3CellCenterLatLng {
    /// Creates the function, storing the signature produced by `signature_of_cell`.
    fn default() -> Self {
        let signature = signature_of_cell();
        Self { signature }
    }
}
impl Function for H3CellCenterLatLng {
fn name(&self) -> &str {
"h3_cell_center_latlng"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::List(Arc::new(Field::new(
"x",
DataType::Float64,
@@ -311,8 +361,8 @@ impl Function for H3CellCenterLatLng {
))))
}
fn signature(&self) -> Signature {
signature_of_cell()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -342,21 +392,31 @@ impl Function for H3CellCenterLatLng {
}
/// Function that returns resolution of given cell id
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3CellResolution;
pub(crate) struct H3CellResolution {
signature: Signature,
}
impl Default for H3CellResolution {
    /// Creates the function, storing the signature produced by `signature_of_cell`.
    fn default() -> Self {
        let signature = signature_of_cell();
        Self { signature }
    }
}
impl Function for H3CellResolution {
fn name(&self) -> &str {
"h3_cell_resolution"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt8)
}
fn signature(&self) -> Signature {
signature_of_cell()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -379,21 +439,31 @@ impl Function for H3CellResolution {
}
/// Function that returns base cell of given cell id
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3CellBase;
pub(crate) struct H3CellBase {
signature: Signature,
}
impl Default for H3CellBase {
    /// Creates the function, storing the signature produced by `signature_of_cell`.
    fn default() -> Self {
        let signature = signature_of_cell();
        Self { signature }
    }
}
impl Function for H3CellBase {
fn name(&self) -> &str {
"h3_cell_base"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt8)
}
fn signature(&self) -> Signature {
signature_of_cell()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -417,21 +487,31 @@ impl Function for H3CellBase {
}
/// Function that checks whether the given cell id is a pentagon
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3CellIsPentagon;
pub(crate) struct H3CellIsPentagon {
signature: Signature,
}
impl Default for H3CellIsPentagon {
    /// Creates the function, storing the signature produced by `signature_of_cell`.
    fn default() -> Self {
        let signature = signature_of_cell();
        Self { signature }
    }
}
impl Function for H3CellIsPentagon {
fn name(&self) -> &str {
"h3_cell_is_pentagon"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Boolean)
}
fn signature(&self) -> Signature {
signature_of_cell()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -455,21 +535,31 @@ impl Function for H3CellIsPentagon {
}
/// Function that returns center child cell of given cell id
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3CellCenterChild;
pub(crate) struct H3CellCenterChild {
signature: Signature,
}
impl Default for H3CellCenterChild {
    /// Creates the function, storing the signature produced by
    /// `signature_of_cell_and_resolution`.
    fn default() -> Self {
        let signature = signature_of_cell_and_resolution();
        Self { signature }
    }
}
impl Function for H3CellCenterChild {
fn name(&self) -> &str {
"h3_cell_center_child"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt64)
}
fn signature(&self) -> Signature {
signature_of_cell_and_resolution()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -483,21 +573,31 @@ impl Function for H3CellCenterChild {
}
/// Function that returns parent cell of given cell id and resolution
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3CellParent;
pub(crate) struct H3CellParent {
signature: Signature,
}
impl Default for H3CellParent {
    /// Creates the function, storing the signature produced by
    /// `signature_of_cell_and_resolution`.
    fn default() -> Self {
        let signature = signature_of_cell_and_resolution();
        Self { signature }
    }
}
impl Function for H3CellParent {
fn name(&self) -> &str {
"h3_cell_parent"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt64)
}
fn signature(&self) -> Signature {
signature_of_cell_and_resolution()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -511,16 +611,26 @@ impl Function for H3CellParent {
}
/// Function that returns children cell list
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3CellToChildren;
pub(crate) struct H3CellToChildren {
signature: Signature,
}
impl Default for H3CellToChildren {
    /// Creates the function, storing the signature produced by
    /// `signature_of_cell_and_resolution`.
    fn default() -> Self {
        let signature = signature_of_cell_and_resolution();
        Self { signature }
    }
}
impl Function for H3CellToChildren {
fn name(&self) -> &str {
"h3_cell_to_children"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::List(Arc::new(Field::new(
"item",
DataType::UInt64,
@@ -528,8 +638,8 @@ impl Function for H3CellToChildren {
))))
}
fn signature(&self) -> Signature {
signature_of_cell_and_resolution()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -566,21 +676,31 @@ impl Function for H3CellToChildren {
}
/// Function that returns children cell count
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3CellToChildrenSize;
pub(crate) struct H3CellToChildrenSize {
signature: Signature,
}
impl Default for H3CellToChildrenSize {
    /// Creates the function, storing the signature produced by
    /// `signature_of_cell_and_resolution`.
    fn default() -> Self {
        let signature = signature_of_cell_and_resolution();
        Self { signature }
    }
}
impl Function for H3CellToChildrenSize {
fn name(&self) -> &str {
"h3_cell_to_children_size"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt64)
}
fn signature(&self) -> Signature {
signature_of_cell_and_resolution()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -594,21 +714,31 @@ impl Function for H3CellToChildrenSize {
}
/// Function that returns the position of the cell within its parent at the given resolution
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3CellToChildPos;
pub(crate) struct H3CellToChildPos {
signature: Signature,
}
impl Default for H3CellToChildPos {
    /// Creates the function, storing the signature produced by
    /// `signature_of_cell_and_resolution`.
    fn default() -> Self {
        let signature = signature_of_cell_and_resolution();
        Self { signature }
    }
}
impl Function for H3CellToChildPos {
fn name(&self) -> &str {
"h3_cell_to_child_pos"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt64)
}
fn signature(&self) -> Signature {
signature_of_cell_and_resolution()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -651,20 +781,14 @@ where
}
/// Function that returns the cell at given position of the parent at given resolution
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3ChildPosToCell;
pub(crate) struct H3ChildPosToCell {
signature: Signature,
}
impl Function for H3ChildPosToCell {
fn name(&self) -> &str {
"h3_child_pos_to_cell"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
Ok(DataType::UInt64)
}
fn signature(&self) -> Signature {
impl Default for H3ChildPosToCell {
fn default() -> Self {
let mut signatures =
Vec::with_capacity(POSITION_TYPES.len() * CELL_TYPES.len() * RESOLUTION_TYPES.len());
for position_type in POSITION_TYPES {
@@ -678,7 +802,23 @@ impl Function for H3ChildPosToCell {
}
}
}
Signature::one_of(signatures, Volatility::Stable)
Self {
signature: Signature::one_of(signatures, Volatility::Stable),
}
}
}
impl Function for H3ChildPosToCell {
fn name(&self) -> &str {
"h3_child_pos_to_cell"
}
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt64)
}
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -711,16 +851,26 @@ impl Function for H3ChildPosToCell {
}
/// Function that returns the cells within distance k of the given cell
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3GridDisk;
pub(crate) struct H3GridDisk {
signature: Signature,
}
impl Default for H3GridDisk {
    /// Creates the function, storing the cell-and-distance signature that
    /// `signature()` later borrows.
    fn default() -> Self {
        let signature = signature_of_cell_and_distance();
        Self { signature }
    }
}
impl Function for H3GridDisk {
fn name(&self) -> &str {
"h3_grid_disk"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::List(Arc::new(Field::new(
"item",
DataType::UInt64,
@@ -728,8 +878,8 @@ impl Function for H3GridDisk {
))))
}
fn signature(&self) -> Signature {
signature_of_cell_and_distance()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -760,16 +910,26 @@ impl Function for H3GridDisk {
}
/// Function that returns all cells within k distances of given cell
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3GridDiskDistances;
pub(crate) struct H3GridDiskDistances {
signature: Signature,
}
impl Default for H3GridDiskDistances {
    /// Creates the function, storing the cell-and-distance signature that
    /// `signature()` later borrows.
    fn default() -> Self {
        let signature = signature_of_cell_and_distance();
        Self { signature }
    }
}
impl Function for H3GridDiskDistances {
fn name(&self) -> &str {
"h3_grid_disk_distances"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::List(Arc::new(Field::new(
"item",
DataType::UInt64,
@@ -777,8 +937,8 @@ impl Function for H3GridDiskDistances {
))))
}
fn signature(&self) -> Signature {
signature_of_cell_and_distance()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -809,20 +969,30 @@ impl Function for H3GridDiskDistances {
}
/// Function that returns distance between two cells
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3GridDistance;
pub(crate) struct H3GridDistance {
signature: Signature,
}
impl Default for H3GridDistance {
    /// Creates the function, storing the two-cell signature that
    /// `signature()` later borrows.
    fn default() -> Self {
        let signature = signature_of_double_cells();
        Self { signature }
    }
}
impl Function for H3GridDistance {
fn name(&self) -> &str {
"h3_grid_distance"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Int32)
}
fn signature(&self) -> Signature {
signature_of_double_cells()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -863,16 +1033,26 @@ impl Function for H3GridDistance {
}
/// Function that returns path cells between two cells
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3GridPathCells;
pub(crate) struct H3GridPathCells {
signature: Signature,
}
impl Default for H3GridPathCells {
    /// Creates the function, storing the two-cell signature that
    /// `signature()` later borrows.
    fn default() -> Self {
        let signature = signature_of_double_cells();
        Self { signature }
    }
}
impl Function for H3GridPathCells {
fn name(&self) -> &str {
"h3_grid_path_cells"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::List(Arc::new(Field::new(
"item",
DataType::UInt64,
@@ -880,8 +1060,8 @@ impl Function for H3GridPathCells {
))))
}
fn signature(&self) -> Signature {
signature_of_double_cells()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -919,20 +1099,14 @@ impl Function for H3GridPathCells {
}
/// Tests if cells contain given cells
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3CellContains;
pub(crate) struct H3CellContains {
signature: Signature,
}
impl Function for H3CellContains {
fn name(&self) -> &str {
"h3_cells_contains"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
Ok(DataType::Boolean)
}
fn signature(&self) -> Signature {
impl Default for H3CellContains {
fn default() -> Self {
let multi_cell_types = vec![
DataType::new_list(DataType::Int64, true),
DataType::new_list(DataType::UInt64, true),
@@ -949,8 +1123,23 @@ impl Function for H3CellContains {
]));
}
}
Self {
signature: Signature::one_of(signatures, Volatility::Stable),
}
}
}
Signature::one_of(signatures, Volatility::Stable)
impl Function for H3CellContains {
fn name(&self) -> &str {
"h3_cells_contains"
}
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Boolean)
}
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -991,20 +1180,30 @@ impl Function for H3CellContains {
}
/// Get WGS84 great circle distance between two cell centroids
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3CellDistanceSphereKm;
pub(crate) struct H3CellDistanceSphereKm {
signature: Signature,
}
impl Default for H3CellDistanceSphereKm {
    /// Creates the function, storing the two-cell signature that
    /// `signature()` later borrows.
    fn default() -> Self {
        let signature = signature_of_double_cells();
        Self { signature }
    }
}
impl Function for H3CellDistanceSphereKm {
fn name(&self) -> &str {
"h3_distance_sphere_km"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Float64)
}
fn signature(&self) -> Signature {
signature_of_double_cells()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -1039,9 +1238,19 @@ impl Function for H3CellDistanceSphereKm {
}
/// Get Euclidean distance between two cell centroids
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct H3CellDistanceEuclideanDegree;
pub(crate) struct H3CellDistanceEuclideanDegree {
signature: Signature,
}
impl Default for H3CellDistanceEuclideanDegree {
    /// Creates the function, storing the two-cell signature that
    /// `signature()` later borrows.
    fn default() -> Self {
        let signature = signature_of_double_cells();
        Self { signature }
    }
}
impl H3CellDistanceEuclideanDegree {
fn distance(centroid_this: LatLng, centroid_that: LatLng) -> f64 {
@@ -1055,12 +1264,12 @@ impl Function for H3CellDistanceEuclideanDegree {
fn name(&self) -> &str {
"h3_distance_degree"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Float64)
}
fn signature(&self) -> Signature {
signature_of_double_cells()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(

View File

@@ -16,7 +16,7 @@ use std::sync::Arc;
use common_error::ext::{BoxedError, PlainError};
use common_error::status_code::StatusCode;
use common_query::error::{self, Result};
use common_query::error;
use datafusion_common::arrow::array::{Array, AsArray, Float64Builder};
use datafusion_common::arrow::compute;
use datafusion_common::arrow::datatypes::DataType;
@@ -31,21 +31,31 @@ use crate::function::{Function, extract_args};
use crate::scalars::geo::wkt::parse_wkt;
/// Return WGS84 (SRID: 4326) Euclidean distance between two geometry objects, in degrees
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct STDistance;
pub(crate) struct STDistance {
signature: Signature,
}
impl Default for STDistance {
    /// Creates the function with a stored signature of two string arguments.
    fn default() -> Self {
        let signature = Signature::string(2, Volatility::Stable);
        Self { signature }
    }
}
impl Function for STDistance {
fn name(&self) -> &str {
"st_distance"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Float64)
}
fn signature(&self) -> Signature {
Signature::string(2, Volatility::Stable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -84,21 +94,31 @@ impl Function for STDistance {
}
/// Return great circle distance between two geometry objects, in meters
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct STDistanceSphere;
pub(crate) struct STDistanceSphere {
signature: Signature,
}
impl Default for STDistanceSphere {
    /// Creates the function with a stored signature of two string arguments.
    fn default() -> Self {
        let signature = Signature::string(2, Volatility::Stable);
        Self { signature }
    }
}
impl Function for STDistanceSphere {
fn name(&self) -> &str {
"st_distance_sphere_m"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Float64)
}
fn signature(&self) -> Signature {
Signature::string(2, Volatility::Stable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -147,21 +167,31 @@ impl Function for STDistanceSphere {
}
/// Return area of given geometry object
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct STArea;
pub(crate) struct STArea {
signature: Signature,
}
impl Default for STArea {
    /// Creates the function with a stored signature of one string argument.
    fn default() -> Self {
        let signature = Signature::string(1, Volatility::Stable);
        Self { signature }
    }
}
impl Function for STArea {
fn name(&self) -> &str {
"st_area"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Float64)
}
fn signature(&self) -> Signature {
Signature::string(1, Volatility::Stable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(

View File

@@ -14,7 +14,6 @@
use std::sync::Arc;
use common_query::error::Result;
use datafusion_common::arrow::array::{Array, AsArray, BooleanBuilder};
use datafusion_common::arrow::compute;
use datafusion_common::arrow::datatypes::DataType;
@@ -29,39 +28,81 @@ use crate::function::{Function, extract_args};
use crate::scalars::geo::wkt::parse_wkt;
/// Test if spatial relationship: contains
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct STContains;
pub(crate) struct STContains {
signature: Signature,
}
impl Default for STContains {
    /// Creates the function with a stored signature of two string arguments.
    fn default() -> Self {
        let signature = Signature::string(2, Volatility::Stable);
        Self { signature }
    }
}
impl StFunction for STContains {
    const NAME: &'static str = "st_contains";

    /// Hands out the signature stored on the struct.
    fn signature(&self) -> &Signature {
        let Self { signature } = self;
        signature
    }

    /// Returns whether `g1` contains `g2`.
    fn invoke(g1: Geometry, g2: Geometry) -> bool {
        let (outer, inner) = (g1, g2);
        outer.contains(&inner)
    }
}
/// Test if spatial relationship: within
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct STWithin;
pub(crate) struct STWithin {
signature: Signature,
}
impl Default for STWithin {
    /// Creates the function with a stored signature of two string arguments.
    fn default() -> Self {
        let signature = Signature::string(2, Volatility::Stable);
        Self { signature }
    }
}
impl StFunction for STWithin {
    const NAME: &'static str = "st_within";

    /// Hands out the signature stored on the struct.
    fn signature(&self) -> &Signature {
        let Self { signature } = self;
        signature
    }

    /// Returns whether `g1` is within `g2`.
    fn invoke(g1: Geometry, g2: Geometry) -> bool {
        let (inner, outer) = (g1, g2);
        inner.is_within(&outer)
    }
}
/// Test if spatial relationship: intersects
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct STIntersects;
pub(crate) struct STIntersects {
signature: Signature,
}
impl Default for STIntersects {
    /// Creates the function with a stored signature of two string arguments.
    fn default() -> Self {
        let signature = Signature::string(2, Volatility::Stable);
        Self { signature }
    }
}
impl StFunction for STIntersects {
const NAME: &'static str = "st_intersects";
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke(g1: Geometry, g2: Geometry) -> bool {
g1.intersects(&g2)
}
@@ -70,6 +111,8 @@ impl StFunction for STIntersects {
/// Common shape of the two-geometry boolean predicates in this file.
/// Implementors receive their `Function` impl through the blanket
/// `impl<T: StFunction + Display + Send + Sync> Function for T`.
trait StFunction {
/// Function name; the blanket `Function` impl uses `T::NAME`.
const NAME: &'static str;
/// Signature held by the implementor; the blanket `Function::signature` forwards to it.
fn signature(&self) -> &Signature;
/// Evaluates the predicate on a pair of geometries.
fn invoke(g1: Geometry, g2: Geometry) -> bool;
}
@@ -78,12 +121,12 @@ impl<T: StFunction + Display + Send + Sync> Function for T {
T::NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Boolean)
}
fn signature(&self) -> Signature {
Signature::string(2, Volatility::Stable)
fn signature(&self) -> &Signature {
self.signature()
}
fn invoke_with_args(

View File

@@ -14,7 +14,7 @@
use std::sync::{Arc, LazyLock};
use common_query::error::{InvalidFuncArgsSnafu, Result};
use common_query::error::InvalidFuncArgsSnafu;
use datafusion_common::ScalarValue;
use datafusion_common::arrow::array::{Array, AsArray, StringViewBuilder, UInt64Builder};
use datafusion_common::arrow::datatypes::{DataType, Float64Type};
@@ -39,20 +39,14 @@ static LEVEL_TYPES: &[DataType] = datafusion_expr::type_coercion::aggregates::IN
/// Function that returns [s2] encoding cellid for a given geospatial coordinate.
///
/// [s2]: http://s2geometry.io
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct S2LatLngToCell;
pub(crate) struct S2LatLngToCell {
signature: Signature,
}
impl Function for S2LatLngToCell {
fn name(&self) -> &str {
"s2_latlng_to_cell"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
Ok(DataType::UInt64)
}
fn signature(&self) -> Signature {
impl Default for S2LatLngToCell {
fn default() -> Self {
let mut signatures = Vec::with_capacity(COORDINATE_TYPES.len());
for coord_type in COORDINATE_TYPES.as_slice() {
signatures.push(TypeSignature::Exact(vec![
@@ -62,7 +56,23 @@ impl Function for S2LatLngToCell {
coord_type.clone(),
]));
}
Signature::one_of(signatures, Volatility::Stable)
Self {
signature: Signature::one_of(signatures, Volatility::Stable),
}
}
}
impl Function for S2LatLngToCell {
fn name(&self) -> &str {
"s2_latlng_to_cell"
}
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt64)
}
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -107,21 +117,31 @@ impl Function for S2LatLngToCell {
}
/// Return the level of current s2 cell
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct S2CellLevel;
pub(crate) struct S2CellLevel {
signature: Signature,
}
impl Default for S2CellLevel {
    /// Creates the function, storing the single-cell signature that
    /// `signature()` later borrows.
    fn default() -> Self {
        let signature = signature_of_cell();
        Self { signature }
    }
}
impl Function for S2CellLevel {
fn name(&self) -> &str {
"s2_cell_level"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt64)
}
fn signature(&self) -> Signature {
signature_of_cell()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -145,21 +165,31 @@ impl Function for S2CellLevel {
}
/// Return the string representation of the cell
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct S2CellToToken;
pub(crate) struct S2CellToToken {
signature: Signature,
}
impl Default for S2CellToToken {
    /// Creates the function, storing the single-cell signature that
    /// `signature()` later borrows.
    fn default() -> Self {
        let signature = signature_of_cell();
        Self { signature }
    }
}
impl Function for S2CellToToken {
fn name(&self) -> &str {
"s2_cell_to_token"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
signature_of_cell()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -183,21 +213,31 @@ impl Function for S2CellToToken {
}
/// Return parent at given level of current s2 cell
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct S2CellParent;
pub(crate) struct S2CellParent {
signature: Signature,
}
impl Default for S2CellParent {
    /// Creates the function, storing the cell-and-level signature that
    /// `signature()` later borrows.
    fn default() -> Self {
        let signature = signature_of_cell_and_level();
        Self { signature }
    }
}
impl Function for S2CellParent {
fn name(&self) -> &str {
"s2_cell_parent"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt64)
}
fn signature(&self) -> Signature {
signature_of_cell_and_level()
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(

View File

@@ -32,20 +32,14 @@ static COORDINATE_TYPES: LazyLock<Vec<DataType>> =
LazyLock::new(|| vec![DataType::Float32, DataType::Float64]);
/// Return the WKT point string for a given latitude/longitude coordinate
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct LatLngToPointWkt;
pub(crate) struct LatLngToPointWkt {
signature: Signature,
}
impl Function for LatLngToPointWkt {
fn name(&self) -> &str {
"wkt_point_from_latlng"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
impl Default for LatLngToPointWkt {
fn default() -> Self {
let mut signatures = Vec::new();
for coord_type in COORDINATE_TYPES.as_slice() {
signatures.push(TypeSignature::Exact(vec![
@@ -55,7 +49,23 @@ impl Function for LatLngToPointWkt {
coord_type.clone(),
]));
}
Signature::one_of(signatures, Volatility::Stable)
Self {
signature: Signature::one_of(signatures, Volatility::Stable),
}
}
}
impl Function for LatLngToPointWkt {
fn name(&self) -> &str {
"wkt_point_from_latlng"
}
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(

View File

@@ -18,7 +18,6 @@ use std::fmt;
use std::fmt::Display;
use std::sync::Arc;
use common_query::error::Result;
use datafusion_common::DataFusionError;
use datafusion_common::arrow::array::{Array, AsArray, UInt64Builder};
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
@@ -37,12 +36,22 @@ const NAME: &str = "hll_count";
/// 1. The serialized HyperLogLogPlus state, as produced by the aggregator (binary).
///
/// For each row, it deserializes the sketch and returns the estimated cardinality.
#[derive(Debug, Default)]
pub struct HllCalcFunction;
#[derive(Debug)]
pub(crate) struct HllCalcFunction {
signature: Signature,
}
impl HllCalcFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register_scalar(HllCalcFunction);
registry.register_scalar(HllCalcFunction::default());
}
}
impl Default for HllCalcFunction {
    /// Creates the function with its signature built up front.
    fn default() -> Self {
        // Only argument: HyperLogLogPlus state (binary)
        let signature = Signature::exact(vec![DataType::Binary], Volatility::Immutable);
        Self { signature }
    }
}
@@ -57,13 +66,12 @@ impl Function for HllCalcFunction {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt64)
}
fn signature(&self) -> Signature {
// Only argument: HyperLogLogPlus state (binary)
Signature::exact(vec![DataType::Binary], Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -122,7 +130,7 @@ mod tests {
#[test]
fn test_hll_count_function() {
let function = HllCalcFunction;
let function = HllCalcFunction::default();
assert_eq!("hll_count", function.name());
assert_eq!(
DataType::UInt64,
@@ -161,7 +169,7 @@ mod tests {
#[test]
fn test_hll_count_function_errors() {
let function = HllCalcFunction;
let function = HllCalcFunction::default();
// Test with invalid number of arguments
let result = function.invoke_with_args(ScalarFunctionArgs {

View File

@@ -30,14 +30,14 @@ impl IpFunctions {
pub fn register(registry: &FunctionRegistry) {
// Register IPv4 functions
registry.register_scalar(Ipv4NumToString::default());
registry.register_scalar(Ipv4StringToNum);
registry.register_scalar(Ipv4ToCidr);
registry.register_scalar(Ipv4InRange);
registry.register_scalar(Ipv4StringToNum::default());
registry.register_scalar(Ipv4ToCidr::default());
registry.register_scalar(Ipv4InRange::default());
// Register IPv6 functions
registry.register_scalar(Ipv6NumToString);
registry.register_scalar(Ipv6StringToNum);
registry.register_scalar(Ipv6ToCidr);
registry.register_scalar(Ipv6InRange);
registry.register_scalar(Ipv6NumToString::default());
registry.register_scalar(Ipv6StringToNum::default());
registry.register_scalar(Ipv6ToCidr::default());
registry.register_scalar(Ipv6InRange::default());
}
}

View File

@@ -39,30 +39,40 @@ use crate::function::Function;
/// - ipv4_to_cidr('192.168.1.0') -> '192.168.1.0/24'
/// - ipv4_to_cidr('192.168') -> '192.168.0.0/16'
/// - ipv4_to_cidr('192.168.1.1', 24) -> '192.168.1.0/24'
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct Ipv4ToCidr;
pub(crate) struct Ipv4ToCidr {
signature: Signature,
}
impl Default for Ipv4ToCidr {
    /// Creates the function with a signature that accepts either a single
    /// string argument, or a string argument followed by an integer argument
    /// (e.g. `ipv4_to_cidr('192.168.1.1', 24)`).
    fn default() -> Self {
        let ip_only = TypeSignature::String(1);
        let ip_with_subnet = TypeSignature::Coercible(vec![
            Coercion::new_exact(TypeSignatureClass::Native(types::logical_string())),
            Coercion::new_exact(TypeSignatureClass::Integer),
        ]);
        let signature = Signature::one_of(vec![ip_only, ip_with_subnet], Volatility::Immutable);
        Self { signature }
    }
}
impl Function for Ipv4ToCidr {
fn name(&self) -> &str {
"ipv4_to_cidr"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::one_of(
vec![
TypeSignature::String(1),
TypeSignature::Coercible(vec![
Coercion::new_exact(TypeSignatureClass::Native(types::logical_string())),
Coercion::new_exact(TypeSignatureClass::Integer),
]),
],
Volatility::Immutable,
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -170,27 +180,37 @@ impl Function for Ipv4ToCidr {
/// - ipv6_to_cidr('2001:db8::') -> '2001:db8::/32'
/// - ipv6_to_cidr('2001:db8') -> '2001:db8::/32'
/// - ipv6_to_cidr('2001:db8::', 48) -> '2001:db8::/48'
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct Ipv6ToCidr;
pub(crate) struct Ipv6ToCidr {
signature: Signature,
}
impl Default for Ipv6ToCidr {
    /// Creates the function with a signature that accepts either a single
    /// string argument, or exactly a `Utf8` argument followed by a `UInt8`
    /// argument (e.g. `ipv6_to_cidr('2001:db8::', 48)`).
    fn default() -> Self {
        let ip_only = TypeSignature::String(1);
        let ip_with_subnet = TypeSignature::Exact(vec![DataType::Utf8, DataType::UInt8]);
        let signature = Signature::one_of(vec![ip_only, ip_with_subnet], Volatility::Immutable);
        Self { signature }
    }
}
impl Function for Ipv6ToCidr {
fn name(&self) -> &str {
"ipv6_to_cidr"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::one_of(
vec![
TypeSignature::String(1),
TypeSignature::Exact(vec![DataType::Utf8, DataType::UInt8]),
],
Volatility::Immutable,
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -387,7 +407,7 @@ mod tests {
#[test]
fn test_ipv4_to_cidr_auto() {
let func = Ipv4ToCidr;
let func = Ipv4ToCidr::default();
// Test data with auto subnet detection
let values = vec!["192.168.1.0", "10.0.0.0", "172.16", "192"];
@@ -412,7 +432,7 @@ mod tests {
#[test]
fn test_ipv4_to_cidr_with_subnet() {
let func = Ipv4ToCidr;
let func = Ipv4ToCidr::default();
// Test data with explicit subnet
let ip_values = vec!["192.168.1.1", "10.0.0.1", "172.16.5.5"];
@@ -438,7 +458,7 @@ mod tests {
#[test]
fn test_ipv6_to_cidr_auto() {
let func = Ipv6ToCidr;
let func = Ipv6ToCidr::default();
// Test data with auto subnet detection
let values = vec!["2001:db8::", "2001:db8", "fe80::1", "::1"];
@@ -463,7 +483,7 @@ mod tests {
#[test]
fn test_ipv6_to_cidr_with_subnet() {
let func = Ipv6ToCidr;
let func = Ipv6ToCidr::default();
// Test data with explicit subnet
let ip_values = vec!["2001:db8::", "fe80::1", "2001:db8:1234::"];
@@ -489,8 +509,8 @@ mod tests {
#[test]
fn test_invalid_inputs() {
let ipv4_func = Ipv4ToCidr;
let ipv6_func = Ipv6ToCidr;
let ipv4_func = Ipv4ToCidr::default();
let ipv6_func = Ipv6ToCidr::default();
// Empty string should fail
let empty_values = vec![""];

View File

@@ -16,7 +16,7 @@ use std::net::Ipv4Addr;
use std::str::FromStr;
use std::sync::Arc;
use common_query::error::{InvalidFuncArgsSnafu, Result};
use common_query::error::InvalidFuncArgsSnafu;
use datafusion_common::arrow::array::{Array, AsArray, StringViewBuilder, UInt32Builder};
use datafusion_common::arrow::compute;
use datafusion_common::arrow::datatypes::{DataType, UInt32Type};
@@ -36,12 +36,17 @@ use crate::function::{Function, extract_args};
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct Ipv4NumToString {
signature: Signature,
aliases: [String; 1],
}
impl Default for Ipv4NumToString {
fn default() -> Self {
Self {
signature: Signature::new(
TypeSignature::Exact(vec![DataType::UInt32]),
Volatility::Immutable,
),
aliases: ["inet_ntoa".to_string()],
}
}
@@ -52,15 +57,12 @@ impl Function for Ipv4NumToString {
"ipv4_num_to_string"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::new(
TypeSignature::Exact(vec![DataType::UInt32]),
Volatility::Immutable,
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -104,21 +106,31 @@ impl Function for Ipv4NumToString {
/// - "10.0.0.1" returns 167772161
/// - "192.168.0.1" returns 3232235521
/// - Invalid IPv4 format throws an exception
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct Ipv4StringToNum;
pub(crate) struct Ipv4StringToNum {
signature: Signature,
}
impl Default for Ipv4StringToNum {
    /// Creates the function with a stored signature of one string argument.
    fn default() -> Self {
        let signature = Signature::string(1, Volatility::Immutable);
        Self { signature }
    }
}
impl Function for Ipv4StringToNum {
fn name(&self) -> &str {
"ipv4_string_to_num"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt32)
}
fn signature(&self) -> Signature {
Signature::string(1, Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -190,7 +202,7 @@ mod tests {
#[test]
fn test_ipv4_string_to_num() {
let func = Ipv4StringToNum;
let func = Ipv4StringToNum::default();
// Test data
let values = vec!["10.0.0.1", "192.168.0.1", "0.0.0.0", "255.255.255.255"];
@@ -215,7 +227,7 @@ mod tests {
#[test]
fn test_ipv4_conversions_roundtrip() {
let to_num = Ipv4StringToNum;
let to_num = Ipv4StringToNum::default();
let to_string = Ipv4NumToString::default();
// Test data for string to num to string

View File

@@ -16,7 +16,7 @@ use std::net::{Ipv4Addr, Ipv6Addr};
use std::str::FromStr;
use std::sync::Arc;
use common_query::error::{InvalidFuncArgsSnafu, Result};
use common_query::error::InvalidFuncArgsSnafu;
use datafusion::arrow::datatypes::DataType;
use datafusion_common::DataFusionError;
use datafusion_common::arrow::array::{Array, AsArray, BinaryViewBuilder, StringViewBuilder};
@@ -31,21 +31,31 @@ use crate::function::{Function, extract_args};
/// For example:
/// - "20010DB8000000000000000000000001" returns "2001:db8::1"
/// - "00000000000000000000FFFFC0A80001" returns "::ffff:192.168.0.1"
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct Ipv6NumToString;
pub(crate) struct Ipv6NumToString {
signature: Signature,
}
impl Default for Ipv6NumToString {
    /// Creates the function with a stored signature of one string argument.
    fn default() -> Self {
        let signature = Signature::string(1, Volatility::Immutable);
        Self { signature }
    }
}
impl Function for Ipv6NumToString {
fn name(&self) -> &str {
"ipv6_num_to_string"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::string(1, Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -119,21 +129,31 @@ impl Function for Ipv6NumToString {
/// - If the input string contains a valid IPv4 address, returns its IPv6 equivalent
/// - HEX can be uppercase or lowercase
/// - Invalid IPv6 format throws an exception
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct Ipv6StringToNum;
pub(crate) struct Ipv6StringToNum {
signature: Signature,
}
impl Default for Ipv6StringToNum {
    /// Creates the function with a stored signature of one string argument.
    fn default() -> Self {
        let signature = Signature::string(1, Volatility::Immutable);
        Self { signature }
    }
}
impl Function for Ipv6StringToNum {
fn name(&self) -> &str {
"ipv6_string_to_num"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::BinaryView)
}
fn signature(&self) -> Signature {
Signature::string(1, Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -191,7 +211,7 @@ mod tests {
#[test]
fn test_ipv6_num_to_string() {
let func = Ipv6NumToString;
let func = Ipv6NumToString::default();
// Hex string for "2001:db8::1"
let hex_str1 = "20010db8000000000000000000000001";
@@ -219,7 +239,7 @@ mod tests {
#[test]
fn test_ipv6_num_to_string_uppercase() {
let func = Ipv6NumToString;
let func = Ipv6NumToString::default();
// Uppercase hex string for "2001:db8::1"
let hex_str = "20010DB8000000000000000000000001";
@@ -243,7 +263,7 @@ mod tests {
#[test]
fn test_ipv6_num_to_string_error() {
let func = Ipv6NumToString;
let func = Ipv6NumToString::default();
// Invalid hex string - wrong length
let hex_str = "20010db8";
@@ -272,7 +292,7 @@ mod tests {
#[test]
fn test_ipv6_string_to_num() {
let func = Ipv6StringToNum;
let func = Ipv6StringToNum::default();
let values = vec!["2001:db8::1", "::ffff:192.168.0.1", "192.168.0.1"];
let arg0 = ColumnarValue::Array(Arc::new(StringViewArray::from_iter_values(&values)));
@@ -305,8 +325,8 @@ mod tests {
#[test]
fn test_ipv6_conversions_roundtrip() {
let to_num = Ipv6StringToNum;
let to_string = Ipv6NumToString;
let to_num = Ipv6StringToNum::default();
let to_string = Ipv6NumToString::default();
// Test data
let values = vec!["2001:db8::1", "::ffff:192.168.0.1"];
@@ -360,8 +380,8 @@ mod tests {
fn test_ipv6_conversions_hex_roundtrip() {
// Create a new test to verify that the string output from ipv6_num_to_string
// can be converted back using ipv6_string_to_num
let to_string = Ipv6NumToString;
let to_binary = Ipv6StringToNum;
let to_string = Ipv6NumToString::default();
let to_binary = Ipv6StringToNum::default();
// Hex representation of IPv6 addresses
let hex_values = vec![

View File

@@ -36,21 +36,31 @@ use crate::function::{Function, extract_args};
/// - ipv4_in_range('192.168.1.5', '192.168.1.0/24') -> true
/// - ipv4_in_range('192.168.2.1', '192.168.1.0/24') -> false
/// - ipv4_in_range('10.0.0.1', '10.0.0.0/8') -> true
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct Ipv4InRange;
pub(crate) struct Ipv4InRange {
signature: Signature,
}
impl Default for Ipv4InRange {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        // Two string arguments.
        let signature = Signature::string(2, Volatility::Immutable);
        Self { signature }
    }
}
impl Function for Ipv4InRange {
fn name(&self) -> &str {
"ipv4_in_range"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Boolean)
}
fn signature(&self) -> Signature {
Signature::string(2, Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -114,21 +124,31 @@ impl Function for Ipv4InRange {
/// - ipv6_in_range('2001:db8:1::', '2001:db8::/32') -> true
/// - ipv6_in_range('2001:db9::1', '2001:db8::/32') -> false
/// - ipv6_in_range('::1', '::1/128') -> true
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct Ipv6InRange;
pub(crate) struct Ipv6InRange {
signature: Signature,
}
impl Default for Ipv6InRange {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        // Two string arguments.
        let signature = Signature::string(2, Volatility::Immutable);
        Self { signature }
    }
}
impl Function for Ipv6InRange {
fn name(&self) -> &str {
"ipv6_in_range"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Boolean)
}
fn signature(&self) -> Signature {
Signature::string(2, Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -313,7 +333,7 @@ mod tests {
#[test]
fn test_ipv4_in_range() {
let func = Ipv4InRange;
let func = Ipv4InRange::default();
// Test IPs
let ip_values = vec![
@@ -357,7 +377,7 @@ mod tests {
#[test]
fn test_ipv6_in_range() {
let func = Ipv6InRange;
let func = Ipv6InRange::default();
// Test IPs
let ip_values = vec![
@@ -401,8 +421,8 @@ mod tests {
#[test]
fn test_invalid_inputs() {
let ipv4_func = Ipv4InRange;
let ipv6_func = Ipv6InRange;
let ipv4_func = Ipv4InRange::default();
let ipv6_func = Ipv6InRange::default();
// Invalid IPv4 address
let invalid_ip_values = vec!["not-an-ip", "192.168.1.300"];
@@ -448,7 +468,7 @@ mod tests {
#[test]
fn test_edge_cases() {
let ipv4_func = Ipv4InRange;
let ipv4_func = Ipv4InRange::default();
// Edge cases like prefix length 0 (matches everything) and 32 (exact match)
let ip_values = vec!["8.8.8.8", "192.168.1.1", "192.168.1.1"];

View File

@@ -32,23 +32,23 @@ pub(crate) struct JsonFunction;
impl JsonFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register_scalar(JsonToStringFunction);
registry.register_scalar(ParseJsonFunction);
registry.register_scalar(JsonToStringFunction::default());
registry.register_scalar(ParseJsonFunction::default());
registry.register_scalar(JsonGetInt);
registry.register_scalar(JsonGetFloat);
registry.register_scalar(JsonGetString);
registry.register_scalar(JsonGetBool);
registry.register_scalar(JsonGetInt::default());
registry.register_scalar(JsonGetFloat::default());
registry.register_scalar(JsonGetString::default());
registry.register_scalar(JsonGetBool::default());
registry.register_scalar(JsonIsNull);
registry.register_scalar(JsonIsInt);
registry.register_scalar(JsonIsFloat);
registry.register_scalar(JsonIsString);
registry.register_scalar(JsonIsBool);
registry.register_scalar(JsonIsArray);
registry.register_scalar(JsonIsObject);
registry.register_scalar(JsonIsNull::default());
registry.register_scalar(JsonIsInt::default());
registry.register_scalar(JsonIsFloat::default());
registry.register_scalar(JsonIsString::default());
registry.register_scalar(JsonIsBool::default());
registry.register_scalar(JsonIsArray::default());
registry.register_scalar(JsonIsObject::default());
registry.register_scalar(json_path_exists::JsonPathExistsFunction);
registry.register_scalar(json_path_match::JsonPathMatchFunction);
registry.register_scalar(json_path_exists::JsonPathExistsFunction::default());
registry.register_scalar(json_path_match::JsonPathMatchFunction::default());
}
}

View File

@@ -16,7 +16,6 @@ use std::fmt::{self, Display};
use std::sync::Arc;
use arrow::compute;
use common_query::error::Result;
use datafusion_common::arrow::array::{
Array, AsArray, BooleanBuilder, Float64Builder, Int64Builder, StringViewBuilder,
};
@@ -48,24 +47,38 @@ macro_rules! json_get {
($name:ident, $type:ident, $rust_type:ident, $doc:expr) => {
paste::paste! {
#[doc = $doc]
#[derive(Clone, Debug, Default)]
pub struct $name;
#[derive(Clone, Debug)]
pub struct $name {
signature: Signature,
}
impl $name {
/// The function's name: the struct identifier rendered in snake_case
/// (via `paste`'s `:snake` modifier) and turned into a string literal.
pub const NAME: &'static str = stringify!([<$name:snake>]);
}
impl Default for $name {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        // TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
        let json_types = vec![DataType::Binary, DataType::BinaryView];
        let path_types = vec![DataType::Utf8, DataType::Utf8View];
        Self {
            signature: helper::one_of_sigs2(json_types, path_types),
        }
    }
}
impl Function for $name {
fn name(&self) -> &str {
stringify!([<$name:snake>])
Self::NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::[<$type>])
}
fn signature(&self) -> Signature {
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
helper::one_of_sigs2(
vec![DataType::Binary, DataType::BinaryView],
vec![DataType::Utf8, DataType::Utf8View],
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -101,7 +114,7 @@ macro_rules! json_get {
impl Display for $name {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", stringify!([<$name:snake>]).to_ascii_uppercase())
write!(f, "{}", Self::NAME.to_ascii_uppercase())
}
}
}
@@ -130,24 +143,38 @@ json_get!(
);
/// Get the value from the JSONB by the given path and return it as a string.
#[derive(Clone, Debug, Default)]
pub struct JsonGetString;
#[derive(Clone, Debug)]
pub struct JsonGetString {
signature: Signature,
}
impl JsonGetString {
/// The function's name, as returned by `Function::name` and used
/// (upper-cased) by the `Display` implementation.
pub const NAME: &'static str = "json_get_string";
}
impl Default for JsonGetString {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        // TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
        let json_types = vec![DataType::Binary, DataType::BinaryView];
        let path_types = vec![DataType::Utf8, DataType::Utf8View];
        Self {
            signature: helper::one_of_sigs2(json_types, path_types),
        }
    }
}
impl Function for JsonGetString {
fn name(&self) -> &str {
"json_get_string"
Self::NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
helper::one_of_sigs2(
vec![DataType::Binary, DataType::BinaryView],
vec![DataType::Utf8, DataType::Utf8View],
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -181,7 +208,7 @@ impl Function for JsonGetString {
impl Display for JsonGetString {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", "json_get_string".to_ascii_uppercase())
write!(f, "{}", Self::NAME.to_ascii_uppercase())
}
}
@@ -197,7 +224,7 @@ mod tests {
#[test]
fn test_json_get_int() {
let json_get_int = JsonGetInt;
let json_get_int = JsonGetInt::default();
assert_eq!("json_get_int", json_get_int.name());
assert_eq!(
@@ -248,7 +275,7 @@ mod tests {
#[test]
fn test_json_get_float() {
let json_get_float = JsonGetFloat;
let json_get_float = JsonGetFloat::default();
assert_eq!("json_get_float", json_get_float.name());
assert_eq!(
@@ -299,7 +326,7 @@ mod tests {
#[test]
fn test_json_get_bool() {
let json_get_bool = JsonGetBool;
let json_get_bool = JsonGetBool::default();
assert_eq!("json_get_bool", json_get_bool.name());
assert_eq!(
@@ -350,7 +377,7 @@ mod tests {
#[test]
fn test_json_get_string() {
let json_get_string = JsonGetString;
let json_get_string = JsonGetString::default();
assert_eq!("json_get_string", json_get_string.name());
assert_eq!(

View File

@@ -15,7 +15,6 @@
use std::fmt::{self, Display};
use std::sync::Arc;
use common_query::error::Result;
use datafusion_common::arrow::array::{Array, AsArray, BooleanBuilder};
use datafusion_common::arrow::compute;
use datafusion_common::arrow::datatypes::DataType;
@@ -27,25 +26,35 @@ use crate::function::{Function, extract_args};
macro_rules! json_is {
($name:ident, $json_type:ident, $doc:expr) => {
paste::paste! {
#[derive(Clone, Debug, Default)]
pub struct $name;
#[derive(Clone, Debug)]
pub(crate) struct $name {
signature: Signature,
}
impl Default for $name {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        // TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
        let json_types = vec![DataType::Binary, DataType::BinaryView];
        let signature = Signature::uniform(1, json_types, Volatility::Immutable);
        Self { signature }
    }
}
impl Function for $name {
fn name(&self) -> &str {
stringify!([<$name:snake>])
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Boolean)
}
fn signature(&self) -> Signature {
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
Signature::uniform(
1,
vec![DataType::Binary, DataType::BinaryView],
Volatility::Immutable,
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -127,12 +136,12 @@ mod tests {
#[test]
fn test_json_is_functions() {
let json_is_functions: [&dyn Function; 6] = [
&JsonIsBool,
&JsonIsInt,
&JsonIsFloat,
&JsonIsString,
&JsonIsArray,
&JsonIsObject,
&JsonIsBool::default(),
&JsonIsInt::default(),
&JsonIsFloat::default(),
&JsonIsString::default(),
&JsonIsArray::default(),
&JsonIsObject::default(),
];
let expected_names = [
"json_is_bool",
@@ -148,14 +157,6 @@ mod tests {
func.return_type(&[DataType::Binary]).unwrap(),
DataType::Boolean
);
assert_eq!(
func.signature(),
Signature::uniform(
1,
vec![DataType::Binary, DataType::BinaryView],
Volatility::Immutable
)
);
}
let json_strings = [

View File

@@ -16,7 +16,6 @@ use std::fmt::{self, Display};
use std::sync::Arc;
use arrow::compute;
use common_query::error::Result;
use datafusion_common::DataFusionError;
use datafusion_common::arrow::array::{Array, AsArray, BooleanBuilder};
use datafusion_common::arrow::datatypes::DataType;
@@ -26,8 +25,22 @@ use crate::function::{Function, extract_args};
use crate::helper;
/// Check if the given JSON data contains the given JSON path.
#[derive(Clone, Debug, Default)]
pub struct JsonPathExistsFunction;
#[derive(Clone, Debug)]
pub(crate) struct JsonPathExistsFunction {
signature: Signature,
}
impl Default for JsonPathExistsFunction {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        // TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
        // `Null` is listed among the accepted types for both arguments.
        let json_types = vec![DataType::Binary, DataType::BinaryView, DataType::Null];
        let path_types = vec![DataType::Utf8, DataType::Utf8View, DataType::Null];
        Self {
            signature: helper::one_of_sigs2(json_types, path_types),
        }
    }
}
const NAME: &str = "json_path_exists";
@@ -36,16 +49,12 @@ impl Function for JsonPathExistsFunction {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Boolean)
}
fn signature(&self) -> Signature {
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
helper::one_of_sigs2(
vec![DataType::Binary, DataType::BinaryView, DataType::Null],
vec![DataType::Utf8, DataType::Utf8View, DataType::Null],
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -110,7 +119,7 @@ mod tests {
#[test]
fn test_json_path_exists_function() {
let json_path_exists = JsonPathExistsFunction;
let json_path_exists = JsonPathExistsFunction::default();
assert_eq!("json_path_exists", json_path_exists.name());
assert_eq!(

View File

@@ -16,7 +16,6 @@ use std::fmt::{self, Display};
use std::sync::Arc;
use arrow::compute;
use common_query::error::Result;
use datafusion_common::arrow::array::{Array, AsArray, BooleanBuilder};
use datafusion_common::arrow::datatypes::DataType;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature};
@@ -25,8 +24,22 @@ use crate::function::{Function, extract_args};
use crate::helper;
/// Check if the given JSON data matches the given JSON path's predicate.
#[derive(Clone, Debug, Default)]
pub struct JsonPathMatchFunction;
#[derive(Clone, Debug)]
pub(crate) struct JsonPathMatchFunction {
signature: Signature,
}
impl Default for JsonPathMatchFunction {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        // TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
        let json_types = vec![DataType::Binary, DataType::BinaryView];
        let path_types = vec![DataType::Utf8, DataType::Utf8View];
        Self {
            signature: helper::one_of_sigs2(json_types, path_types),
        }
    }
}
const NAME: &str = "json_path_match";
@@ -35,16 +48,12 @@ impl Function for JsonPathMatchFunction {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Boolean)
}
fn signature(&self) -> Signature {
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
helper::one_of_sigs2(
vec![DataType::Binary, DataType::BinaryView],
vec![DataType::Utf8, DataType::Utf8View],
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -102,7 +111,7 @@ mod tests {
#[test]
fn test_json_path_match_function() {
let json_path_match = JsonPathMatchFunction;
let json_path_match = JsonPathMatchFunction::default();
assert_eq!("json_path_match", json_path_match.name());
assert_eq!(

View File

@@ -15,7 +15,6 @@
use std::fmt::{self, Display};
use std::sync::Arc;
use common_query::error::Result;
use datafusion_common::DataFusionError;
use datafusion_common::arrow::array::{Array, AsArray, StringViewBuilder};
use datafusion_common::arrow::datatypes::DataType;
@@ -24,8 +23,19 @@ use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
use crate::function::{Function, extract_args};
/// Converts the `JSONB` into `String`. It's useful for displaying JSONB content.
#[derive(Clone, Debug, Default)]
pub struct JsonToStringFunction;
#[derive(Clone, Debug)]
pub(crate) struct JsonToStringFunction {
signature: Signature,
}
impl Default for JsonToStringFunction {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        // TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
        let arg_types = vec![DataType::Binary];
        let signature = Signature::exact(arg_types, Volatility::Immutable);
        Self { signature }
    }
}
const NAME: &str = "json_to_string";
@@ -34,13 +44,12 @@ impl Function for JsonToStringFunction {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
Signature::exact(vec![DataType::Binary], Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -79,13 +88,12 @@ mod tests {
use arrow_schema::Field;
use datafusion_common::arrow::array::BinaryArray;
use datafusion_expr::TypeSignature;
use super::*;
#[test]
fn test_json_to_string_function() {
let json_to_string = JsonToStringFunction;
let json_to_string = JsonToStringFunction::default();
assert_eq!("json_to_string", json_to_string.name());
assert_eq!(
@@ -93,13 +101,6 @@ mod tests {
json_to_string.return_type(&[DataType::Binary]).unwrap()
);
assert!(matches!(json_to_string.signature(),
Signature {
type_signature: TypeSignature::Exact(valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![DataType::Binary]
));
let json_strings = [
r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,

View File

@@ -15,7 +15,6 @@
use std::fmt::{self, Display};
use std::sync::Arc;
use common_query::error::Result;
use datafusion_common::DataFusionError;
use datafusion_common::arrow::array::{Array, AsArray, BinaryViewBuilder};
use datafusion_common::arrow::compute;
@@ -25,8 +24,18 @@ use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
use crate::function::{Function, extract_args};
/// Parses the `String` into `JSONB`.
#[derive(Clone, Debug, Default)]
pub struct ParseJsonFunction;
#[derive(Clone, Debug)]
pub(crate) struct ParseJsonFunction {
signature: Signature,
}
impl Default for ParseJsonFunction {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        // One string argument.
        let signature = Signature::string(1, Volatility::Immutable);
        Self { signature }
    }
}
const NAME: &str = "parse_json";
@@ -35,12 +44,12 @@ impl Function for ParseJsonFunction {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::BinaryView)
}
fn signature(&self) -> Signature {
Signature::string(1, Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -87,7 +96,7 @@ mod tests {
#[test]
fn test_get_by_path_function() {
let parse_json = ParseJsonFunction;
let parse_json = ParseJsonFunction::default();
assert_eq!("parse_json", parse_json.name());
assert_eq!(

View File

@@ -35,12 +35,22 @@ use crate::function_registry::FunctionRegistry;
/// `matches` for full text search.
///
/// Usage: matches(`<col>`, `<pattern>`) -> boolean
#[derive(Clone, Debug, Default)]
pub struct MatchesFunction;
#[derive(Clone, Debug)]
pub struct MatchesFunction {
signature: Signature,
}
impl MatchesFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register_scalar(MatchesFunction);
registry.register_scalar(MatchesFunction::default());
}
}
impl Default for MatchesFunction {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        // Two string arguments.
        let signature = Signature::string(2, Volatility::Immutable);
        Self { signature }
    }
}
@@ -55,12 +65,12 @@ impl Function for MatchesFunction {
"matches"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Boolean)
}
fn signature(&self) -> Signature {
Signature::string(2, Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
// TODO: read case-sensitive config
@@ -1382,7 +1392,7 @@ mod test {
),
];
let f = MatchesFunction;
let f = MatchesFunction::default();
for (pattern, expected) in cases {
let args = ScalarFunctionArgs {
args: vec![

View File

@@ -15,7 +15,6 @@
use std::fmt;
use std::sync::Arc;
use common_query::error::Result;
use datafusion_common::arrow::array::{Array, AsArray, BooleanArray, BooleanBuilder};
use datafusion_common::arrow::compute;
use datafusion_common::arrow::datatypes::DataType;
@@ -72,11 +71,21 @@ use crate::function_registry::FunctionRegistry;
/// -- Text: "Cat" => true
/// -- Text: "cat" => false
/// ```
pub struct MatchesTermFunction;
pub struct MatchesTermFunction {
signature: Signature,
}
impl MatchesTermFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register_scalar(MatchesTermFunction);
registry.register_scalar(MatchesTermFunction::default());
}
}
impl Default for MatchesTermFunction {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        // Two string arguments.
        let signature = Signature::string(2, Volatility::Immutable);
        Self { signature }
    }
}
@@ -91,12 +100,12 @@ impl Function for MatchesTermFunction {
"matches_term"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Boolean)
}
fn signature(&self) -> Signature {
Signature::string(2, Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(

View File

@@ -19,33 +19,36 @@ mod rate;
use std::fmt;
pub use clamp::{ClampFunction, ClampMaxFunction, ClampMinFunction};
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType;
use datafusion::error::DataFusionError;
use datafusion_expr::{Signature, Volatility};
pub use rate::RateFunction;
use datafusion::logical_expr::ColumnarValue;
use datafusion_common::internal_err;
use datafusion_expr::{ScalarFunctionArgs, Signature, Volatility};
use crate::function::Function;
use crate::function_registry::FunctionRegistry;
use crate::scalars::math::modulo::ModuloFunction;
use crate::scalars::math::rate::RateFunction;
pub(crate) struct MathFunction;
impl MathFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register_scalar(ModuloFunction);
registry.register_scalar(RateFunction);
registry.register_scalar(RangeFunction);
registry.register_scalar(ClampFunction);
registry.register_scalar(ClampMinFunction);
registry.register_scalar(ClampMaxFunction);
registry.register_scalar(ModuloFunction::default());
registry.register_scalar(RateFunction::default());
registry.register_scalar(RangeFunction::default());
registry.register_scalar(ClampFunction::default());
registry.register_scalar(ClampMinFunction::default());
registry.register_scalar(ClampMaxFunction::default());
}
}
/// `RangeFunction` will never be used as a normal function,
/// just for datafusion to generate logical plan for RangeSelect
#[derive(Clone, Debug, Default)]
struct RangeFunction;
#[derive(Clone, Debug)]
struct RangeFunction {
signature: Signature,
}
impl fmt::Display for RangeFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@@ -53,25 +56,36 @@ impl fmt::Display for RangeFunction {
}
}
impl Default for RangeFunction {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        let signature = Signature::variadic_any(Volatility::Immutable);
        Self { signature }
    }
}
impl Function for RangeFunction {
fn name(&self) -> &str {
"range_fn"
}
// The first argument to range_fn is the expression to be evaluated
fn return_type(&self, input_types: &[DataType]) -> Result<DataType> {
fn return_type(&self, input_types: &[DataType]) -> datafusion_common::Result<DataType> {
input_types
.first()
.cloned()
.ok_or(DataFusionError::Internal(
"No expr found in range_fn".into(),
))
.map_err(Into::into)
}
/// `range_fn` will never be used. As long as a legal signature is returned, the specific content of the signature does not matter.
/// In fact, the arguments loaded by `range_fn` are very complicated, and it is difficult to describe them with a `Signature`.
fn signature(&self) -> Signature {
Signature::variadic_any(Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
/// Always fails with an internal error and ignores its arguments:
/// `range_fn` is not meant to be invoked directly.
fn invoke_with_args(&self, _: ScalarFunctionArgs) -> datafusion_common::Result<ColumnarValue> {
internal_err!("not expected to invoke 'range_fn' directly")
}
}

View File

@@ -15,7 +15,6 @@
use std::fmt::{self, Display};
use std::sync::Arc;
use common_query::error::Result;
use datafusion::arrow::array::{Array, ArrayRef, AsArray, PrimitiveArray};
use datafusion::arrow::datatypes::DataType as ArrowDataType;
use datafusion::logical_expr::{ColumnarValue, Volatility};
@@ -25,8 +24,19 @@ use datafusion_expr::{ScalarFunctionArgs, Signature};
use crate::function::Function;
#[derive(Clone, Debug, Default)]
pub struct ClampFunction;
#[derive(Clone, Debug)]
pub struct ClampFunction {
signature: Signature,
}
impl Default for ClampFunction {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        // Three numeric arguments: input, min, max.
        let signature = Signature::uniform(3, NUMERICS.to_vec(), Volatility::Immutable);
        Self { signature }
    }
}
const CLAMP_NAME: &str = "clamp";
@@ -35,14 +45,16 @@ impl Function for ClampFunction {
CLAMP_NAME
}
fn return_type(&self, input_types: &[ArrowDataType]) -> Result<ArrowDataType> {
fn return_type(
&self,
input_types: &[ArrowDataType],
) -> datafusion_common::Result<ArrowDataType> {
// Type check is done by `signature`
Ok(input_types[0].clone())
}
fn signature(&self) -> Signature {
// input, min, max
Signature::uniform(3, NUMERICS.to_vec(), Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -211,8 +223,19 @@ fn clamp_impl(
}
}
#[derive(Clone, Debug, Default)]
pub struct ClampMinFunction;
#[derive(Clone, Debug)]
pub struct ClampMinFunction {
signature: Signature,
}
impl Default for ClampMinFunction {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        // Two numeric arguments: input, min.
        let signature = Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable);
        Self { signature }
    }
}
const CLAMP_MIN_NAME: &str = "clamp_min";
@@ -221,13 +244,15 @@ impl Function for ClampMinFunction {
CLAMP_MIN_NAME
}
fn return_type(&self, input_types: &[ArrowDataType]) -> Result<ArrowDataType> {
fn return_type(
&self,
input_types: &[ArrowDataType],
) -> datafusion_common::Result<ArrowDataType> {
Ok(input_types[0].clone())
}
fn signature(&self) -> Signature {
// input, min
Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -252,8 +277,19 @@ impl Display for ClampMinFunction {
}
}
#[derive(Clone, Debug, Default)]
pub struct ClampMaxFunction;
#[derive(Clone, Debug)]
pub struct ClampMaxFunction {
signature: Signature,
}
impl Default for ClampMaxFunction {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        // Two numeric arguments: input, max.
        let signature = Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable);
        Self { signature }
    }
}
const CLAMP_MAX_NAME: &str = "clamp_max";
@@ -262,13 +298,15 @@ impl Function for ClampMaxFunction {
CLAMP_MAX_NAME
}
fn return_type(&self, input_types: &[ArrowDataType]) -> Result<ArrowDataType> {
fn return_type(
&self,
input_types: &[ArrowDataType],
) -> datafusion_common::Result<ArrowDataType> {
Ok(input_types[0].clone())
}
fn signature(&self) -> Signature {
// input, max
Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -361,7 +399,7 @@ mod test {
),
];
let func = ClampFunction;
let func = ClampFunction::default();
for (in_data, min, max, expected) in inputs {
let number_rows = in_data.len();
let args = vec![
@@ -404,7 +442,7 @@ mod test {
),
];
let func = ClampFunction;
let func = ClampFunction::default();
for (in_data, min, max, expected) in inputs {
let number_rows = in_data.len();
let args = vec![
@@ -447,7 +485,7 @@ mod test {
),
];
let func = ClampFunction;
let func = ClampFunction::default();
for (in_data, min, max, expected) in inputs {
let number_rows = in_data.len();
let args = vec![
@@ -467,7 +505,7 @@ mod test {
let min = 10.0;
let max = -1.0;
let func = ClampFunction;
let func = ClampFunction::default();
let number_rows = input.len();
let args = vec![
ColumnarValue::Array(Arc::new(Float64Array::from(input))),
@@ -484,7 +522,7 @@ mod test {
let min = -1i64;
let max = 10u64;
let func = ClampFunction;
let func = ClampFunction::default();
let number_rows = input.len();
let args = vec![
ColumnarValue::Array(Arc::new(Float64Array::from(input))),
@@ -501,7 +539,7 @@ mod test {
let min = -10.0;
let max = 1.0;
let func = ClampFunction;
let func = ClampFunction::default();
let number_rows = input.len();
let args = vec![
ColumnarValue::Array(Arc::new(Float64Array::from(input))),
@@ -517,7 +555,7 @@ mod test {
let input = vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)];
let min = -10.0;
let func = ClampFunction;
let func = ClampFunction::default();
let number_rows = input.len();
let args = vec![
ColumnarValue::Array(Arc::new(Float64Array::from(input))),
@@ -531,7 +569,7 @@ mod test {
fn clamp_on_string() {
let input = vec![Some("foo"), Some("foo"), Some("foo"), Some("foo")];
let func = ClampFunction;
let func = ClampFunction::default();
let number_rows = input.len();
let args = vec![
ColumnarValue::Array(Arc::new(StringArray::from(input))),
@@ -557,7 +595,7 @@ mod test {
),
];
let func = ClampMinFunction;
let func = ClampMinFunction::default();
for (in_data, min, expected) in inputs {
let number_rows = in_data.len();
let args = vec![
@@ -585,7 +623,7 @@ mod test {
),
];
let func = ClampMaxFunction;
let func = ClampMaxFunction::default();
for (in_data, max, expected) in inputs {
let number_rows = in_data.len();
let args = vec![
@@ -606,7 +644,7 @@ mod test {
vec![Some(-1.0), Some(-1.0), Some(-1.0), Some(0.0), Some(1.0)],
)];
let func = ClampMinFunction;
let func = ClampMinFunction::default();
for (in_data, min, expected) in inputs {
let number_rows = in_data.len();
let args = vec![
@@ -627,7 +665,7 @@ mod test {
vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(0.0)],
)];
let func = ClampMaxFunction;
let func = ClampMaxFunction::default();
for (in_data, max, expected) in inputs {
let number_rows = in_data.len();
let args = vec![
@@ -645,7 +683,7 @@ mod test {
let input = vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)];
let min = -1i64;
let func = ClampMinFunction;
let func = ClampMinFunction::default();
let number_rows = input.len();
let args = vec![
ColumnarValue::Array(Arc::new(Float64Array::from(input))),
@@ -660,7 +698,7 @@ mod test {
let input = vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)];
let max = 1i64;
let func = ClampMaxFunction;
let func = ClampMaxFunction::default();
let number_rows = input.len();
let args = vec![
ColumnarValue::Array(Arc::new(Float64Array::from(input))),

View File

@@ -15,7 +15,6 @@
use std::fmt;
use std::fmt::Display;
use common_query::error::Result;
use datafusion_common::arrow::compute;
use datafusion_common::arrow::compute::kernels::numeric;
use datafusion_common::arrow::datatypes::DataType;
@@ -27,8 +26,18 @@ use crate::function::{Function, extract_args};
const NAME: &str = "mod";
/// The function to find remainders
#[derive(Clone, Debug, Default)]
pub struct ModuloFunction;
#[derive(Clone, Debug)]
pub(crate) struct ModuloFunction {
signature: Signature,
}
impl Default for ModuloFunction {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        // Two numeric arguments.
        let signature = Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable);
        Self { signature }
    }
}
impl Display for ModuloFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@@ -41,7 +50,7 @@ impl Function for ModuloFunction {
NAME
}
fn return_type(&self, input_types: &[DataType]) -> Result<DataType> {
fn return_type(&self, input_types: &[DataType]) -> datafusion_common::Result<DataType> {
if input_types.iter().all(DataType::is_signed_integer) {
Ok(DataType::Int64)
} else if input_types.iter().all(DataType::is_unsigned_integer) {
@@ -51,8 +60,8 @@ impl Function for ModuloFunction {
}
}
fn signature(&self) -> Signature {
Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -89,7 +98,7 @@ mod tests {
use super::*;
#[test]
fn test_mod_function_signed() {
let function = ModuloFunction;
let function = ModuloFunction::default();
assert_eq!("mod", function.name());
assert_eq!(
DataType::Int64,
@@ -125,7 +134,7 @@ mod tests {
#[test]
fn test_mod_function_unsigned() {
let function = ModuloFunction;
let function = ModuloFunction::default();
assert_eq!("mod", function.name());
assert_eq!(
DataType::UInt64,
@@ -161,7 +170,7 @@ mod tests {
#[test]
fn test_mod_function_float() {
let function = ModuloFunction;
let function = ModuloFunction::default();
assert_eq!("mod", function.name());
assert_eq!(
DataType::Float64,
@@ -197,7 +206,7 @@ mod tests {
#[test]
fn test_mod_function_errors() {
let function = ModuloFunction;
let function = ModuloFunction::default();
assert_eq!("mod", function.name());
let nums = vec![27];
let divs = vec![0];

View File

@@ -14,7 +14,7 @@
use std::fmt;
use common_query::error::{self, Result};
use common_query::error;
use datafusion::arrow::compute::kernels::numeric;
use datafusion_common::arrow::compute::kernels::cast;
use datafusion_common::arrow::datatypes::DataType;
@@ -25,8 +25,18 @@ use snafu::ResultExt;
use crate::function::{Function, extract_args};
/// generates rates from a sequence of adjacent data points.
#[derive(Clone, Debug, Default)]
pub struct RateFunction;
#[derive(Clone, Debug)]
pub(crate) struct RateFunction {
signature: Signature,
}
impl Default for RateFunction {
    /// Creates the function with its signature built once up front, so
    /// `signature()` can hand out a reference to the stored value.
    fn default() -> Self {
        // Two numeric arguments.
        let signature = Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable);
        Self { signature }
    }
}
impl fmt::Display for RateFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@@ -39,12 +49,12 @@ impl Function for RateFunction {
"rate"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Float64)
}
fn signature(&self) -> Signature {
Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -83,7 +93,7 @@ mod tests {
use super::*;
#[test]
fn test_rate_function() {
let rate = RateFunction;
let rate = RateFunction::default();
assert_eq!("rate", rate.name());
assert_eq!(DataType::Float64, rate.return_type(&[]).unwrap());
assert!(matches!(rate.signature(),

View File

@@ -14,7 +14,6 @@
use std::fmt;
use common_query::error::Result;
use datafusion::logical_expr::ColumnarValue;
use datafusion_expr::{ScalarFunctionArgs, Signature, Volatility};
use datatypes::arrow::datatypes::DataType;
@@ -23,23 +22,33 @@ use datatypes::vectors::{Helper, Vector};
use crate::function::{Function, extract_args};
use crate::scalars::expression::{EvalContext, scalar_binary_op};
#[derive(Clone, Default)]
pub(crate) struct TestAndFunction;
#[derive(Clone)]
pub(crate) struct TestAndFunction {
    /// Argument signature, built once in [`Default::default`] and returned
    /// by reference from `signature()`.
    signature: Signature,
}

impl Default for TestAndFunction {
    /// Builds the function with an exact `(Boolean, Boolean)` signature.
    fn default() -> Self {
        let arg_types = vec![DataType::Boolean, DataType::Boolean];
        let signature = Signature::exact(arg_types, Volatility::Immutable);
        Self { signature }
    }
}
impl Function for TestAndFunction {
fn name(&self) -> &str {
"test_and"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Boolean)
}
fn signature(&self) -> Signature {
Signature::exact(
vec![DataType::Boolean, DataType::Boolean],
Volatility::Immutable,
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(

View File

@@ -22,6 +22,6 @@ pub(crate) struct TimestampFunction;
impl TimestampFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register_scalar(ToUnixtimeFunction);
registry.register_scalar(ToUnixtimeFunction::default());
}
}

View File

@@ -15,7 +15,6 @@
use std::fmt;
use std::sync::Arc;
use common_query::error::Result;
use common_time::{Date, Timestamp};
use datafusion_common::DataFusionError;
use datafusion_common::arrow::array::{
@@ -29,8 +28,31 @@ use crate::function::{Function, FunctionContext, extract_args, find_function_con
use crate::helper::with_match_timestamp_types;
/// A function to convert the column into the unix timestamp in seconds.
#[derive(Clone, Debug, Default)]
pub struct ToUnixtimeFunction;
#[derive(Clone, Debug)]
pub(crate) struct ToUnixtimeFunction {
    /// Argument signature, built once in [`Default::default`] and returned
    /// by reference from `signature()`.
    signature: Signature,
}

impl Default for ToUnixtimeFunction {
    /// Builds the function with a uniform one-argument signature over the
    /// string, integer, date and timestamp types listed below.
    fn default() -> Self {
        // Every type the single argument may have.
        let accepted_types = vec![
            DataType::Utf8,
            DataType::Int32,
            DataType::Int64,
            DataType::Date32,
            DataType::Timestamp(TimeUnit::Second, None),
            DataType::Timestamp(TimeUnit::Millisecond, None),
            DataType::Timestamp(TimeUnit::Microsecond, None),
            DataType::Timestamp(TimeUnit::Nanosecond, None),
        ];
        let signature = Signature::uniform(1, accepted_types, Volatility::Immutable);
        Self { signature }
    }
}
const NAME: &str = "to_unixtime";
@@ -72,25 +94,12 @@ impl Function for ToUnixtimeFunction {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Int64)
}
fn signature(&self) -> Signature {
Signature::uniform(
1,
vec![
DataType::Utf8,
DataType::Int32,
DataType::Int64,
DataType::Date32,
DataType::Timestamp(TimeUnit::Second, None),
DataType::Timestamp(TimeUnit::Millisecond, None),
DataType::Timestamp(TimeUnit::Microsecond, None),
DataType::Timestamp(TimeUnit::Nanosecond, None),
],
Volatility::Immutable,
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -147,7 +156,6 @@ mod tests {
};
use datafusion_common::arrow::datatypes::Int64Type;
use datafusion_common::config::ConfigOptions;
use datafusion_expr::TypeSignature;
use super::{ToUnixtimeFunction, *};
@@ -164,7 +172,7 @@ mod tests {
return_field: Arc::new(Field::new("", DataType::Int64, true)),
config_options,
};
let result = ToUnixtimeFunction
let result = ToUnixtimeFunction::default()
.invoke_with_args(args)
.and_then(|x| x.to_array(number_rows))
.unwrap();
@@ -178,26 +186,10 @@ mod tests {
#[test]
fn test_string_to_unixtime() {
let f = ToUnixtimeFunction;
let f = ToUnixtimeFunction::default();
assert_eq!("to_unixtime", f.name());
assert_eq!(DataType::Int64, f.return_type(&[]).unwrap());
assert!(matches!(f.signature(),
Signature {
type_signature: TypeSignature::Uniform(1, valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![
DataType::Utf8,
DataType::Int32,
DataType::Int64,
DataType::Date32,
DataType::Timestamp(TimeUnit::Second, None),
DataType::Timestamp(TimeUnit::Millisecond, None),
DataType::Timestamp(TimeUnit::Microsecond, None),
DataType::Timestamp(TimeUnit::Nanosecond, None),
]
));
let times = vec![
Some("2023-03-01T06:35:02Z"),
None,

View File

@@ -18,7 +18,6 @@ use std::fmt;
use std::fmt::Display;
use std::sync::Arc;
use common_query::error::Result;
use datafusion_common::DataFusionError;
use datafusion_common::arrow::array::{Array, AsArray, Float64Builder};
use datafusion_common::arrow::datatypes::{DataType, Float64Type};
@@ -37,12 +36,27 @@ const NAME: &str = "uddsketch_calc";
/// 2. The serialized UDDSketch state, as produced by the aggregator (binary).
///
/// For each row, it deserializes the sketch and returns the computed quantile value.
#[derive(Debug, Default)]
pub struct UddSketchCalcFunction;
#[derive(Debug)]
pub(crate) struct UddSketchCalcFunction {
/// Argument signature; constructed in the `Default` impl and returned by
/// reference from `signature()`.
signature: Signature,
}
impl UddSketchCalcFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register_scalar(UddSketchCalcFunction);
registry.register_scalar(UddSketchCalcFunction::default());
}
}
impl Default for UddSketchCalcFunction {
    /// Builds the function with an exact `(Float64, Binary)` signature.
    fn default() -> Self {
        // Positional argument types:
        //   1. percentile (float64)
        //   2. UDDSketch state (binary)
        let arg_types = vec![DataType::Float64, DataType::Binary];
        let signature = Signature::exact(arg_types, Volatility::Immutable);
        Self { signature }
    }
}
@@ -57,17 +71,12 @@ impl Function for UddSketchCalcFunction {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Float64)
}
fn signature(&self) -> Signature {
// First argument: percentile (float64)
// Second argument: UDDSketch state (binary)
Signature::exact(
vec![DataType::Float64, DataType::Binary],
Volatility::Immutable,
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -142,7 +151,7 @@ mod tests {
#[test]
fn test_uddsketch_calc_function() {
let function = UddSketchCalcFunction;
let function = UddSketchCalcFunction::default();
assert_eq!("uddsketch_calc", function.name());
assert_eq!(
DataType::Float64,
@@ -200,7 +209,7 @@ mod tests {
#[test]
fn test_uddsketch_calc_function_errors() {
let function = UddSketchCalcFunction;
let function = UddSketchCalcFunction::default();
// Test with invalid number of arguments
let result = function.invoke_with_args(ScalarFunctionArgs {

View File

@@ -14,28 +14,21 @@
use std::any::Any;
use std::fmt::{Debug, Formatter};
use std::sync::Arc;
use common_query::prelude::ColumnarValue;
use datafusion::arrow::datatypes::DataType;
use datafusion::logical_expr::{ScalarFunctionArgs, ScalarUDFImpl};
use datafusion_expr::ScalarUDF;
use session::context::QueryContextRef;
use crate::function::{FunctionContext, FunctionRef};
use crate::state::FunctionState;
use crate::function::FunctionRef;
struct ScalarUdf {
function: FunctionRef,
signature: datafusion_expr::Signature,
context: FunctionContext,
}
impl Debug for ScalarUdf {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ScalarUdf")
.field("function", &self.function.name())
.field("signature", &self.signature)
.finish()
}
}
@@ -54,51 +47,24 @@ impl ScalarUDFImpl for ScalarUdf {
}
fn signature(&self) -> &datafusion_expr::Signature {
&self.signature
self.function.signature()
}
fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
self.function.return_type(arg_types).map_err(Into::into)
self.function.return_type(arg_types)
}
fn invoke_with_args(
&self,
args: ScalarFunctionArgs,
) -> datafusion_common::Result<datafusion_expr::ColumnarValue> {
let result = self.function.invoke_with_args(args.clone());
if !matches!(
result,
Err(datafusion_common::DataFusionError::NotImplemented(_))
) {
return result;
}
let columns = args
.args
.iter()
.map(|x| ColumnarValue::try_from(x).and_then(|y| y.try_into_vector(args.number_rows)))
.collect::<common_query::error::Result<Vec<_>>>()?;
let v = self
.function
.eval(&self.context, &columns)
.map(ColumnarValue::Vector)?;
Ok(v.into())
self.function.invoke_with_args(args)
}
}
/// Create a [`ScalarUDF`] that wraps the given function.
pub fn create_udf(
func: FunctionRef,
query_ctx: QueryContextRef,
state: Arc<FunctionState>,
) -> ScalarUDF {
let signature = func.signature();
let udf = ScalarUdf {
function: func,
signature,
context: FunctionContext { query_ctx, state },
};
ScalarUDF::new_from_impl(udf)
pub fn create_udf(function: FunctionRef) -> ScalarUDF {
    // `ScalarUdf` forwards name, signature, return type and invocation to `function`.
    let shim = ScalarUdf { function };
    ScalarUDF::new_from_impl(shim)
}
#[cfg(test)]
@@ -112,7 +78,6 @@ mod tests {
use datafusion_common::config::ConfigOptions;
use datatypes::arrow::datatypes::Field;
use datatypes::data_type::{ConcreteDataType, DataType};
use session::context::QueryContextBuilder;
use super::*;
use crate::function::Function;
@@ -120,8 +85,7 @@ mod tests {
#[test]
fn test_create_udf() {
let f = Arc::new(TestAndFunction);
let query_ctx = QueryContextBuilder::default().build().into();
let f = Arc::new(TestAndFunction::default());
let args = ScalarFunctionArgs {
args: vec![
@@ -150,11 +114,9 @@ mod tests {
assert!(vector.value(2));
// create a udf and test it again
let udf = create_udf(f.clone(), query_ctx, Arc::new(FunctionState::default()));
let udf = create_udf(f);
assert_eq!("test_and", udf.name());
let expected_signature: datafusion_expr::Signature = f.signature();
assert_eq!(udf.signature(), &expected_signature);
assert_eq!(
ConcreteDataType::boolean_datatype(),
udf.return_type(&[])

View File

@@ -41,29 +41,29 @@ pub(crate) struct VectorFunction;
impl VectorFunction {
pub fn register(registry: &FunctionRegistry) {
// conversion
registry.register_scalar(convert::ParseVectorFunction);
registry.register_scalar(convert::VectorToStringFunction);
registry.register_scalar(convert::ParseVectorFunction::default());
registry.register_scalar(convert::VectorToStringFunction::default());
// distance
registry.register_scalar(distance::CosDistanceFunction);
registry.register_scalar(distance::DotProductFunction);
registry.register_scalar(distance::L2SqDistanceFunction);
registry.register_scalar(distance::CosDistanceFunction::default());
registry.register_scalar(distance::DotProductFunction::default());
registry.register_scalar(distance::L2SqDistanceFunction::default());
// scalar calculation
registry.register_scalar(scalar_add::ScalarAddFunction);
registry.register_scalar(scalar_mul::ScalarMulFunction);
registry.register_scalar(scalar_add::ScalarAddFunction::default());
registry.register_scalar(scalar_mul::ScalarMulFunction::default());
// vector calculation
registry.register_scalar(vector_add::VectorAddFunction);
registry.register_scalar(vector_sub::VectorSubFunction);
registry.register_scalar(vector_mul::VectorMulFunction);
registry.register_scalar(vector_div::VectorDivFunction);
registry.register_scalar(vector_norm::VectorNormFunction);
registry.register_scalar(vector_dim::VectorDimFunction);
registry.register_scalar(vector_kth_elem::VectorKthElemFunction);
registry.register_scalar(vector_subvector::VectorSubvectorFunction);
registry.register_scalar(elem_sum::ElemSumFunction);
registry.register_scalar(elem_product::ElemProductFunction);
registry.register_scalar(vector_add::VectorAddFunction::default());
registry.register_scalar(vector_sub::VectorSubFunction::default());
registry.register_scalar(vector_mul::VectorMulFunction::default());
registry.register_scalar(vector_div::VectorDivFunction::default());
registry.register_scalar(vector_norm::VectorNormFunction::default());
registry.register_scalar(vector_dim::VectorDimFunction::default());
registry.register_scalar(vector_kth_elem::VectorKthElemFunction::default());
registry.register_scalar(vector_subvector::VectorSubvectorFunction::default());
registry.register_scalar(elem_sum::ElemSumFunction::default());
registry.register_scalar(elem_product::ElemProductFunction::default());
}
}
@@ -218,3 +218,44 @@ where
Ok(ColumnarValue::Array(results))
}
}
// Declares a UDF struct `$name` (forwarding any attributes/doc comments given
// before the identifier) that takes two vector-literal arguments.
//
// The expansion contains:
// - the struct itself, holding a prebuilt `datafusion_expr::Signature`;
// - a `Default` impl whose signature comes from `crate::helper::one_of_sigs2`,
//   with `Utf8`, `Utf8View`, `Binary` and `BinaryView` listed for both
//   argument positions;
// - a `Display` impl that writes the upper-cased result of `self.name()`.
//
// `name()` is not defined here; it must be resolvable at the expansion site
// (the visible callers provide it through their `Function` impl).
macro_rules! define_args_of_two_vector_literals_udf {
    ($(#[$attr:meta])* $name: ident) => {
        $(#[$attr])*
        #[derive(Debug, Clone)]
        pub(crate) struct $name {
            signature: datafusion_expr::Signature,
        }

        impl Default for $name {
            fn default() -> Self {
                use arrow::datatypes::DataType;

                // Both argument positions share the same list of types.
                let first_arg_types = vec![
                    DataType::Utf8,
                    DataType::Utf8View,
                    DataType::Binary,
                    DataType::BinaryView,
                ];
                let second_arg_types = first_arg_types.clone();

                Self {
                    signature: crate::helper::one_of_sigs2(first_arg_types, second_arg_types),
                }
            }
        }

        impl std::fmt::Display for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                let upper_name = self.name().to_ascii_uppercase();
                write!(f, "{}", upper_name)
            }
        }
    };
}
pub(crate) use define_args_of_two_vector_literals_udf;

View File

@@ -15,7 +15,7 @@
use std::fmt::Display;
use std::sync::Arc;
use common_query::error::{InvalidVectorStringSnafu, Result};
use common_query::error::InvalidVectorStringSnafu;
use datafusion_common::arrow::array::{Array, AsArray, BinaryViewBuilder};
use datafusion_common::arrow::compute;
use datafusion_common::arrow::datatypes::DataType;
@@ -27,20 +27,30 @@ use crate::function::{Function, extract_args};
const NAME: &str = "parse_vec";
#[derive(Debug, Clone, Default)]
pub struct ParseVectorFunction;
#[derive(Debug, Clone)]
pub struct ParseVectorFunction {
    /// Argument signature, built once in [`Default::default`] and returned
    /// by reference from `signature()`.
    signature: Signature,
}

impl Default for ParseVectorFunction {
    /// Builds the function with the signature `Signature::string(1, Immutable)`.
    fn default() -> Self {
        let signature = Signature::string(1, Volatility::Immutable);
        Self { signature }
    }
}
impl Function for ParseVectorFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::BinaryView)
}
fn signature(&self) -> Signature {
Signature::string(1, Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -87,7 +97,7 @@ mod tests {
#[test]
fn test_parse_vector() {
let func = ParseVectorFunction;
let func = ParseVectorFunction::default();
let arg0 = Arc::new(StringViewArray::from_iter([
Some("[1.0,2.0,3.0]".to_string()),
@@ -132,7 +142,7 @@ mod tests {
#[test]
fn test_parse_vector_error() {
let func = ParseVectorFunction;
let func = ParseVectorFunction::default();
let inputs = [
StringViewArray::from_iter([

View File

@@ -15,7 +15,6 @@
use std::fmt::Display;
use std::sync::Arc;
use common_query::error::Result;
use datafusion_common::DataFusionError;
use datafusion_common::arrow::array::{Array, AsArray, StringViewBuilder};
use datafusion_common::arrow::compute;
@@ -28,26 +27,36 @@ use crate::function::{Function, extract_args};
const NAME: &str = "vec_to_string";
#[derive(Debug, Clone, Default)]
pub struct VectorToStringFunction;
#[derive(Debug, Clone)]
pub struct VectorToStringFunction {
    /// Argument signature, built once in [`Default::default`] and returned
    /// by reference from `signature()`.
    signature: Signature,
}

impl Default for VectorToStringFunction {
    /// Builds the function with a one-argument signature that accepts either
    /// `BinaryView` or any type in `BINARYS`.
    fn default() -> Self {
        let alternatives = vec![
            TypeSignature::Uniform(1, vec![DataType::BinaryView]),
            TypeSignature::Uniform(1, BINARYS.to_vec()),
        ];
        let signature = Signature::one_of(alternatives, Volatility::Immutable);
        Self { signature }
    }
}
impl Function for VectorToStringFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::one_of(
vec![
TypeSignature::Uniform(1, vec![DataType::BinaryView]),
TypeSignature::Uniform(1, BINARYS.to_vec()),
],
Volatility::Immutable,
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -102,7 +111,7 @@ mod tests {
#[test]
fn test_vector_to_string() {
let func = VectorToStringFunction;
let func = VectorToStringFunction::default();
let mut builder = BinaryViewBuilder::with_capacity(3);
builder.append_option(Some(

View File

@@ -19,7 +19,6 @@ mod l2sq;
use std::borrow::Cow;
use std::fmt::Display;
use common_query::error::Result;
use datafusion::logical_expr::ColumnarValue;
use datafusion_common::ScalarValue;
use datafusion_expr::{ScalarFunctionArgs, Signature};
@@ -32,33 +31,43 @@ macro_rules! define_distance_function {
($StructName:ident, $display_name:expr, $similarity_method:path) => {
/// A function calculates the distance between two vectors.
#[derive(Debug, Clone, Default)]
pub struct $StructName;
#[derive(Debug, Clone)]
pub(crate) struct $StructName {
signature: Signature,
}
impl Default for $StructName {
fn default() -> Self {
Self {
signature: helper::one_of_sigs2(
vec![
DataType::Utf8,
DataType::Utf8View,
DataType::Binary,
DataType::BinaryView,
],
vec![
DataType::Utf8,
DataType::Utf8View,
DataType::Binary,
DataType::BinaryView,
],
),
}
}
}
impl Function for $StructName {
fn name(&self) -> &str {
$display_name
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Float32)
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![
DataType::Utf8,
DataType::Utf8View,
DataType::Binary,
DataType::BinaryView,
],
vec![
DataType::Utf8,
DataType::Utf8View,
DataType::Binary,
DataType::BinaryView,
],
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -134,9 +143,9 @@ mod tests {
#[test]
fn test_distance_string_string() {
let funcs = [
Box::new(CosDistanceFunction {}) as Box<dyn Function>,
Box::new(L2SqDistanceFunction {}) as Box<dyn Function>,
Box::new(DotProductFunction {}) as Box<dyn Function>,
Box::new(CosDistanceFunction::default()) as Box<dyn Function>,
Box::new(L2SqDistanceFunction::default()) as Box<dyn Function>,
Box::new(DotProductFunction::default()) as Box<dyn Function>,
];
for func in funcs {
@@ -174,9 +183,9 @@ mod tests {
#[test]
fn test_distance_binary_binary() {
let funcs = [
Box::new(CosDistanceFunction {}) as Box<dyn Function>,
Box::new(L2SqDistanceFunction {}) as Box<dyn Function>,
Box::new(DotProductFunction {}) as Box<dyn Function>,
Box::new(CosDistanceFunction::default()) as Box<dyn Function>,
Box::new(L2SqDistanceFunction::default()) as Box<dyn Function>,
Box::new(DotProductFunction::default()) as Box<dyn Function>,
];
for func in funcs {
@@ -215,9 +224,9 @@ mod tests {
#[test]
fn test_distance_string_binary() {
let funcs = [
Box::new(CosDistanceFunction {}) as Box<dyn Function>,
Box::new(L2SqDistanceFunction {}) as Box<dyn Function>,
Box::new(DotProductFunction {}) as Box<dyn Function>,
Box::new(CosDistanceFunction::default()) as Box<dyn Function>,
Box::new(L2SqDistanceFunction::default()) as Box<dyn Function>,
Box::new(DotProductFunction::default()) as Box<dyn Function>,
];
for func in funcs {
@@ -256,9 +265,9 @@ mod tests {
#[test]
fn test_invalid_vector_length() {
let funcs = [
Box::new(CosDistanceFunction {}) as Box<dyn Function>,
Box::new(L2SqDistanceFunction {}) as Box<dyn Function>,
Box::new(DotProductFunction {}) as Box<dyn Function>,
Box::new(CosDistanceFunction::default()) as Box<dyn Function>,
Box::new(L2SqDistanceFunction::default()) as Box<dyn Function>,
Box::new(DotProductFunction::default()) as Box<dyn Function>,
];
for func in funcs {

View File

@@ -14,7 +14,6 @@
use std::fmt::Display;
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType;
use datafusion::logical_expr::ColumnarValue;
use datafusion::logical_expr_common::type_coercion::aggregates::{BINARYS, STRINGS};
@@ -40,27 +39,37 @@ const NAME: &str = "vec_elem_product";
// | 24.0 |
// +-----------------------------------------------------------+
/// ``````
#[derive(Debug, Clone, Default)]
pub struct ElemProductFunction;
#[derive(Debug, Clone)]
pub(crate) struct ElemProductFunction {
    /// Argument signature, built once in [`Default::default`] and returned
    /// by reference from `signature()`.
    signature: Signature,
}

impl Default for ElemProductFunction {
    /// Builds the function with a one-argument signature that accepts any
    /// type in `STRINGS`, any type in `BINARYS`, or `BinaryView`.
    fn default() -> Self {
        let alternatives = vec![
            TypeSignature::Uniform(1, STRINGS.to_vec()),
            TypeSignature::Uniform(1, BINARYS.to_vec()),
            TypeSignature::Uniform(1, vec![DataType::BinaryView]),
        ];
        let signature = Signature::one_of(alternatives, Volatility::Immutable);
        Self { signature }
    }
}
impl Function for ElemProductFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Float32)
}
fn signature(&self) -> Signature {
Signature::one_of(
vec![
TypeSignature::Uniform(1, STRINGS.to_vec()),
TypeSignature::Uniform(1, BINARYS.to_vec()),
TypeSignature::Uniform(1, vec![DataType::BinaryView]),
],
Volatility::Immutable,
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -100,7 +109,7 @@ mod tests {
#[test]
fn test_elem_product() {
let func = ElemProductFunction;
let func = ElemProductFunction::default();
let input = Arc::new(StringArray::from(vec![
Some("[1.0,2.0,3.0]".to_string()),

View File

@@ -14,7 +14,6 @@
use std::fmt::Display;
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType;
use datafusion::logical_expr::ColumnarValue;
use datafusion_common::ScalarValue;
@@ -27,27 +26,37 @@ use crate::scalars::vector::{VectorCalculator, impl_conv};
const NAME: &str = "vec_elem_sum";
#[derive(Debug, Clone, Default)]
pub struct ElemSumFunction;
#[derive(Debug, Clone)]
pub(crate) struct ElemSumFunction {
    /// Argument signature, built once in [`Default::default`] and returned
    /// by reference from `signature()`.
    signature: Signature,
}

impl Default for ElemSumFunction {
    /// Builds the function with a one-argument signature that accepts any
    /// type in `STRINGS`, any type in `BINARYS`, or `BinaryView`.
    fn default() -> Self {
        let alternatives = vec![
            TypeSignature::Uniform(1, STRINGS.to_vec()),
            TypeSignature::Uniform(1, BINARYS.to_vec()),
            TypeSignature::Uniform(1, vec![DataType::BinaryView]),
        ];
        let signature = Signature::one_of(alternatives, Volatility::Immutable);
        Self { signature }
    }
}
impl Function for ElemSumFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Float32)
}
fn signature(&self) -> Signature {
Signature::one_of(
vec![
TypeSignature::Uniform(1, STRINGS.to_vec()),
TypeSignature::Uniform(1, BINARYS.to_vec()),
TypeSignature::Uniform(1, vec![DataType::BinaryView]),
],
Volatility::Immutable,
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -88,7 +97,7 @@ mod tests {
#[test]
fn test_elem_sum() {
let func = ElemSumFunction;
let func = ElemSumFunction::default();
let input = Arc::new(StringViewArray::from(vec![
Some("[1.0,2.0,3.0]".to_string()),

View File

@@ -14,7 +14,6 @@
use std::fmt::Display;
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType;
use datafusion::logical_expr::ColumnarValue;
use datafusion_common::ScalarValue;
@@ -50,23 +49,38 @@ const NAME: &str = "vec_scalar_add";
/// | [0,1,2] |
/// +---------+
/// ```
#[derive(Debug, Clone, Default)]
pub struct ScalarAddFunction;
#[derive(Debug, Clone)]
pub(crate) struct ScalarAddFunction {
    /// Argument signature, built once in [`Default::default`] and returned
    /// by reference from `signature()`.
    signature: Signature,
}

impl Default for ScalarAddFunction {
    /// Builds the function with a two-argument signature: a `Float64` first,
    /// then a vector given as `Utf8`, `Utf8View`, `Binary` or `BinaryView`.
    fn default() -> Self {
        let scalar_types = vec![DataType::Float64];
        let vector_types = vec![
            DataType::Utf8,
            DataType::Utf8View,
            DataType::Binary,
            DataType::BinaryView,
        ];
        let signature = helper::one_of_sigs2(scalar_types, vector_types);
        Self { signature }
    }
}
impl Function for ScalarAddFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::BinaryView)
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![DataType::Float64],
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -110,7 +124,7 @@ mod tests {
#[test]
fn test_scalar_add() {
let func = ScalarAddFunction;
let func = ScalarAddFunction::default();
let input0 = Arc::new(Float64Array::from(vec![
Some(1.0),

View File

@@ -14,7 +14,6 @@
use std::fmt::Display;
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType;
use datafusion::logical_expr::ColumnarValue;
use datafusion_common::ScalarValue;
@@ -50,28 +49,38 @@ const NAME: &str = "vec_scalar_mul";
/// | [1,2,3] |
/// +---------+
/// ```
#[derive(Debug, Clone, Default)]
pub struct ScalarMulFunction;
#[derive(Debug, Clone)]
pub(crate) struct ScalarMulFunction {
    /// Argument signature, built once in [`Default::default`] and returned
    /// by reference from `signature()`.
    signature: Signature,
}

impl Default for ScalarMulFunction {
    /// Builds the function with a two-argument signature: a `Float64` first,
    /// then a vector given as `Utf8`, `Utf8View`, `Binary` or `BinaryView`.
    fn default() -> Self {
        let scalar_types = vec![DataType::Float64];
        let vector_types = vec![
            DataType::Utf8,
            DataType::Utf8View,
            DataType::Binary,
            DataType::BinaryView,
        ];
        let signature = helper::one_of_sigs2(scalar_types, vector_types);
        Self { signature }
    }
}
impl Function for ScalarMulFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::BinaryView)
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![DataType::Float64],
vec![
DataType::Utf8,
DataType::Utf8View,
DataType::Binary,
DataType::BinaryView,
],
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -115,7 +124,7 @@ mod tests {
#[test]
fn test_scalar_mul() {
let func = ScalarMulFunction;
let func = ScalarMulFunction::default();
let input0 = Arc::new(Float64Array::from(vec![
Some(2.0),

View File

@@ -13,9 +13,7 @@
// limitations under the License.
use std::borrow::Cow;
use std::fmt::Display;
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType;
use datafusion::logical_expr::ColumnarValue;
use datafusion_common::{DataFusionError, ScalarValue};
@@ -23,12 +21,12 @@ use datafusion_expr::{ScalarFunctionArgs, Signature};
use nalgebra::DVectorView;
use crate::function::Function;
use crate::helper;
use crate::scalars::vector::VectorCalculator;
use crate::scalars::vector::impl_conv::veclit_to_binlit;
use crate::scalars::vector::{VectorCalculator, define_args_of_two_vector_literals_udf};
const NAME: &str = "vec_add";
define_args_of_two_vector_literals_udf!(
/// Adds corresponding elements of two vectors, returns a vector.
///
/// # Example
@@ -42,23 +40,20 @@ const NAME: &str = "vec_add";
/// | [2,3] |
/// +---------------------------------------------------------------+
///
#[derive(Debug, Clone, Default)]
pub struct VectorAddFunction;
VectorAddFunction);
impl Function for VectorAddFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::BinaryView)
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -94,12 +89,6 @@ impl Function for VectorAddFunction {
}
}
impl Display for VectorAddFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
@@ -112,7 +101,7 @@ mod tests {
#[test]
fn test_sub() {
let func = VectorAddFunction;
let func = VectorAddFunction::default();
let input0 = Arc::new(StringViewArray::from(vec![
Some("[1.0,2.0,3.0]".to_string()),
@@ -155,7 +144,7 @@ mod tests {
#[test]
fn test_sub_error() {
let func = VectorAddFunction;
let func = VectorAddFunction::default();
let input0 = Arc::new(StringViewArray::from(vec![
Some("[1.0,2.0,3.0]".to_string()),

View File

@@ -14,7 +14,6 @@
use std::fmt::Display;
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType;
use datafusion::logical_expr::ColumnarValue;
use datafusion::logical_expr_common::type_coercion::aggregates::{BINARYS, STRINGS};
@@ -40,26 +39,36 @@ const NAME: &str = "vec_dim";
/// | 4 |
/// +---------------------------------------------------------------+
///
#[derive(Debug, Clone, Default)]
pub struct VectorDimFunction;
#[derive(Debug, Clone)]
pub(crate) struct VectorDimFunction {
    /// Argument signature, built once in [`Default::default`] and returned
    /// by reference from `signature()`.
    signature: Signature,
}

impl Default for VectorDimFunction {
    /// Builds the function with a one-argument signature that accepts any
    /// type in `STRINGS` or any type in `BINARYS`.
    fn default() -> Self {
        let alternatives = vec![
            TypeSignature::Uniform(1, STRINGS.to_vec()),
            TypeSignature::Uniform(1, BINARYS.to_vec()),
        ];
        let signature = Signature::one_of(alternatives, Volatility::Immutable);
        Self { signature }
    }
}
impl Function for VectorDimFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt64)
}
fn signature(&self) -> Signature {
Signature::one_of(
vec![
TypeSignature::Uniform(1, STRINGS.to_vec()),
TypeSignature::Uniform(1, BINARYS.to_vec()),
],
Volatility::Immutable,
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -98,7 +107,7 @@ mod tests {
#[test]
fn test_vec_dim() {
let func = VectorDimFunction;
let func = VectorDimFunction::default();
let input0 = Arc::new(StringViewArray::from(vec![
Some("[0.0,2.0,3.0]".to_string()),
@@ -129,7 +138,7 @@ mod tests {
#[test]
fn test_dim_error() {
let func = VectorDimFunction;
let func = VectorDimFunction::default();
let input0 = Arc::new(StringViewArray::from(vec![
Some("[1.0,2.0,3.0]".to_string()),

View File

@@ -13,9 +13,7 @@
// limitations under the License.
use std::borrow::Cow;
use std::fmt::Display;
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType;
use datafusion::logical_expr::ColumnarValue;
use datafusion_common::{DataFusionError, ScalarValue};
@@ -23,12 +21,12 @@ use datafusion_expr::{ScalarFunctionArgs, Signature};
use nalgebra::DVectorView;
use crate::function::Function;
use crate::helper;
use crate::scalars::vector::VectorCalculator;
use crate::scalars::vector::impl_conv::veclit_to_binlit;
use crate::scalars::vector::{VectorCalculator, define_args_of_two_vector_literals_udf};
const NAME: &str = "vec_div";
define_args_of_two_vector_literals_udf!(
/// Divides corresponding elements of two vectors.
///
/// # Example
@@ -43,23 +41,20 @@ const NAME: &str = "vec_div";
/// +---------+
///
/// ```
#[derive(Debug, Clone, Default)]
pub struct VectorDivFunction;
VectorDivFunction);
impl Function for VectorDivFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::BinaryView)
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -95,12 +90,6 @@ impl Function for VectorDivFunction {
}
}
impl Display for VectorDivFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
@@ -113,7 +102,7 @@ mod tests {
#[test]
fn test_vector_mul() {
let func = VectorDivFunction;
let func = VectorDivFunction::default();
let vec0 = vec![1.0, 2.0, 3.0];
let vec1 = vec![1.0, 1.0];

View File

@@ -14,7 +14,6 @@
use std::fmt::Display;
use common_query::error::Result;
use datafusion::logical_expr::ColumnarValue;
use datafusion_common::{DataFusionError, ScalarValue};
use datafusion_expr::{ScalarFunctionArgs, Signature};
@@ -43,23 +42,33 @@ const NAME: &str = "vec_kth_elem";
/// ```
///
#[derive(Debug, Clone, Default)]
pub struct VectorKthElemFunction;
#[derive(Debug, Clone)]
pub(crate) struct VectorKthElemFunction {
    /// Argument signature, built once in [`Default::default`] and returned
    /// by reference from `signature()`.
    signature: Signature,
}

impl Default for VectorKthElemFunction {
    /// Builds the function with a two-argument signature: a vector given as
    /// `Utf8` or `Binary`, followed by an `Int64`.
    fn default() -> Self {
        // NOTE(review): only `Utf8` and `Binary` are listed for the vector
        // argument, although this module's unit test passes a
        // `StringViewArray` straight to `invoke_with_args`. Confirm whether
        // `Utf8View` / `BinaryView` should be accepted here as well.
        let vector_types = vec![DataType::Utf8, DataType::Binary];
        let index_types = vec![DataType::Int64];
        let signature = helper::one_of_sigs2(vector_types, index_types);
        Self { signature }
    }
}
impl Function for VectorKthElemFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Float32)
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![DataType::Utf8, DataType::Binary],
vec![DataType::Int64],
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -122,7 +131,7 @@ mod tests {
#[test]
fn test_vec_kth_elem() {
let func = VectorKthElemFunction;
let func = VectorKthElemFunction::default();
let input0: ArrayRef = Arc::new(StringViewArray::from(vec![
Some("[1.0,2.0,3.0]".to_string()),

View File

@@ -13,9 +13,7 @@
// limitations under the License.
use std::borrow::Cow;
use std::fmt::Display;
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType;
use datafusion::logical_expr::ColumnarValue;
use datafusion_common::{DataFusionError, ScalarValue};
@@ -23,12 +21,12 @@ use datafusion_expr::{ScalarFunctionArgs, Signature};
use nalgebra::DVectorView;
use crate::function::Function;
use crate::helper;
use crate::scalars::vector::VectorCalculator;
use crate::scalars::vector::impl_conv::veclit_to_binlit;
use crate::scalars::vector::{VectorCalculator, define_args_of_two_vector_literals_udf};
const NAME: &str = "vec_mul";
define_args_of_two_vector_literals_udf!(
/// Multiplies corresponding elements of two vectors.
///
/// # Example
@@ -43,23 +41,19 @@ const NAME: &str = "vec_mul";
/// +---------+
///
/// ```
#[derive(Debug, Clone, Default)]
pub struct VectorMulFunction;
VectorMulFunction);
impl Function for VectorMulFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::BinaryView)
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -95,12 +89,6 @@ impl Function for VectorMulFunction {
}
}
impl Display for VectorMulFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
@@ -113,7 +101,7 @@ mod tests {
#[test]
fn test_vector_mul() {
let func = VectorMulFunction;
let func = VectorMulFunction::default();
let vec0 = vec![1.0, 2.0, 3.0];
let vec1 = vec![1.0, 1.0];

View File

@@ -14,7 +14,6 @@
use std::fmt::Display;
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType;
use datafusion::logical_expr::ColumnarValue;
use datafusion::logical_expr_common::type_coercion::aggregates::{BINARYS, STRINGS};
@@ -43,27 +42,37 @@ const NAME: &str = "vec_norm";
/// +--------------------------------------------------+
///
/// ```
#[derive(Debug, Clone, Default)]
pub struct VectorNormFunction;
#[derive(Debug, Clone)]
pub(crate) struct VectorNormFunction {
// Argument signature, built once in `Default::default` so that `signature()`
// can return a reference instead of constructing a new value per call.
signature: Signature,
}
impl Default for VectorNormFunction {
fn default() -> Self {
Self {
signature: Signature::one_of(
vec![
TypeSignature::Uniform(1, STRINGS.to_vec()),
TypeSignature::Uniform(1, BINARYS.to_vec()),
TypeSignature::Uniform(1, vec![DataType::BinaryView]),
],
Volatility::Immutable,
),
}
}
}
impl Function for VectorNormFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::BinaryView)
}
fn signature(&self) -> Signature {
Signature::one_of(
vec![
TypeSignature::Uniform(1, STRINGS.to_vec()),
TypeSignature::Uniform(1, BINARYS.to_vec()),
TypeSignature::Uniform(1, vec![DataType::BinaryView]),
],
Volatility::Immutable,
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -108,7 +117,7 @@ mod tests {
#[test]
fn test_vec_norm() {
let func = VectorNormFunction;
let func = VectorNormFunction::default();
let input0 = Arc::new(StringViewArray::from(vec![
Some("[0.0,2.0,3.0]".to_string()),

View File

@@ -13,9 +13,7 @@
// limitations under the License.
use std::borrow::Cow;
use std::fmt::Display;
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType;
use datafusion::logical_expr::ColumnarValue;
use datafusion_common::{DataFusionError, ScalarValue};
@@ -23,12 +21,12 @@ use datafusion_expr::{ScalarFunctionArgs, Signature};
use nalgebra::DVectorView;
use crate::function::Function;
use crate::helper;
use crate::scalars::vector::VectorCalculator;
use crate::scalars::vector::impl_conv::veclit_to_binlit;
use crate::scalars::vector::{VectorCalculator, define_args_of_two_vector_literals_udf};
const NAME: &str = "vec_sub";
define_args_of_two_vector_literals_udf!(
/// Subtracts corresponding elements of two vectors, returns a vector.
///
/// # Example
@@ -42,23 +40,19 @@ const NAME: &str = "vec_sub";
/// | [0,-1] |
/// +---------------------------------------------------------------+
///
#[derive(Debug, Clone, Default)]
pub struct VectorSubFunction;
VectorSubFunction);
impl Function for VectorSubFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::BinaryView)
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
vec![DataType::Utf8, DataType::Binary, DataType::BinaryView],
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -94,12 +88,6 @@ impl Function for VectorSubFunction {
}
}
impl Display for VectorSubFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
@@ -112,7 +100,7 @@ mod tests {
#[test]
fn test_sub() {
let func = VectorSubFunction;
let func = VectorSubFunction::default();
let input0: ArrayRef = Arc::new(StringViewArray::from(vec![
Some("[1.0,2.0,3.0]".to_string()),
@@ -155,7 +143,7 @@ mod tests {
#[test]
fn test_sub_error() {
let func = VectorSubFunction;
let func = VectorSubFunction::default();
let input0: ArrayRef = Arc::new(StringViewArray::from(vec![
Some("[1.0,2.0,3.0]".to_string()),

View File

@@ -15,7 +15,7 @@
use std::fmt::Display;
use std::sync::Arc;
use common_query::error::{InvalidFuncArgsSnafu, Result};
use common_query::error::InvalidFuncArgsSnafu;
use datafusion::arrow::array::{Array, AsArray, BinaryViewBuilder};
use datafusion::arrow::datatypes::Int64Type;
use datafusion::logical_expr::ColumnarValue;
@@ -45,26 +45,36 @@ const NAME: &str = "vec_subvector";
/// ```
///
#[derive(Debug, Clone, Default)]
pub struct VectorSubvectorFunction;
#[derive(Debug, Clone)]
pub(crate) struct VectorSubvectorFunction {
// Argument signature, built once in `Default::default` so that `signature()`
// can return a reference instead of constructing a new value per call.
signature: Signature,
}
impl Default for VectorSubvectorFunction {
fn default() -> Self {
Self {
signature: Signature::one_of(
vec![
TypeSignature::Exact(vec![DataType::Utf8, DataType::Int64, DataType::Int64]),
TypeSignature::Exact(vec![DataType::Binary, DataType::Int64, DataType::Int64]),
],
Volatility::Immutable,
),
}
}
}
impl Function for VectorSubvectorFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::BinaryView)
}
fn signature(&self) -> Signature {
Signature::one_of(
vec![
TypeSignature::Exact(vec![DataType::Utf8, DataType::Int64, DataType::Int64]),
TypeSignature::Exact(vec![DataType::Binary, DataType::Int64, DataType::Int64]),
],
Volatility::Immutable,
)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -130,7 +140,7 @@ mod tests {
#[test]
fn test_subvector() {
let func = VectorSubvectorFunction;
let func = VectorSubvectorFunction::default();
let input0: ArrayRef = Arc::new(StringViewArray::from(vec![
Some("[1.0, 2.0, 3.0, 4.0, 5.0]".to_string()),
@@ -169,7 +179,7 @@ mod tests {
}
#[test]
fn test_subvector_error() {
let func = VectorSubvectorFunction;
let func = VectorSubvectorFunction::default();
let input0: ArrayRef = Arc::new(StringViewArray::from(vec![
Some("[1.0, 2.0, 3.0]".to_string()),
@@ -197,7 +207,7 @@ mod tests {
#[test]
fn test_subvector_invalid_indices() {
let func = VectorSubvectorFunction;
let func = VectorSubvectorFunction::default();
let input0 = Arc::new(StringViewArray::from(vec![
Some("[1.0, 2.0, 3.0]".to_string()),

View File

@@ -34,14 +34,35 @@ pub(crate) struct SystemFunction;
impl SystemFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register_scalar(BuildFunction);
registry.register_scalar(VersionFunction);
registry.register_scalar(DatabaseFunction);
registry.register_scalar(ReadPreferenceFunction);
registry.register_scalar(PgBackendPidFunction);
registry.register_scalar(ConnectionIdFunction);
registry.register_scalar(TimezoneFunction);
registry.register_scalar(BuildFunction::default());
registry.register_scalar(VersionFunction::default());
registry.register_scalar(DatabaseFunction::default());
registry.register_scalar(ReadPreferenceFunction::default());
registry.register_scalar(PgBackendPidFunction::default());
registry.register_scalar(ConnectionIdFunction::default());
registry.register_scalar(TimezoneFunction::default());
registry.register(ProcedureStateFunction::factory());
PGCatalogFunction::register(registry);
}
}
/// Declares a zero-argument UDF type named `$name`.
///
/// The expansion consists of:
/// - a `pub(crate)` struct with a single `signature` field, deriving `Clone`, `Debug`
///   and `derive_more::Display` (its display text is whatever `self.name()` returns);
/// - a `Default` impl that sets `signature` to `Signature::nullary(Volatility::Immutable)`.
///
/// Attributes written before the identifier (doc comments included) are forwarded onto
/// the generated struct.
///
/// Call-site requirements, because these names are not fully qualified in the body:
/// `Volatility` must be in scope where the macro is invoked, and `$name` must have a
/// `name()` method available there for the `Display` derive to compile.
macro_rules! define_nullary_udf {
($(#[$attr:meta])* $name: ident) => {
// Re-emit the caller's attributes on the generated struct.
$(#[$attr])*
#[derive(Clone, Debug, derive_more::Display)]
#[display("{}", self.name())]
pub(crate) struct $name {
signature: datafusion_expr::Signature,
}
impl Default for $name {
fn default() -> Self {
Self {
// `Volatility` is resolved at the invocation site, not here.
signature: datafusion_expr::Signature::nullary(Volatility::Immutable),
}
}
}
};
}
pub(crate) use define_nullary_udf;

View File

@@ -12,38 +12,31 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt;
use std::sync::Arc;
use common_query::error::Result;
use datafusion::arrow::array::StringViewArray;
use datafusion::arrow::datatypes::DataType;
use datafusion::logical_expr::ColumnarValue;
use datafusion_expr::{ScalarFunctionArgs, Signature, Volatility};
use crate::function::Function;
use crate::system::define_nullary_udf;
define_nullary_udf!(
/// Generates build information
#[derive(Clone, Debug, Default)]
pub struct BuildFunction;
impl fmt::Display for BuildFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "BUILD")
}
}
BuildFunction);
impl Function for BuildFunction {
fn name(&self) -> &str {
"build"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::nullary(Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(&self, _: ScalarFunctionArgs) -> datafusion_common::Result<ColumnarValue> {
@@ -65,10 +58,9 @@ mod tests {
use super::*;
#[test]
fn test_build_function() {
let build = BuildFunction;
let build = BuildFunction::default();
assert_eq!("build", build.name());
assert_eq!(DataType::Utf8View, build.return_type(&[]).unwrap());
assert_eq!(build.signature(), Signature::nullary(Volatility::Immutable));
let build_info = common_version::build_info().to_string();
let actual = build
.invoke_with_args(ScalarFunctionArgs {

View File

@@ -12,29 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{self};
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType;
use datafusion_common::ScalarValue;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
use derive_more::Display;
use crate::function::{Function, find_function_context};
use crate::system::define_nullary_udf;
/// A function to return current schema name.
#[derive(Clone, Debug, Default)]
pub struct DatabaseFunction;
pub struct ReadPreferenceFunction;
#[derive(Display)]
#[display("{}", self.name())]
pub struct PgBackendPidFunction;
#[derive(Display)]
#[display("{}", self.name())]
pub struct ConnectionIdFunction;
// Each invocation expands (see `crate::system::define_nullary_udf`) to a `pub(crate)`
// struct holding a nullary, immutable `Signature`, plus `Default` and `Display` impls.
// The `Function` impls for these types follow further down in this file.
define_nullary_udf!(DatabaseFunction);
define_nullary_udf!(ReadPreferenceFunction);
define_nullary_udf!(PgBackendPidFunction);
define_nullary_udf!(ConnectionIdFunction);
const DATABASE_FUNCTION_NAME: &str = "database";
const READ_PREFERENCE_FUNCTION_NAME: &str = "read_preference";
@@ -46,12 +34,12 @@ impl Function for DatabaseFunction {
DATABASE_FUNCTION_NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::nullary(Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -70,12 +58,12 @@ impl Function for ReadPreferenceFunction {
READ_PREFERENCE_FUNCTION_NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::nullary(Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -96,12 +84,12 @@ impl Function for PgBackendPidFunction {
PG_BACKEND_PID
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt64)
}
fn signature(&self) -> Signature {
Signature::nullary(Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -120,12 +108,12 @@ impl Function for ConnectionIdFunction {
CONNECTION_ID
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::UInt32)
}
fn signature(&self) -> Signature {
Signature::nullary(Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -139,18 +127,6 @@ impl Function for ConnectionIdFunction {
}
}
impl fmt::Display for DatabaseFunction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "DATABASE")
}
}
impl fmt::Display for ReadPreferenceFunction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "READ_PREFERENCE")
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
@@ -163,10 +139,9 @@ mod tests {
use crate::function::FunctionContext;
#[test]
fn test_build_function() {
let build = DatabaseFunction;
let build = DatabaseFunction::default();
assert_eq!("database", build.name());
assert_eq!(DataType::Utf8View, build.return_type(&[]).unwrap());
assert_eq!(build.signature(), Signature::nullary(Volatility::Immutable));
let query_ctx = QueryContextBuilder::default()
.current_schema("test_db".to_string())

View File

@@ -16,7 +16,6 @@ mod version;
use std::sync::Arc;
use common_query::error::Result;
use datafusion::arrow::array::{ArrayRef, StringArray, as_boolean_array};
use datafusion::catalog::TableFunction;
use datafusion::common::ScalarValue;
@@ -24,27 +23,19 @@ use datafusion::common::utils::SingleRowListArrayBuilder;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
use datafusion_pg_catalog::pg_catalog::{self, PgCatalogStaticTables};
use datatypes::arrow::datatypes::{DataType, Field};
use derive_more::Display;
use version::PGVersionFunction;
use crate::function::{Function, find_function_context};
use crate::function_registry::FunctionRegistry;
use crate::system::define_nullary_udf;
const CURRENT_SCHEMA_FUNCTION_NAME: &str = "current_schema";
const CURRENT_SCHEMAS_FUNCTION_NAME: &str = "current_schemas";
const SESSION_USER_FUNCTION_NAME: &str = "session_user";
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct CurrentSchemaFunction;
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct CurrentSchemasFunction;
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct SessionUserFunction;
define_nullary_udf!(CurrentSchemaFunction);
define_nullary_udf!(CurrentSchemasFunction);
define_nullary_udf!(SessionUserFunction);
// Though "current_schema" can be aliased to "database", to not cause any breaking changes,
// we are not doing it: not until https://github.com/apache/datafusion/issues/17469 is resolved.
@@ -53,12 +44,12 @@ impl Function for CurrentSchemaFunction {
CURRENT_SCHEMA_FUNCTION_NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::nullary(Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -77,12 +68,12 @@ impl Function for SessionUserFunction {
SESSION_USER_FUNCTION_NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::nullary(Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -103,7 +94,7 @@ impl Function for CurrentSchemasFunction {
CURRENT_SCHEMAS_FUNCTION_NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::List(Arc::new(Field::new(
"x",
DataType::Utf8View,
@@ -111,8 +102,8 @@ impl Function for CurrentSchemasFunction {
))))
}
fn signature(&self) -> Signature {
Signature::exact(vec![DataType::Boolean], Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -146,10 +137,10 @@ impl PGCatalogFunction {
let static_tables =
Arc::new(PgCatalogStaticTables::try_new().expect("load postgres static tables"));
registry.register_scalar(PGVersionFunction);
registry.register_scalar(CurrentSchemaFunction);
registry.register_scalar(CurrentSchemasFunction);
registry.register_scalar(SessionUserFunction);
registry.register_scalar(PGVersionFunction::default());
registry.register_scalar(CurrentSchemaFunction::default());
registry.register_scalar(CurrentSchemasFunction::default());
registry.register_scalar(SessionUserFunction::default());
registry.register(pg_catalog::format_type::create_format_type_udf());
registry.register(pg_catalog::create_pg_get_partkeydef_udf());
registry.register(pg_catalog::has_privilege_udf::create_has_privilege_udf(

View File

@@ -14,15 +14,24 @@
use std::fmt;
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType;
use datafusion_common::ScalarValue;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
use crate::function::Function;
#[derive(Clone, Debug, Default)]
pub(crate) struct PGVersionFunction;
/// Scalar function type whose `Function::name` is `"pg_catalog.version"`.
#[derive(Clone, Debug)]
pub(crate) struct PGVersionFunction {
// Argument signature, built once in `Default::default` so that `signature()`
// can return a reference instead of constructing a new value per call.
signature: Signature,
}
impl Default for PGVersionFunction {
    /// Creates the function with an exact signature over an empty argument list.
    fn default() -> Self {
        let signature = Signature::exact(Vec::new(), Volatility::Immutable);
        Self { signature }
    }
}
impl fmt::Display for PGVersionFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@@ -35,12 +44,12 @@ impl Function for PGVersionFunction {
"pg_catalog.version"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::exact(vec![], Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(&self, _: ScalarFunctionArgs) -> datafusion_common::Result<ColumnarValue> {

View File

@@ -12,18 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{self};
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType;
use datafusion_common::ScalarValue;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
use crate::function::{Function, find_function_context};
use crate::system::define_nullary_udf;
define_nullary_udf!(
/// A function to return current session timezone.
#[derive(Clone, Debug, Default)]
pub struct TimezoneFunction;
TimezoneFunction);
const NAME: &str = "timezone";
@@ -32,12 +30,12 @@ impl Function for TimezoneFunction {
NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::nullary(Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(
@@ -51,12 +49,6 @@ impl Function for TimezoneFunction {
}
}
impl fmt::Display for TimezoneFunction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "TIMEZONE")
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
@@ -70,10 +62,9 @@ mod tests {
#[test]
fn test_build_function() {
let build = TimezoneFunction;
let build = TimezoneFunction::default();
assert_eq!("timezone", build.name());
assert_eq!(DataType::Utf8View, build.return_type(&[]).unwrap());
assert_eq!(build.signature(), Signature::nullary(Volatility::Immutable));
let query_ctx = QueryContextBuilder::default().build().into();

View File

@@ -12,36 +12,27 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt;
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType;
use datafusion_common::ScalarValue;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
use session::context::Channel;
use crate::function::{Function, find_function_context};
use crate::system::define_nullary_udf;
#[derive(Clone, Debug, Default)]
pub(crate) struct VersionFunction;
impl fmt::Display for VersionFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "VERSION")
}
}
// Expands (see `crate::system::define_nullary_udf`) to a `pub(crate)` struct holding a
// nullary, immutable `Signature`, plus `Default` and `Display` impls.
define_nullary_udf!(VersionFunction);
impl Function for VersionFunction {
fn name(&self) -> &str {
"version"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::nullary(Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
fn invoke_with_args(

View File

@@ -18,7 +18,8 @@ use std::sync::Arc;
use common_function::function::FunctionRef;
use datafusion::arrow::datatypes::{DataType, TimeUnit};
use datafusion_expr::{Signature, Volatility};
use datafusion::logical_expr::ColumnarValue;
use datafusion_expr::{ScalarFunctionArgs, Signature, Volatility};
use datafusion_substrait::extensions::Extensions;
use query::QueryEngine;
use serde::{Deserialize, Serialize};
@@ -119,12 +120,14 @@ pub fn register_function_to_query_engine(engine: &Arc<dyn QueryEngine>) {
/// A named `Function` that carries only a name and a signature; its
/// `invoke_with_args` always reports "not implemented".
#[derive(Debug)]
pub struct TumbleFunction {
// Name supplied to `TumbleFunction::new`; returned by `name()`.
name: String,
// Set to a variadic-any, immutable signature in `TumbleFunction::new`; returned by
// reference from `signature()`.
signature: Signature,
}
impl TumbleFunction {
    /// Creates a function called `name` that accepts any number of arguments of any
    /// type (`Signature::variadic_any`) and is declared immutable.
    fn new(name: &str) -> Self {
        let signature = Signature::variadic_any(Volatility::Immutable);
        Self {
            name: name.to_owned(),
            signature,
        }
    }
}
@@ -140,12 +143,16 @@ impl common_function::function::Function for TumbleFunction {
&self.name
}
fn return_type(&self, _: &[DataType]) -> common_query::error::Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Timestamp(TimeUnit::Millisecond, None))
}
fn signature(&self) -> Signature {
Signature::variadic_any(Volatility::Immutable)
fn signature(&self) -> &Signature {
&self.signature
}
/// Always returns DataFusion's "not implemented" error, using this function's name as
/// the message. The arguments are ignored.
fn invoke_with_args(&self, _: ScalarFunctionArgs) -> datafusion_common::Result<ColumnarValue> {
datafusion_common::not_impl_err!("{}", self.name())
}
}

View File

@@ -18,12 +18,11 @@ use std::sync::Arc;
use async_trait::async_trait;
use catalog::CatalogManagerRef;
use common_catalog::consts::{TRACE_TABLE_NAME, trace_services_table_name};
use common_function::function::{Function, FunctionRef};
use common_function::function::FunctionRef;
use common_function::scalars::json::json_get::{
JsonGetBool, JsonGetFloat, JsonGetInt, JsonGetString,
};
use common_function::scalars::udf::create_udf;
use common_function::state::FunctionState;
use common_query::{Output, OutputData};
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::util;
@@ -337,7 +336,7 @@ async fn query_trace_table(
.map(|s| s.as_str())
== Some(TABLE_DATA_MODEL_TRACE_V1);
let df_context = create_df_context(query_engine, ctx.clone())?;
let df_context = create_df_context(query_engine)?;
let dataframe = df_context
.read_table(Arc::new(DfTableProviderAdapter::new(table)))
@@ -392,28 +391,21 @@ async fn query_trace_table(
// to utilize them through DataFrame APIs. To address this limitation, we create a new session
// context and register the required UDFs, allowing them to be decoupled from the global context.
// TODO(zyy17): Is it possible or necessary to reuse the existing session context?
fn create_df_context(
query_engine: &QueryEngineRef,
ctx: QueryContextRef,
) -> ServerResult<SessionContext> {
fn create_df_context(query_engine: &QueryEngineRef) -> ServerResult<SessionContext> {
let df_context = SessionContext::new_with_state(
SessionStateBuilder::new_from_existing(query_engine.engine_state().session_state()).build(),
);
// The following JSON UDFs will be used for tags filters on v0 data model.
let udfs: Vec<FunctionRef> = vec![
Arc::new(JsonGetInt),
Arc::new(JsonGetFloat),
Arc::new(JsonGetBool),
Arc::new(JsonGetString),
Arc::new(JsonGetInt::default()),
Arc::new(JsonGetFloat::default()),
Arc::new(JsonGetBool::default()),
Arc::new(JsonGetString::default()),
];
for udf in udfs {
df_context.register_udf(create_udf(
udf,
ctx.clone(),
Arc::new(FunctionState::default()),
));
df_context.register_udf(create_udf(udf));
}
Ok(df_context)
@@ -431,7 +423,7 @@ fn json_tag_filters(
filters.push(
dataframe
.registry()
.udf(JsonGetString {}.name())
.udf(JsonGetString::NAME)
.context(DataFusionSnafu)?
.call(vec![
col(SPAN_ATTRIBUTES_COLUMN),
@@ -445,7 +437,7 @@ fn json_tag_filters(
filters.push(
dataframe
.registry()
.udf(JsonGetInt {}.name())
.udf(JsonGetInt::NAME)
.context(DataFusionSnafu)?
.call(vec![
col(SPAN_ATTRIBUTES_COLUMN),
@@ -458,7 +450,7 @@ fn json_tag_filters(
filters.push(
dataframe
.registry()
.udf(JsonGetFloat {}.name())
.udf(JsonGetFloat::NAME)
.context(DataFusionSnafu)?
.call(vec![
col(SPAN_ATTRIBUTES_COLUMN),
@@ -472,7 +464,7 @@ fn json_tag_filters(
filters.push(
dataframe
.registry()
.udf(JsonGetBool {}.name())
.udf(JsonGetBool::NAME)
.context(DataFusionSnafu)?
.call(vec![
col(SPAN_ATTRIBUTES_COLUMN),

View File

@@ -320,14 +320,14 @@ mod tests {
fn matches_func() -> Arc<ScalarUDF> {
Arc::new(
ScalarFunctionFactory::from(Arc::new(MatchesFunction) as FunctionRef)
ScalarFunctionFactory::from(Arc::new(MatchesFunction::default()) as FunctionRef)
.provide(Default::default()),
)
}
fn matches_term_func() -> Arc<ScalarUDF> {
Arc::new(
ScalarFunctionFactory::from(Arc::new(MatchesTermFunction) as FunctionRef)
ScalarFunctionFactory::from(Arc::new(MatchesTermFunction::default()) as FunctionRef)
.provide(Default::default()),
)
}

View File

@@ -240,7 +240,6 @@ mod tests {
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_function::scalars::matches_term::MatchesTermFunction;
use common_function::scalars::udf::create_udf;
use common_function::state::FunctionState;
use datafusion::datasource::memory::MemorySourceConfig;
use datafusion::datasource::source::DataSourceExec;
use datafusion::physical_optimizer::PhysicalOptimizerRule;
@@ -328,11 +327,7 @@ mod tests {
}
fn matches_term_udf() -> Arc<ScalarUDF> {
Arc::new(create_udf(
Arc::new(MatchesTermFunction),
QueryContext::arc(),
Arc::new(FunctionState::default()),
))
Arc::new(create_udf(Arc::new(MatchesTermFunction::default())))
}
#[test]

View File

@@ -16,14 +16,12 @@ use std::sync::Arc;
use common_function::scalars::matches_term::MatchesTermFunction;
use common_function::scalars::udf::create_udf;
use common_function::state::FunctionState;
use datafusion::config::ConfigOptions;
use datafusion_common::Result;
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::{Expr, LogicalPlan};
use datafusion_optimizer::analyzer::AnalyzerRule;
use session::context::QueryContext;
use crate::plan::ExtractExpr;
@@ -88,11 +86,7 @@ impl TreeNodeRewriter for TranscribeAtatRewriter {
&& matches!(binary_expr.op, datafusion_expr::Operator::AtAt)
{
self.transcribed = true;
let scalar_udf = create_udf(
Arc::new(MatchesTermFunction),
QueryContext::arc(),
Arc::new(FunctionState::default()),
);
let scalar_udf = create_udf(Arc::new(MatchesTermFunction::default()));
let exprs = vec![
binary_expr.left.as_ref().clone(),
binary_expr.right.as_ref().clone(),