From bbfbc9f0f8ca39859ef831771e41f57c4ced3ac6 Mon Sep 17 00:00:00 2001 From: yihong Date: Fri, 24 Jan 2025 16:20:01 +0800 Subject: [PATCH] fix: drop unused numpy code since pyo3 rustpython do not support any more (#5442) Signed-off-by: yihong0618 --- src/common/function/src/function_registry.rs | 2 - src/common/function/src/scalars.rs | 1 - src/common/function/src/scalars/numpy.rs | 30 -- src/common/function/src/scalars/numpy/clip.rs | 298 --------------- .../function/src/scalars/numpy/interp.rs | 360 ------------------ 5 files changed, 691 deletions(-) delete mode 100644 src/common/function/src/scalars/numpy.rs delete mode 100644 src/common/function/src/scalars/numpy/clip.rs delete mode 100644 src/common/function/src/scalars/numpy/interp.rs diff --git a/src/common/function/src/function_registry.rs b/src/common/function/src/function_registry.rs index 04d68a93d8..0ce3f8abef 100644 --- a/src/common/function/src/function_registry.rs +++ b/src/common/function/src/function_registry.rs @@ -25,7 +25,6 @@ use crate::scalars::expression::ExpressionFunction; use crate::scalars::json::JsonFunction; use crate::scalars::matches::MatchesFunction; use crate::scalars::math::MathFunction; -use crate::scalars::numpy::NumpyFunction; use crate::scalars::timestamp::TimestampFunction; use crate::scalars::vector::VectorFunction; use crate::system::SystemFunction; @@ -103,7 +102,6 @@ pub static FUNCTION_REGISTRY: Lazy> = Lazy::new(|| { // Utility functions MathFunction::register(&function_registry); - NumpyFunction::register(&function_registry); TimestampFunction::register(&function_registry); DateFunction::register(&function_registry); ExpressionFunction::register(&function_registry); diff --git a/src/common/function/src/scalars.rs b/src/common/function/src/scalars.rs index 52a238273d..8a2556d733 100644 --- a/src/common/function/src/scalars.rs +++ b/src/common/function/src/scalars.rs @@ -20,7 +20,6 @@ pub mod geo; pub mod json; pub mod matches; pub mod math; -pub mod numpy; pub mod vector; #[cfg(test)] diff --git a/src/common/function/src/scalars/numpy.rs b/src/common/function/src/scalars/numpy.rs deleted file mode 100644 index 33c82d44e3..0000000000 --- a/src/common/function/src/scalars/numpy.rs +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2023 Greptime Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -mod clip; -mod interp; - -use std::sync::Arc; - -use clip::ClipFunction; - -use crate::function_registry::FunctionRegistry; - -pub(crate) struct NumpyFunction; - -impl NumpyFunction { - pub fn register(registry: &FunctionRegistry) { - registry.register(Arc::new(ClipFunction)); - } -} diff --git a/src/common/function/src/scalars/numpy/clip.rs b/src/common/function/src/scalars/numpy/clip.rs deleted file mode 100644 index 02e1256207..0000000000 --- a/src/common/function/src/scalars/numpy/clip.rs +++ /dev/null @@ -1,298 +0,0 @@ -// Copyright 2023 Greptime Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::fmt; -use std::sync::Arc; - -use common_query::error::Result; -use common_query::prelude::{Signature, Volatility}; -use datatypes::arrow::compute; -use datatypes::arrow::datatypes::ArrowPrimitiveType; -use datatypes::data_type::ConcreteDataType; -use datatypes::prelude::*; -use datatypes::vectors::PrimitiveVector; -use paste::paste; - -use crate::function::{Function, FunctionContext}; -use crate::scalars::expression::{scalar_binary_op, EvalContext}; - -/// numpy.clip function, -#[derive(Clone, Debug, Default)] -pub struct ClipFunction; - -macro_rules! define_eval { - ($O: ident) => { - paste! { - fn [](columns: &[VectorRef]) -> Result { - fn cast_vector(input: &VectorRef) -> VectorRef { - Arc::new(PrimitiveVector::<<$O as WrapperType>::LogicalType>::try_from_arrow_array( - compute::cast(&input.to_arrow_array(), &<<<$O as WrapperType>::LogicalType as LogicalPrimitiveType>::ArrowPrimitive as ArrowPrimitiveType>::DATA_TYPE).unwrap() - ).unwrap()) as _ - } - let operator_1 = cast_vector(&columns[0]); - let operator_2 = cast_vector(&columns[1]); - let operator_3 = cast_vector(&columns[2]); - - // clip(a, min, max) is equals to min(max(a, min), max) - let col: VectorRef = Arc::new(scalar_binary_op::<$O, $O, $O, _>( - &operator_1, - &operator_2, - scalar_max, - &mut EvalContext::default(), - )?); - let col = scalar_binary_op::<$O, $O, $O, _>( - &col, - &operator_3, - scalar_min, - &mut EvalContext::default(), - )?; - Ok(Arc::new(col)) - } - } - }; -} - -define_eval!(i64); -define_eval!(u64); -define_eval!(f64); - -impl Function for ClipFunction { - fn name(&self) -> &str { - "clip" - } - - fn return_type(&self, input_types: &[ConcreteDataType]) -> Result { - if input_types.iter().all(ConcreteDataType::is_signed) { - Ok(ConcreteDataType::int64_datatype()) - } else if input_types.iter().all(ConcreteDataType::is_unsigned) { - Ok(ConcreteDataType::uint64_datatype()) - } else { - Ok(ConcreteDataType::float64_datatype()) - } - } - - fn signature(&self) -> Signature { - Signature::uniform(3, ConcreteDataType::numerics(), Volatility::Immutable) - } - - fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result { - if columns.iter().all(|v| v.data_type().is_signed()) { - eval_i64(columns) - } else if columns.iter().all(|v| v.data_type().is_unsigned()) { - eval_u64(columns) - } else { - eval_f64(columns) - } - } -} - -#[inline] -pub fn min(input: T, min: T) -> T { - if input < min { - input - } else { - min - } -} - -#[inline] -pub fn max(input: T, max: T) -> T { - if input > max { - input - } else { - max - } -} - -#[inline] -fn scalar_min(left: Option, right: Option, _ctx: &mut EvalContext) -> Option -where - O: Scalar + Copy + PartialOrd, -{ - match (left, right) { - (Some(left), Some(right)) => Some(min(left, right)), - _ => None, - } -} - -#[inline] -fn scalar_max(left: Option, right: Option, _ctx: &mut EvalContext) -> Option -where - O: Scalar + Copy + PartialOrd, -{ - match (left, right) { - (Some(left), Some(right)) => Some(max(left, right)), - _ => None, - } -} - -impl fmt::Display for ClipFunction { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "CLIP") - } -} - -#[cfg(test)] -mod tests { - use common_query::prelude::TypeSignature; - use datatypes::value::Value; - use datatypes::vectors::{ - ConstantVector, Float32Vector, Int16Vector, Int32Vector, Int8Vector, UInt16Vector, - UInt32Vector, UInt8Vector, - }; - - use super::*; - - #[test] - fn test_clip_signature() { - let clip = ClipFunction; - - assert_eq!("clip", clip.name()); - assert_eq!( - ConcreteDataType::int64_datatype(), - clip.return_type(&[]).unwrap() - ); - - assert_eq!( - ConcreteDataType::int64_datatype(), - clip.return_type(&[ - ConcreteDataType::int16_datatype(), - ConcreteDataType::int64_datatype(), - ConcreteDataType::int8_datatype() - ]) - .unwrap() - ); - assert_eq!( - ConcreteDataType::uint64_datatype(), - clip.return_type(&[ - ConcreteDataType::uint16_datatype(), - ConcreteDataType::uint64_datatype(), - ConcreteDataType::uint8_datatype() - ]) - .unwrap() - ); - assert_eq!( - ConcreteDataType::float64_datatype(), - clip.return_type(&[ - ConcreteDataType::uint16_datatype(), - ConcreteDataType::int64_datatype(), - ConcreteDataType::uint8_datatype() - ]) - .unwrap() - ); - - assert!(matches!(clip.signature(), - Signature { - type_signature: TypeSignature::Uniform(3, valid_types), - volatility: Volatility::Immutable - } if valid_types == ConcreteDataType::numerics() - )); - } - - #[test] - fn test_clip_fn_signed() { - // eval with signed integers - let args: Vec = vec![ - Arc::new(Int32Vector::from_values(0..10)), - Arc::new(ConstantVector::new( - Arc::new(Int8Vector::from_vec(vec![3])), - 10, - )), - Arc::new(ConstantVector::new( - Arc::new(Int16Vector::from_vec(vec![6])), - 10, - )), - ]; - - let vector = ClipFunction - .eval(FunctionContext::default(), &args) - .unwrap(); - assert_eq!(10, vector.len()); - - // clip([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 3, 6) = [3, 3, 3, 3, 4, 5, 6, 6, 6, 6] - for i in 0..10 { - if i <= 3 { - assert!(matches!(vector.get(i), Value::Int64(v) if v == 3)); - } else if i <= 6 { - assert!(matches!(vector.get(i), Value::Int64(v) if v == (i as i64))); - } else { - assert!(matches!(vector.get(i), Value::Int64(v) if v == 6)); - } - } - } - - #[test] - fn test_clip_fn_unsigned() { - // eval with unsigned integers - let args: Vec = vec![ - Arc::new(UInt8Vector::from_values(0..10)), - Arc::new(ConstantVector::new( - Arc::new(UInt32Vector::from_vec(vec![3])), - 10, - )), - Arc::new(ConstantVector::new( - Arc::new(UInt16Vector::from_vec(vec![6])), - 10, - )), - ]; - - let vector = ClipFunction - .eval(FunctionContext::default(), &args) - .unwrap(); - assert_eq!(10, vector.len()); - - // clip([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 3, 6) = [3, 3, 3, 3, 4, 5, 6, 6, 6, 6] - for i in 0..10 { - if i <= 3 { - assert!(matches!(vector.get(i), Value::UInt64(v) if v == 3)); - } else if i <= 6 { - assert!(matches!(vector.get(i), Value::UInt64(v) if v == (i as u64))); - } else { - assert!(matches!(vector.get(i), Value::UInt64(v) if v == 6)); - } - } - } - - #[test] - fn test_clip_fn_float() { - // eval with floats - let args: Vec = vec![ - Arc::new(Int8Vector::from_values(0..10)), - Arc::new(ConstantVector::new( - Arc::new(UInt32Vector::from_vec(vec![3])), - 10, - )), - Arc::new(ConstantVector::new( - Arc::new(Float32Vector::from_vec(vec![6f32])), - 10, - )), - ]; - - let vector = ClipFunction - .eval(FunctionContext::default(), &args) - .unwrap(); - assert_eq!(10, vector.len()); - - // clip([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 3, 6) = [3, 3, 3, 3, 4, 5, 6, 6, 6, 6] - for i in 0..10 { - if i <= 3 { - assert!(matches!(vector.get(i), Value::Float64(v) if v == 3.0)); - } else if i <= 6 { - assert!(matches!(vector.get(i), Value::Float64(v) if v == (i as f64))); - } else { - assert!(matches!(vector.get(i), Value::Float64(v) if v == 6.0)); - } - } - } -} diff --git a/src/common/function/src/scalars/numpy/interp.rs b/src/common/function/src/scalars/numpy/interp.rs deleted file mode 100644 index 85f25d8b3b..0000000000 --- a/src/common/function/src/scalars/numpy/interp.rs +++ /dev/null @@ -1,360 +0,0 @@ -// Copyright 2023 Greptime Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::sync::Arc; - -use common_query::error::{self, Result}; -use datatypes::arrow::compute::cast; -use datatypes::arrow::datatypes::DataType as ArrowDataType; -use datatypes::data_type::DataType; -use datatypes::prelude::ScalarVector; -use datatypes::value::Value; -use datatypes::vectors::{Float64Vector, Vector, VectorRef}; -use datatypes::with_match_primitive_type_id; -use snafu::{ensure, ResultExt}; - -/// search the biggest number that smaller than x in xp -fn linear_search_ascending_vector(x: Value, xp: &Float64Vector) -> usize { - for i in 0..xp.len() { - if x < xp.get(i) { - return i - 1; - } - } - xp.len() - 1 -} - -/// search the biggest number that smaller than x in xp -fn binary_search_ascending_vector(key: Value, xp: &Float64Vector) -> usize { - let mut left = 0; - let mut right = xp.len(); - /* If len <= 4 use linear search. */ - if xp.len() <= 4 { - return linear_search_ascending_vector(key, xp); - } - /* find index by bisection */ - while left < right { - let mid = left + ((right - left) >> 1); - if key >= xp.get(mid) { - left = mid + 1; - } else { - right = mid; - } - } - left - 1 -} - -fn concrete_type_to_primitive_vector(arg: &VectorRef) -> Result { - with_match_primitive_type_id!(arg.data_type().logical_type_id(), |$S| { - let tmp = arg.to_arrow_array(); - let array = cast(&tmp, &ArrowDataType::Float64).context(error::TypeCastSnafu { - typ: ArrowDataType::Float64, - })?; - // Safety: array has been cast to Float64Array. - Ok(Float64Vector::try_from_arrow_array(array).unwrap()) - },{ - unreachable!() - }) -} - -/// One-dimensional linear interpolation for monotonically increasing sample points. Refers to -/// -#[allow(unused)] -pub fn interp(args: &[VectorRef]) -> Result { - let mut left = None; - let mut right = None; - - ensure!( - args.len() >= 3, - error::InvalidFuncArgsSnafu { - err_msg: format!( - "The length of the args is not enough, expect at least: {}, have: {}", - 3, - args.len() - ), - } - ); - - let x = concrete_type_to_primitive_vector(&args[0])?; - let xp = concrete_type_to_primitive_vector(&args[1])?; - let fp = concrete_type_to_primitive_vector(&args[2])?; - - // make sure the args.len() is 3 or 5 - if args.len() > 3 { - ensure!( - args.len() == 5, - error::InvalidFuncArgsSnafu { - err_msg: format!( - "The length of the args is not enough, expect at least: {}, have: {}", - 5, - args.len() - ), - } - ); - - left = concrete_type_to_primitive_vector(&args[3]) - .unwrap() - .get_data(0); - right = concrete_type_to_primitive_vector(&args[4]) - .unwrap() - .get_data(0); - } - - ensure!( - x.len() != 0, - error::InvalidFuncArgsSnafu { - err_msg: "The sample x is empty", - } - ); - ensure!( - xp.len() != 0, - error::InvalidFuncArgsSnafu { - err_msg: "The sample xp is empty", - } - ); - ensure!( - fp.len() != 0, - error::InvalidFuncArgsSnafu { - err_msg: "The sample fp is empty", - } - ); - ensure!( - xp.len() == fp.len(), - error::InvalidFuncArgsSnafu { - err_msg: format!( - "The length of the len1: {} don't match the length of the len2: {}", - xp.len(), - fp.len() - ), - } - ); - - /* Get left and right fill values. */ - let left = match left { - Some(left) => Some(left), - _ => fp.get_data(0), - }; - - let right = match right { - Some(right) => Some(right), - _ => fp.get_data(fp.len() - 1), - }; - - let res; - if xp.len() == 1 { - let data = x - .iter_data() - .map(|x| { - if Value::from(x) < xp.get(0) { - left - } else if Value::from(x) > xp.get(xp.len() - 1) { - right - } else { - fp.get_data(0) - } - }) - .collect::>(); - res = Float64Vector::from(data); - } else { - let mut j = 0; - /* only pre-calculate slopes if there are relatively few of them. */ - let mut slopes: Option> = None; - if x.len() >= xp.len() { - let mut slopes_tmp = Vec::with_capacity(xp.len() - 1); - for i in 0..xp.len() - 1 { - let slope = match ( - fp.get_data(i + 1), - fp.get_data(i), - xp.get_data(i + 1), - xp.get_data(i), - ) { - (Some(fp1), Some(fp2), Some(xp1), Some(xp2)) => { - if xp1 == xp2 { - None - } else { - Some((fp1 - fp2) / (xp1 - xp2)) - } - } - _ => None, - }; - slopes_tmp.push(slope); - } - slopes = Some(slopes_tmp); - } - let data = x - .iter_data() - .map(|x| match x { - Some(xi) => { - if Value::from(xi) > xp.get(xp.len() - 1) { - right - } else if Value::from(xi) < xp.get(0) { - left - } else { - j = binary_search_ascending_vector(Value::from(xi), &xp); - if j == xp.len() - 1 || xp.get(j) == Value::from(xi) { - fp.get_data(j) - } else { - let slope = match &slopes { - Some(slopes) => slopes[j], - _ => match ( - fp.get_data(j + 1), - fp.get_data(j), - xp.get_data(j + 1), - xp.get_data(j), - ) { - (Some(fp1), Some(fp2), Some(xp1), Some(xp2)) => { - if xp1 == xp2 { - None - } else { - Some((fp1 - fp2) / (xp1 - xp2)) - } - } - _ => None, - }, - }; - - /* If we get nan in one direction, try the other */ - let ans = match (slope, xp.get_data(j), fp.get_data(j)) { - (Some(slope), Some(xp), Some(fp)) => Some(slope * (xi - xp) + fp), - _ => None, - }; - - let ans = match ans { - Some(ans) => Some(ans), - _ => match (slope, xp.get_data(j + 1), fp.get_data(j + 1)) { - (Some(slope), Some(xp), Some(fp)) => { - Some(slope * (xi - xp) + fp) - } - _ => None, - }, - }; - let ans = match ans { - Some(ans) => Some(ans), - _ => { - if fp.get_data(j) == fp.get_data(j + 1) { - fp.get_data(j) - } else { - None - } - } - }; - ans - } - } - } - _ => None, - }) - .collect::>(); - res = Float64Vector::from(data); - } - Ok(Arc::new(res) as _) -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use datatypes::vectors::{Int32Vector, Int64Vector}; - - use super::*; - #[test] - fn test_basic_interp() { - // x xp fp - let x = 2.5; - let xp = vec![1i32, 2i32, 3i32]; - let fp = vec![3i64, 2i64, 0i64]; - - let args: Vec = vec![ - Arc::new(Float64Vector::from_vec(vec![x])), - Arc::new(Int32Vector::from_vec(xp.clone())), - Arc::new(Int64Vector::from_vec(fp.clone())), - ]; - let vector = interp(&args).unwrap(); - assert_eq!(vector.len(), 1); - - assert!(matches!(vector.get(0), Value::Float64(v) if v==1.0)); - - let x = vec![0.0, 1.0, 1.5, 3.2]; - let args: Vec = vec![ - Arc::new(Float64Vector::from_vec(x)), - Arc::new(Int32Vector::from_vec(xp)), - Arc::new(Int64Vector::from_vec(fp)), - ]; - let vector = interp(&args).unwrap(); - assert_eq!(4, vector.len()); - let res = [3.0, 3.0, 2.5, 0.0]; - for (i, item) in res.iter().enumerate().take(vector.len()) { - assert!(matches!(vector.get(i),Value::Float64(v) if v==*item)); - } - } - - #[test] - fn test_left_right() { - let x = vec![0.0, 1.0, 1.5, 2.0, 3.0, 4.0]; - let xp = vec![1i32, 2i32, 3i32]; - let fp = vec![3i64, 2i64, 0i64]; - let left = vec![-1]; - let right = vec![2]; - - let expect = [-1.0, 3.0, 2.5, 2.0, 0.0, 2.0]; - - let args: Vec = vec![ - Arc::new(Float64Vector::from_vec(x)), - Arc::new(Int32Vector::from_vec(xp)), - Arc::new(Int64Vector::from_vec(fp)), - Arc::new(Int32Vector::from_vec(left)), - Arc::new(Int32Vector::from_vec(right)), - ]; - let vector = interp(&args).unwrap(); - - for (i, item) in expect.iter().enumerate().take(vector.len()) { - assert!(matches!(vector.get(i),Value::Float64(v) if v==*item)); - } - } - - #[test] - fn test_scalar_interpolation_point() { - // x=0 output:0 - let x = vec![0]; - let xp = vec![0, 1, 5]; - let fp = vec![0, 1, 5]; - let args: Vec = vec![ - Arc::new(Int64Vector::from_vec(x.clone())), - Arc::new(Int64Vector::from_vec(xp.clone())), - Arc::new(Int64Vector::from_vec(fp.clone())), - ]; - let vector = interp(&args).unwrap(); - assert!(matches!(vector.get(0), Value::Float64(v) if v==x[0] as f64)); - - // x=0.3 output:0.3 - let x = vec![0.3]; - let args: Vec = vec![ - Arc::new(Float64Vector::from_vec(x.clone())), - Arc::new(Int64Vector::from_vec(xp.clone())), - Arc::new(Int64Vector::from_vec(fp.clone())), - ]; - let vector = interp(&args).unwrap(); - assert!(matches!(vector.get(0), Value::Float64(v) if v == x[0])); - - // x=None output:Null - let input = vec![None, Some(0.0), Some(0.3)]; - let x = Float64Vector::from(input); - let args: Vec = vec![ - Arc::new(x), - Arc::new(Int64Vector::from_vec(xp)), - Arc::new(Int64Vector::from_vec(fp)), - ]; - let vector = interp(&args).unwrap(); - assert!(matches!(vector.get(0), Value::Null)); - } -}