fix: drop unused numpy code since pyo3 and rustpython are no longer supported (#5442)

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2026-01-08 06:12:55 +00:00 · 2025-01-24 16:20:01 +08:00
parent b107384cc6
commit bbfbc9f0f8
5 changed files with 0 additions and 691 deletions
--- a/src/common/function/src/function_registry.rs
+++ b/src/common/function/src/function_registry.rs
@@ -25,7 +25,6 @@ use crate::scalars::expression::ExpressionFunction;
 use crate::scalars::json::JsonFunction;
 use crate::scalars::matches::MatchesFunction;
 use crate::scalars::math::MathFunction;
 use crate::scalars::numpy::NumpyFunction;
 use crate::scalars::timestamp::TimestampFunction;
 use crate::scalars::vector::VectorFunction;
 use crate::system::SystemFunction;
@@ -103,7 +102,6 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
    // Utility functions
    MathFunction::register(&function_registry);
    NumpyFunction::register(&function_registry);
    TimestampFunction::register(&function_registry);
    DateFunction::register(&function_registry);
    ExpressionFunction::register(&function_registry);
--- a/src/common/function/src/scalars.rs
+++ b/src/common/function/src/scalars.rs
@@ -20,7 +20,6 @@ pub mod geo;
 pub mod json;
 pub mod matches;
 pub mod math;
 pub mod numpy;
 pub mod vector;
 #[cfg(test)]
--- a/src/common/function/src/scalars/numpy.rs
+++ b/src/common/function/src/scalars/numpy.rs
@@ -1,30 +0,0 @@
 // Copyright 2023 Greptime Team
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 mod clip;
 mod interp;
 use std::sync::Arc;
 use clip::ClipFunction;
 use crate::function_registry::FunctionRegistry;
 /// Registrar for the numpy-style scalar functions defined under this module.
 pub(crate) struct NumpyFunction;

 impl NumpyFunction {
    /// Adds the numpy-style functions to `registry`; currently that is only `ClipFunction`.
    pub fn register(registry: &FunctionRegistry) {
        let clip = Arc::new(ClipFunction);
        registry.register(clip);
    }
 }
--- a/src/common/function/src/scalars/numpy/clip.rs
+++ b/src/common/function/src/scalars/numpy/clip.rs
@@ -1,298 +0,0 @@
 // Copyright 2023 Greptime Team
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 use std::fmt;
 use std::sync::Arc;
 use common_query::error::Result;
 use common_query::prelude::{Signature, Volatility};
 use datatypes::arrow::compute;
 use datatypes::arrow::datatypes::ArrowPrimitiveType;
 use datatypes::data_type::ConcreteDataType;
 use datatypes::prelude::*;
 use datatypes::vectors::PrimitiveVector;
 use paste::paste;
 use crate::function::{Function, FunctionContext};
 use crate::scalars::expression::{scalar_binary_op, EvalContext};
 /// Scalar function modelled on `numpy.clip`,
 /// <https://numpy.org/doc/stable/reference/generated/numpy.clip.html>.
 ///
 /// It is a stateless unit struct; the behaviour lives in its `Function` implementation.
 #[derive(Clone, Debug, Default)]
 pub struct ClipFunction;
 // Generates `eval_<type>(columns)` for the primitive type `$O`: the three input columns are
 // cast to `$O` and then clipped element-wise.
 //
 // NOTE(review): the generated function indexes `columns[0..=2]` without a length check, so it
 // panics when fewer than three columns are passed — presumably the function signature
 // guarantees the arity before evaluation; confirm against the caller.
 macro_rules! define_eval {
    ($O: ident) => {
        paste! {
            fn [<eval_ $O>](columns: &[VectorRef]) -> Result<VectorRef> {
                // Converts `input` to a primitive vector of `$O` through Arrow's cast kernel.
                // NOTE(review): both `unwrap()` calls panic if the cast or the conversion
                // back to a vector fails; no error is propagated from here.
                fn cast_vector(input: &VectorRef) -> VectorRef {
                    Arc::new(PrimitiveVector::<<$O as WrapperType>::LogicalType>::try_from_arrow_array(
                        compute::cast(&input.to_arrow_array(), &<<<$O as WrapperType>::LogicalType as LogicalPrimitiveType>::ArrowPrimitive as ArrowPrimitiveType>::DATA_TYPE).unwrap()
                    ).unwrap()) as _
                }
                // columns[0] = values, columns[1] = lower bound, columns[2] = upper bound.
                let operator_1 = cast_vector(&columns[0]);
                let operator_2 = cast_vector(&columns[1]);
                let operator_3 = cast_vector(&columns[2]);
                // clip(a, min, max) is equal to min(max(a, min), max)
                // Step 1: raise every value to at least the lower bound.
                let col: VectorRef = Arc::new(scalar_binary_op::<$O, $O, $O, _>(
                    &operator_1,
                    &operator_2,
                    scalar_max,
                    &mut EvalContext::default(),
                )?);
                // Step 2: cap the intermediate result at the upper bound.
                let col = scalar_binary_op::<$O, $O, $O, _>(
                    &col,
                    &operator_3,
                    scalar_min,
                    &mut EvalContext::default(),
                )?;
                Ok(Arc::new(col))
            }
        }
    };
 }
 // Instantiate `eval_i64`, `eval_u64` and `eval_f64`.
 define_eval!(i64);
 define_eval!(u64);
 define_eval!(f64);
 impl Function for ClipFunction {
    /// Name of the function: `clip`.
    fn name(&self) -> &str {
        "clip"
    }

    /// Chooses the result type from the input types: `Int64` when every input satisfies
    /// `is_signed`, `UInt64` when every input satisfies `is_unsigned`, `Float64` otherwise.
    ///
    /// An empty `input_types` slice yields `Int64`, because `all` is true for no elements.
    fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        let output = if input_types.iter().all(|ty| ty.is_signed()) {
            ConcreteDataType::int64_datatype()
        } else if input_types.iter().all(|ty| ty.is_unsigned()) {
            ConcreteDataType::uint64_datatype()
        } else {
            ConcreteDataType::float64_datatype()
        };
        Ok(output)
    }

    /// Accepts exactly three arguments, each of any numeric type; the result is immutable.
    fn signature(&self) -> Signature {
        let numeric_types = ConcreteDataType::numerics();
        Signature::uniform(3, numeric_types, Volatility::Immutable)
    }

    /// Dispatches to the evaluator matching the columns' common family, using the same
    /// signed / unsigned / float rule as `return_type`.
    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        if columns.iter().all(|col| col.data_type().is_signed()) {
            return eval_i64(columns);
        }
        if columns.iter().all(|col| col.data_type().is_unsigned()) {
            return eval_u64(columns);
        }
        eval_f64(columns)
    }
 }
 /// Returns `input` when it compares strictly less than `min`, otherwise `min`.
 ///
 /// When the two values are equal or cannot be ordered (for example a float NaN on either
 /// side), the second argument is returned.
 #[inline]
 pub fn min<T: PartialOrd>(input: T, min: T) -> T {
    match (input, min) {
        (value, bound) if value < bound => value,
        (_, bound) => bound,
    }
 }
 /// Returns `input` when it compares strictly greater than `max`, otherwise `max`.
 ///
 /// When the two values are equal or cannot be ordered (for example a float NaN on either
 /// side), the second argument is returned.
 #[inline]
 pub fn max<T: PartialOrd>(input: T, max: T) -> T {
    match (input, max) {
        (value, bound) if value > bound => value,
        (_, bound) => bound,
    }
 }
 /// Null-aware minimum: yields `min(left, right)` when both operands are present and `None`
 /// as soon as either one is missing. The evaluation context is not used.
 #[inline]
 fn scalar_min<O>(left: Option<O>, right: Option<O>, _ctx: &mut EvalContext) -> Option<O>
 where
    O: Scalar + Copy + PartialOrd,
 {
    left.zip(right).map(|(lhs, rhs)| min(lhs, rhs))
 }
 /// Null-aware maximum: yields `max(left, right)` when both operands are present and `None`
 /// as soon as either one is missing. The evaluation context is not used.
 #[inline]
 fn scalar_max<O>(left: Option<O>, right: Option<O>, _ctx: &mut EvalContext) -> Option<O>
 where
    O: Scalar + Copy + PartialOrd,
 {
    left.zip(right).map(|(lhs, rhs)| max(lhs, rhs))
 }
 impl fmt::Display for ClipFunction {
    /// Writes the upper-case function name, `CLIP`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("CLIP")
    }
 }
 #[cfg(test)]
 mod tests {
    use common_query::prelude::TypeSignature;
    use datatypes::value::Value;
    use datatypes::vectors::{
        ConstantVector, Float32Vector, Int16Vector, Int32Vector, Int8Vector, UInt16Vector,
        UInt32Vector, UInt8Vector,
    };
    use super::*;

    /// Row count shared by every column fed to `clip` in these tests.
    const ROWS: usize = 10;

    /// Repeats the single value held by `value` for `ROWS` rows.
    fn repeated(value: VectorRef) -> VectorRef {
        Arc::new(ConstantVector::new(value, ROWS))
    }

    /// Evaluates `clip` over `args` and checks that one result per row came back.
    fn run_clip(args: &[VectorRef]) -> VectorRef {
        let clipped = ClipFunction
            .eval(FunctionContext::default(), args)
            .unwrap();
        assert_eq!(ROWS, clipped.len());
        clipped
    }

    #[test]
    fn test_clip_signature() {
        let clip = ClipFunction;
        assert_eq!("clip", clip.name());

        // (input types, expected return type)
        let cases = [
            (vec![], ConcreteDataType::int64_datatype()),
            (
                vec![
                    ConcreteDataType::int16_datatype(),
                    ConcreteDataType::int64_datatype(),
                    ConcreteDataType::int8_datatype(),
                ],
                ConcreteDataType::int64_datatype(),
            ),
            (
                vec![
                    ConcreteDataType::uint16_datatype(),
                    ConcreteDataType::uint64_datatype(),
                    ConcreteDataType::uint8_datatype(),
                ],
                ConcreteDataType::uint64_datatype(),
            ),
            (
                vec![
                    ConcreteDataType::uint16_datatype(),
                    ConcreteDataType::int64_datatype(),
                    ConcreteDataType::uint8_datatype(),
                ],
                ConcreteDataType::float64_datatype(),
            ),
        ];
        for (inputs, expected) in &cases {
            assert_eq!(*expected, clip.return_type(inputs).unwrap());
        }

        assert!(matches!(clip.signature(),
                         Signature {
                             type_signature: TypeSignature::Uniform(3, valid_types),
                             volatility: Volatility::Immutable
                         } if  valid_types == ConcreteDataType::numerics()
        ));
    }

    #[test]
    fn test_clip_fn_signed() {
        // eval with signed integers
        let args: Vec<VectorRef> = vec![
            Arc::new(Int32Vector::from_values(0..10)),
            repeated(Arc::new(Int8Vector::from_vec(vec![3]))),
            repeated(Arc::new(Int16Vector::from_vec(vec![6]))),
        ];
        let vector = run_clip(&args);
        // clip([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 3, 6) = [3, 3, 3, 3, 4, 5, 6, 6, 6, 6]
        for i in 0..ROWS {
            let expected = (i as i64).clamp(3, 6);
            assert!(matches!(vector.get(i), Value::Int64(v) if v == expected));
        }
    }

    #[test]
    fn test_clip_fn_unsigned() {
        // eval with unsigned integers
        let args: Vec<VectorRef> = vec![
            Arc::new(UInt8Vector::from_values(0..10)),
            repeated(Arc::new(UInt32Vector::from_vec(vec![3]))),
            repeated(Arc::new(UInt16Vector::from_vec(vec![6]))),
        ];
        let vector = run_clip(&args);
        // clip([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 3, 6) = [3, 3, 3, 3, 4, 5, 6, 6, 6, 6]
        for i in 0..ROWS {
            let expected = (i as u64).clamp(3, 6);
            assert!(matches!(vector.get(i), Value::UInt64(v) if v == expected));
        }
    }

    #[test]
    fn test_clip_fn_float() {
        // eval with floats
        let args: Vec<VectorRef> = vec![
            Arc::new(Int8Vector::from_values(0..10)),
            repeated(Arc::new(UInt32Vector::from_vec(vec![3]))),
            repeated(Arc::new(Float32Vector::from_vec(vec![6f32]))),
        ];
        let vector = run_clip(&args);
        // clip([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 3, 6) = [3, 3, 3, 3, 4, 5, 6, 6, 6, 6]
        for i in 0..ROWS {
            let expected = (i as f64).clamp(3.0, 6.0);
            assert!(matches!(vector.get(i), Value::Float64(v) if v == expected));
        }
    }
 }
--- a/src/common/function/src/scalars/numpy/interp.rs
+++ b/src/common/function/src/scalars/numpy/interp.rs
@@ -1,360 +0,0 @@
 // Copyright 2023 Greptime Team
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 use std::sync::Arc;
 use common_query::error::{self, Result};
 use datatypes::arrow::compute::cast;
 use datatypes::arrow::datatypes::DataType as ArrowDataType;
 use datatypes::data_type::DataType;
 use datatypes::prelude::ScalarVector;
 use datatypes::value::Value;
 use datatypes::vectors::{Float64Vector, Vector, VectorRef};
 use datatypes::with_match_primitive_type_id;
 use snafu::{ensure, ResultExt};
 /// search the biggest number that smaller than x in xp
 fn linear_search_ascending_vector(x: Value, xp: &Float64Vector) -> usize {
    for i in 0..xp.len() {
        if x < xp.get(i) {
            return i - 1;
        }
    }
    xp.len() - 1
 }
 /// Finds the index of the last element of `xp` that is not greater than `key`.
 /// `xp` is expected to be sorted in ascending order.
 ///
 /// Inputs of at most four elements are delegated to `linear_search_ascending_vector`;
 /// longer ones are bisected. When `key` is smaller than every element the result is clamped
 /// to `0`.
 fn binary_search_ascending_vector(key: Value, xp: &Float64Vector) -> usize {
    /* If len <= 4 use linear search. */
    if xp.len() <= 4 {
        return linear_search_ascending_vector(key, xp);
    }
    /* find index by bisection: `left` ends on the first element greater than `key` */
    let mut left = 0usize;
    let mut right = xp.len();
    while left < right {
        let mid = left + ((right - left) >> 1);
        if key >= xp.get(mid) {
            left = mid + 1;
        } else {
            right = mid;
        }
    }
    // `left == 0` means `key` precedes the first element; clamp instead of underflowing `usize`.
    left.saturating_sub(1)
 }
 /// Converts `arg` into a `Float64Vector` by casting its Arrow array to `Float64`.
 ///
 /// # Errors
 /// Returns a `TypeCast` error when the Arrow cast fails.
 ///
 /// # Panics
 /// NOTE(review): the second macro branch is `unreachable!()`; presumably it is taken when
 /// `arg` does not have a primitive logical type, in which case this panics — confirm against
 /// the definition of `with_match_primitive_type_id!`.
 fn concrete_type_to_primitive_vector(arg: &VectorRef) -> Result<Float64Vector> {
    // `$S` is bound by the macro but not used by the body: the same cast is applied
    // whichever type id matched.
    with_match_primitive_type_id!(arg.data_type().logical_type_id(), |$S| {
        let tmp = arg.to_arrow_array();
        let array = cast(&tmp, &ArrowDataType::Float64).context(error::TypeCastSnafu {
            typ: ArrowDataType::Float64,
        })?;
        // Safety: array has been cast to Float64Array.
        Ok(Float64Vector::try_from_arrow_array(array).unwrap())
    },{
        unreachable!()
    })
 }
 /// One-dimensional linear interpolation for monotonically increasing sample points. Refers to
 /// <https://github.com/numpy/numpy/blob/b101756ac02e390d605b2febcded30a1da50cc2c/numpy/core/src/multiarray/compiled_base.c#L491>
 ///
 /// `args` is either `[x, xp, fp]` or `[x, xp, fp, left, right]`:
 /// * `x` – the points to evaluate,
 /// * `xp` / `fp` – the sample coordinates and their values (same, non-zero length),
 /// * `left` / `right` – fill values for points below `xp[0]` / above the last `xp`; only
 ///   their first element is read, and they default to the first / last element of `fp`.
 ///
 /// Every argument is converted to `Float64` first and the result is a `Float64` vector with
 /// one entry per element of `x`. A null element of `x` produces a null result when there is
 /// more than one sample point.
 ///
 /// # Errors
 /// Returns `InvalidFuncArgs` when the number of arguments is neither 3 nor 5, when `x`, `xp`
 /// or `fp` is empty, or when `xp` and `fp` differ in length. Conversion failures of any
 /// argument (including `left` and `right`) are propagated.
 // NOTE(review): `allow(unused)` presumably silences the dead-code lint because nothing
 // outside the unit tests calls this function — confirm before removing.
 #[allow(unused)]
 pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
    ensure!(
        args.len() >= 3,
        error::InvalidFuncArgsSnafu {
            err_msg: format!(
                "The length of the args is not enough, expect at least: {}, have: {}",
                3,
                args.len()
            ),
        }
    );
    let x = concrete_type_to_primitive_vector(&args[0])?;
    let xp = concrete_type_to_primitive_vector(&args[1])?;
    let fp = concrete_type_to_primitive_vector(&args[2])?;

    // The fill values are optional, but they have to be supplied as a pair.
    let (left, right) = if args.len() > 3 {
        ensure!(
            args.len() == 5,
            error::InvalidFuncArgsSnafu {
                err_msg: format!(
                    "The length of the args is invalid, expect: 3 or 5, have: {}",
                    args.len()
                ),
            }
        );
        (
            concrete_type_to_primitive_vector(&args[3])?.get_data(0),
            concrete_type_to_primitive_vector(&args[4])?.get_data(0),
        )
    } else {
        (None, None)
    };

    ensure!(
        x.len() != 0,
        error::InvalidFuncArgsSnafu {
            err_msg: "The sample x is empty",
        }
    );
    ensure!(
        xp.len() != 0,
        error::InvalidFuncArgsSnafu {
            err_msg: "The sample xp is empty",
        }
    );
    ensure!(
        fp.len() != 0,
        error::InvalidFuncArgsSnafu {
            err_msg: "The sample fp is empty",
        }
    );
    ensure!(
        xp.len() == fp.len(),
        error::InvalidFuncArgsSnafu {
            err_msg: format!(
                "The length of the len1: {} don't match the length of the len2: {}",
                xp.len(),
                fp.len()
            ),
        }
    );

    /* Get left and right fill values. */
    let left = left.or_else(|| fp.get_data(0));
    let right = right.or_else(|| fp.get_data(fp.len() - 1));

    // `xp` is non-empty here, so the subtraction cannot underflow.
    let last = xp.len() - 1;

    let data: Vec<Option<f64>> = if xp.len() == 1 {
        // A single sample point: everything is either a fill value or that sample's value.
        x.iter_data()
            .map(|sample| {
                let key = Value::from(sample);
                if key < xp.get(0) {
                    left
                } else if key > xp.get(last) {
                    right
                } else {
                    fp.get_data(0)
                }
            })
            .collect()
    } else {
        // Slope of the segment between samples `i` and `i + 1`; `None` when a coordinate is
        // null or the segment is vertical.
        let slope_at = |i: usize| -> Option<f64> {
            let (x0, x1) = (xp.get_data(i)?, xp.get_data(i + 1)?);
            let (y0, y1) = (fp.get_data(i)?, fp.get_data(i + 1)?);
            if x0 == x1 {
                None
            } else {
                Some((y1 - y0) / (x1 - x0))
            }
        };

        /* only pre-calculate slopes if there are relatively few of them. */
        let slopes: Option<Vec<Option<f64>>> =
            (x.len() >= xp.len()).then(|| (0..last).map(&slope_at).collect());

        x.iter_data()
            .map(|sample| -> Option<f64> {
                // A null query point yields a null result.
                let xi = sample?;
                let key = Value::from(xi);
                if key > xp.get(last) {
                    return right;
                }
                if key < xp.get(0) {
                    return left;
                }

                let j = binary_search_ascending_vector(Value::from(xi), &xp);
                // Exactly on a sample point (or on the last one): no interpolation needed.
                if j == last || xp.get(j) == key {
                    return fp.get_data(j);
                }

                let slope = match &slopes {
                    Some(slopes) => slopes[j],
                    None => slope_at(j),
                };
                // Value of the line through sample `k` with the segment's slope, at `xi`.
                let along = |k: usize| -> Option<f64> {
                    Some(slope? * (xi - xp.get_data(k)?) + fp.get_data(k)?)
                };

                /* If one direction yields no value, try the other */
                along(j).or_else(|| along(j + 1)).or_else(|| {
                    // Last resort: a flat segment has a well-defined value regardless of slope.
                    let y = fp.get_data(j);
                    if y == fp.get_data(j + 1) {
                        y
                    } else {
                        None
                    }
                })
            })
            .collect()
    };

    Ok(Arc::new(Float64Vector::from(data)) as _)
 }
 #[cfg(test)]
 mod tests {
    use std::sync::Arc;
    use datatypes::vectors::{Int32Vector, Int64Vector};
    use super::*;

    /// Asserts that `vector` holds exactly the `expected` floats, in order.
    ///
    /// The length is checked first so that a result that is too short cannot pass by having
    /// its missing tail skipped.
    fn assert_float_values(vector: &VectorRef, expected: &[f64]) {
        assert_eq!(expected.len(), vector.len());
        for (i, want) in expected.iter().enumerate() {
            assert!(matches!(vector.get(i), Value::Float64(v) if v == *want));
        }
    }

    #[test]
    fn test_basic_interp() {
        // x xp fp
        let xp = vec![1i32, 2i32, 3i32];
        let fp = vec![3i64, 2i64, 0i64];

        // A single point inside the second segment.
        let args: Vec<VectorRef> = vec![
            Arc::new(Float64Vector::from_vec(vec![2.5])),
            Arc::new(Int32Vector::from_vec(xp.clone())),
            Arc::new(Int64Vector::from_vec(fp.clone())),
        ];
        let vector = interp(&args).unwrap();
        assert_float_values(&vector, &[1.0]);

        // Points below, on, between and above the samples, using the default fill values.
        let args: Vec<VectorRef> = vec![
            Arc::new(Float64Vector::from_vec(vec![0.0, 1.0, 1.5, 3.2])),
            Arc::new(Int32Vector::from_vec(xp)),
            Arc::new(Int64Vector::from_vec(fp)),
        ];
        let vector = interp(&args).unwrap();
        assert_float_values(&vector, &[3.0, 3.0, 2.5, 0.0]);
    }

    #[test]
    fn test_left_right() {
        // Explicit fill values replace the defaults outside the sampled range.
        let args: Vec<VectorRef> = vec![
            Arc::new(Float64Vector::from_vec(vec![0.0, 1.0, 1.5, 2.0, 3.0, 4.0])),
            Arc::new(Int32Vector::from_vec(vec![1i32, 2i32, 3i32])),
            Arc::new(Int64Vector::from_vec(vec![3i64, 2i64, 0i64])),
            Arc::new(Int32Vector::from_vec(vec![-1])),
            Arc::new(Int32Vector::from_vec(vec![2])),
        ];
        let vector = interp(&args).unwrap();
        assert_float_values(&vector, &[-1.0, 3.0, 2.5, 2.0, 0.0, 2.0]);
    }

    #[test]
    fn test_scalar_interpolation_point() {
        let xp = vec![0, 1, 5];
        let fp = vec![0, 1, 5];

        // x=0 output:0
        let args: Vec<VectorRef> = vec![
            Arc::new(Int64Vector::from_vec(vec![0])),
            Arc::new(Int64Vector::from_vec(xp.clone())),
            Arc::new(Int64Vector::from_vec(fp.clone())),
        ];
        let vector = interp(&args).unwrap();
        assert_float_values(&vector, &[0.0]);

        // x=0.3 output:0.3
        let args: Vec<VectorRef> = vec![
            Arc::new(Float64Vector::from_vec(vec![0.3])),
            Arc::new(Int64Vector::from_vec(xp.clone())),
            Arc::new(Int64Vector::from_vec(fp.clone())),
        ];
        let vector = interp(&args).unwrap();
        assert_float_values(&vector, &[0.3]);

        // x=None output:Null
        let x = Float64Vector::from(vec![None, Some(0.0), Some(0.3)]);
        let args: Vec<VectorRef> = vec![
            Arc::new(x),
            Arc::new(Int64Vector::from_vec(xp)),
            Arc::new(Int64Vector::from_vec(fp)),
        ];
        let vector = interp(&args).unwrap();
        assert!(matches!(vector.get(0), Value::Null));
    }
 }
 }