fix: drop unused numpy code since pyo3 and rustpython are no longer supported (#5442)

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
This commit is contained in:
yihong
2025-01-24 16:20:01 +08:00
committed by GitHub
parent b107384cc6
commit bbfbc9f0f8
5 changed files with 0 additions and 691 deletions

View File

@@ -25,7 +25,6 @@ use crate::scalars::expression::ExpressionFunction;
use crate::scalars::json::JsonFunction; use crate::scalars::json::JsonFunction;
use crate::scalars::matches::MatchesFunction; use crate::scalars::matches::MatchesFunction;
use crate::scalars::math::MathFunction; use crate::scalars::math::MathFunction;
use crate::scalars::numpy::NumpyFunction;
use crate::scalars::timestamp::TimestampFunction; use crate::scalars::timestamp::TimestampFunction;
use crate::scalars::vector::VectorFunction; use crate::scalars::vector::VectorFunction;
use crate::system::SystemFunction; use crate::system::SystemFunction;
@@ -103,7 +102,6 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
// Utility functions // Utility functions
MathFunction::register(&function_registry); MathFunction::register(&function_registry);
NumpyFunction::register(&function_registry);
TimestampFunction::register(&function_registry); TimestampFunction::register(&function_registry);
DateFunction::register(&function_registry); DateFunction::register(&function_registry);
ExpressionFunction::register(&function_registry); ExpressionFunction::register(&function_registry);

View File

@@ -20,7 +20,6 @@ pub mod geo;
pub mod json; pub mod json;
pub mod matches; pub mod matches;
pub mod math; pub mod math;
pub mod numpy;
pub mod vector; pub mod vector;
#[cfg(test)] #[cfg(test)]

View File

@@ -1,30 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
mod clip;
mod interp;
use std::sync::Arc;
use clip::ClipFunction;
use crate::function_registry::FunctionRegistry;
/// Registration entry point for the numpy-style scalar functions of this module.
pub(crate) struct NumpyFunction;

impl NumpyFunction {
    /// Adds the numpy-style functions to `registry`.
    ///
    /// Only [`ClipFunction`] is registered here.
    pub fn register(registry: &FunctionRegistry) {
        let clip = ClipFunction;
        registry.register(Arc::new(clip));
    }
}

View File

@@ -1,298 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt;
use std::sync::Arc;
use common_query::error::Result;
use common_query::prelude::{Signature, Volatility};
use datatypes::arrow::compute;
use datatypes::arrow::datatypes::ArrowPrimitiveType;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::*;
use datatypes::vectors::PrimitiveVector;
use paste::paste;
use crate::function::{Function, FunctionContext};
use crate::scalars::expression::{scalar_binary_op, EvalContext};
/// numpy.clip function, <https://numpy.org/doc/stable/reference/generated/numpy.clip.html>
///
/// Scalar function `clip(a, min, max)`: every value of `a` is limited to the
/// range given by `min` and `max`, computed as `min(max(a, min), max)`.
#[derive(Clone, Debug, Default)]
pub struct ClipFunction;
/// Generates an `eval_<type>` function for the given element type.
///
/// The generated function casts the first three input columns to that type and
/// then applies `clip(a, min, max) = min(max(a, min), max)` element-wise.
/// It panics if fewer than three columns are passed or if a cast fails.
macro_rules! define_eval {
    ($O: ident) => {
        paste! {
            fn [<eval_ $O>](columns: &[VectorRef]) -> Result<VectorRef> {
                // Converts an input column into a primitive vector of the target type.
                fn cast_vector(input: &VectorRef) -> VectorRef {
                    let target_type = <<<$O as WrapperType>::LogicalType as LogicalPrimitiveType>::ArrowPrimitive as ArrowPrimitiveType>::DATA_TYPE;
                    let casted = compute::cast(&input.to_arrow_array(), &target_type).unwrap();
                    let vector = PrimitiveVector::<<$O as WrapperType>::LogicalType>::try_from_arrow_array(casted).unwrap();
                    Arc::new(vector) as _
                }

                let values = cast_vector(&columns[0]);
                let lower = cast_vector(&columns[1]);
                let upper = cast_vector(&columns[2]);

                // clip(a, min, max) is equal to min(max(a, min), max):
                // first raise every value to at least `lower` ...
                let raised = scalar_binary_op::<$O, $O, $O, _>(
                    &values,
                    &lower,
                    scalar_max,
                    &mut EvalContext::default(),
                )?;
                let raised: VectorRef = Arc::new(raised);

                // ... then cap the result at `upper`.
                let clipped = scalar_binary_op::<$O, $O, $O, _>(
                    &raised,
                    &upper,
                    scalar_min,
                    &mut EvalContext::default(),
                )?;
                Ok(Arc::new(clipped))
            }
        }
    };
}

define_eval!(i64);
define_eval!(u64);
define_eval!(f64);
impl Function for ClipFunction {
    /// Name under which the function is exposed.
    fn name(&self) -> &str {
        "clip"
    }

    /// Picks the result type from the argument types: `Int64` when every argument
    /// is signed, `UInt64` when every argument is unsigned, `Float64` otherwise.
    ///
    /// An empty argument list counts as "all signed" and yields `Int64`.
    fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        let widened = if input_types.iter().all(|ty| ty.is_signed()) {
            ConcreteDataType::int64_datatype()
        } else if input_types.iter().all(|ty| ty.is_unsigned()) {
            ConcreteDataType::uint64_datatype()
        } else {
            ConcreteDataType::float64_datatype()
        };
        Ok(widened)
    }

    /// Exactly three arguments, each of any numeric type.
    fn signature(&self) -> Signature {
        let accepted = ConcreteDataType::numerics();
        Signature::uniform(3, accepted, Volatility::Immutable)
    }

    /// Evaluates `clip(columns[0], columns[1], columns[2])`, dispatching to the
    /// `i64`, `u64` or `f64` kernel with the same rule as [`Self::return_type`].
    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        if columns.iter().all(|col| col.data_type().is_signed()) {
            return eval_i64(columns);
        }
        if columns.iter().all(|col| col.data_type().is_unsigned()) {
            return eval_u64(columns);
        }
        eval_f64(columns)
    }
}
/// Returns `input` when it is strictly less than `min`, otherwise returns `min`.
///
/// When the two values are equal or cannot be ordered (e.g. a NaN is involved),
/// the second argument is returned.
#[inline]
pub fn min<T: PartialOrd>(input: T, min: T) -> T {
    let input_is_smaller = input < min;
    if input_is_smaller {
        input
    } else {
        min
    }
}
/// Returns `input` when it is strictly greater than `max`, otherwise returns `max`.
///
/// When the two values are equal or cannot be ordered (e.g. a NaN is involved),
/// the second argument is returned.
#[inline]
pub fn max<T: PartialOrd>(input: T, max: T) -> T {
    let input_is_larger = input > max;
    if input_is_larger {
        input
    } else {
        max
    }
}
/// Null-aware minimum of two optional scalars: the result is `None` as soon as
/// either operand is `None`. The evaluation context is not used.
#[inline]
fn scalar_min<O>(left: Option<O>, right: Option<O>, _ctx: &mut EvalContext) -> Option<O>
where
    O: Scalar + Copy + PartialOrd,
{
    left.zip(right).map(|(lhs, rhs)| min(lhs, rhs))
}
/// Null-aware maximum of two optional scalars: the result is `None` as soon as
/// either operand is `None`. The evaluation context is not used.
#[inline]
fn scalar_max<O>(left: Option<O>, right: Option<O>, _ctx: &mut EvalContext) -> Option<O>
where
    O: Scalar + Copy + PartialOrd,
{
    left.zip(right).map(|(lhs, rhs)| max(lhs, rhs))
}
impl fmt::Display for ClipFunction {
    /// Renders the function as its upper-case display name.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("CLIP")
    }
}
#[cfg(test)]
mod tests {
    use common_query::prelude::TypeSignature;
    use datatypes::value::Value;
    use datatypes::vectors::{
        ConstantVector, Float32Vector, Int16Vector, Int32Vector, Int8Vector, UInt16Vector,
        UInt32Vector, UInt8Vector,
    };

    use super::*;

    /// Name, return-type widening rules and signature of `clip`.
    #[test]
    fn test_clip_signature() {
        let clip = ClipFunction;
        assert_eq!("clip", clip.name());

        // (argument types, expected return type)
        let cases = [
            (vec![], ConcreteDataType::int64_datatype()),
            (
                vec![
                    ConcreteDataType::int16_datatype(),
                    ConcreteDataType::int64_datatype(),
                    ConcreteDataType::int8_datatype(),
                ],
                ConcreteDataType::int64_datatype(),
            ),
            (
                vec![
                    ConcreteDataType::uint16_datatype(),
                    ConcreteDataType::uint64_datatype(),
                    ConcreteDataType::uint8_datatype(),
                ],
                ConcreteDataType::uint64_datatype(),
            ),
            (
                vec![
                    ConcreteDataType::uint16_datatype(),
                    ConcreteDataType::int64_datatype(),
                    ConcreteDataType::uint8_datatype(),
                ],
                ConcreteDataType::float64_datatype(),
            ),
        ];
        for (input_types, expected) in &cases {
            assert_eq!(*expected, clip.return_type(input_types).unwrap());
        }

        assert!(matches!(clip.signature(),
            Signature {
                type_signature: TypeSignature::Uniform(3, valid_types),
                volatility: Volatility::Immutable
            } if valid_types == ConcreteDataType::numerics()
        ));
    }

    /// All-signed inputs are evaluated as `i64`.
    #[test]
    fn test_clip_fn_signed() {
        let args: Vec<VectorRef> = vec![
            Arc::new(Int32Vector::from_values(0..10)),
            Arc::new(ConstantVector::new(
                Arc::new(Int8Vector::from_vec(vec![3])),
                10,
            )),
            Arc::new(ConstantVector::new(
                Arc::new(Int16Vector::from_vec(vec![6])),
                10,
            )),
        ];

        let clipped = ClipFunction
            .eval(FunctionContext::default(), &args)
            .unwrap();
        assert_eq!(10, clipped.len());

        // clip([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 3, 6)
        let expected = [3i64, 3, 3, 3, 4, 5, 6, 6, 6, 6];
        for (i, want) in expected.iter().enumerate() {
            assert!(matches!(clipped.get(i), Value::Int64(v) if v == *want));
        }
    }

    /// All-unsigned inputs are evaluated as `u64`.
    #[test]
    fn test_clip_fn_unsigned() {
        let args: Vec<VectorRef> = vec![
            Arc::new(UInt8Vector::from_values(0..10)),
            Arc::new(ConstantVector::new(
                Arc::new(UInt32Vector::from_vec(vec![3])),
                10,
            )),
            Arc::new(ConstantVector::new(
                Arc::new(UInt16Vector::from_vec(vec![6])),
                10,
            )),
        ];

        let clipped = ClipFunction
            .eval(FunctionContext::default(), &args)
            .unwrap();
        assert_eq!(10, clipped.len());

        // clip([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 3, 6)
        let expected = [3u64, 3, 3, 3, 4, 5, 6, 6, 6, 6];
        for (i, want) in expected.iter().enumerate() {
            assert!(matches!(clipped.get(i), Value::UInt64(v) if v == *want));
        }
    }

    /// Mixed signed / unsigned / float inputs are evaluated as `f64`.
    #[test]
    fn test_clip_fn_float() {
        let args: Vec<VectorRef> = vec![
            Arc::new(Int8Vector::from_values(0..10)),
            Arc::new(ConstantVector::new(
                Arc::new(UInt32Vector::from_vec(vec![3])),
                10,
            )),
            Arc::new(ConstantVector::new(
                Arc::new(Float32Vector::from_vec(vec![6f32])),
                10,
            )),
        ];

        let clipped = ClipFunction
            .eval(FunctionContext::default(), &args)
            .unwrap();
        assert_eq!(10, clipped.len());

        // clip([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 3, 6)
        let expected = [3.0f64, 3.0, 3.0, 3.0, 4.0, 5.0, 6.0, 6.0, 6.0, 6.0];
        for (i, want) in expected.iter().enumerate() {
            assert!(matches!(clipped.get(i), Value::Float64(v) if v == *want));
        }
    }
}

View File

@@ -1,360 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_query::error::{self, Result};
use datatypes::arrow::compute::cast;
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::data_type::DataType;
use datatypes::prelude::ScalarVector;
use datatypes::value::Value;
use datatypes::vectors::{Float64Vector, Vector, VectorRef};
use datatypes::with_match_primitive_type_id;
use snafu::{ensure, ResultExt};
/// search the biggest number that smaller than x in xp
fn linear_search_ascending_vector(x: Value, xp: &Float64Vector) -> usize {
for i in 0..xp.len() {
if x < xp.get(i) {
return i - 1;
}
}
xp.len() - 1
}
/// Returns the index just before the first element of `xp` that is strictly
/// greater than `key`, found by bisection.
///
/// For an ascending `xp` this is the index of the last element that is less than
/// or equal to `key`. (`xp` being ascending is implied by the function name only;
/// it is not checked here.) Vectors with at most four elements are delegated to
/// [`linear_search_ascending_vector`].
///
/// The result is clamped at `0`, so `0` is returned instead of underflowing when
/// `key` is smaller than every element. Callers should handle that case themselves.
fn binary_search_ascending_vector(key: Value, xp: &Float64Vector) -> usize {
    /* If len <= 4 use linear search. */
    if xp.len() <= 4 {
        return linear_search_ascending_vector(key, xp);
    }

    /* find index by bisection */
    let mut left = 0usize;
    let mut right = xp.len();
    while left < right {
        let mid = left + ((right - left) >> 1);
        if key >= xp.get(mid) {
            left = mid + 1;
        } else {
            right = mid;
        }
    }

    // `left` is the first index whose element is greater than `key`; it is 0 when
    // `key` precedes every element, in which case `left - 1` would underflow.
    left.saturating_sub(1)
}
/// Casts a vector of any primitive type to a [`Float64Vector`].
///
/// # Errors
///
/// * `TypeCast` when the arrow cast to `Float64` fails.
/// * `InvalidFuncArgs` when `arg` is not a primitive vector. This case used to
///   hit `unreachable!()` and panic.
fn concrete_type_to_primitive_vector(arg: &VectorRef) -> Result<Float64Vector> {
    with_match_primitive_type_id!(arg.data_type().logical_type_id(), |$S| {
        let tmp = arg.to_arrow_array();
        let array = cast(&tmp, &ArrowDataType::Float64).context(error::TypeCastSnafu {
            typ: ArrowDataType::Float64,
        })?;
        // Safety: array has been cast to Float64Array.
        Ok(Float64Vector::try_from_arrow_array(array).unwrap())
    },{
        // Non-primitive input comes from the caller, so report it instead of panicking.
        error::InvalidFuncArgsSnafu {
            err_msg: format!(
                "Unsupported data type for interpolation: {:?}",
                arg.data_type()
            ),
        }
        .fail()
    })
}
/// One-dimensional linear interpolation for monotonically increasing sample points. Refers to
/// <https://github.com/numpy/numpy/blob/b101756ac02e390d605b2febcded30a1da50cc2c/numpy/core/src/multiarray/compiled_base.c#L491>
#[allow(unused)]
pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
let mut left = None;
let mut right = None;
ensure!(
args.len() >= 3,
error::InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not enough, expect at least: {}, have: {}",
3,
args.len()
),
}
);
let x = concrete_type_to_primitive_vector(&args[0])?;
let xp = concrete_type_to_primitive_vector(&args[1])?;
let fp = concrete_type_to_primitive_vector(&args[2])?;
// make sure the args.len() is 3 or 5
if args.len() > 3 {
ensure!(
args.len() == 5,
error::InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not enough, expect at least: {}, have: {}",
5,
args.len()
),
}
);
left = concrete_type_to_primitive_vector(&args[3])
.unwrap()
.get_data(0);
right = concrete_type_to_primitive_vector(&args[4])
.unwrap()
.get_data(0);
}
ensure!(
x.len() != 0,
error::InvalidFuncArgsSnafu {
err_msg: "The sample x is empty",
}
);
ensure!(
xp.len() != 0,
error::InvalidFuncArgsSnafu {
err_msg: "The sample xp is empty",
}
);
ensure!(
fp.len() != 0,
error::InvalidFuncArgsSnafu {
err_msg: "The sample fp is empty",
}
);
ensure!(
xp.len() == fp.len(),
error::InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the len1: {} don't match the length of the len2: {}",
xp.len(),
fp.len()
),
}
);
/* Get left and right fill values. */
let left = match left {
Some(left) => Some(left),
_ => fp.get_data(0),
};
let right = match right {
Some(right) => Some(right),
_ => fp.get_data(fp.len() - 1),
};
let res;
if xp.len() == 1 {
let data = x
.iter_data()
.map(|x| {
if Value::from(x) < xp.get(0) {
left
} else if Value::from(x) > xp.get(xp.len() - 1) {
right
} else {
fp.get_data(0)
}
})
.collect::<Vec<_>>();
res = Float64Vector::from(data);
} else {
let mut j = 0;
/* only pre-calculate slopes if there are relatively few of them. */
let mut slopes: Option<Vec<_>> = None;
if x.len() >= xp.len() {
let mut slopes_tmp = Vec::with_capacity(xp.len() - 1);
for i in 0..xp.len() - 1 {
let slope = match (
fp.get_data(i + 1),
fp.get_data(i),
xp.get_data(i + 1),
xp.get_data(i),
) {
(Some(fp1), Some(fp2), Some(xp1), Some(xp2)) => {
if xp1 == xp2 {
None
} else {
Some((fp1 - fp2) / (xp1 - xp2))
}
}
_ => None,
};
slopes_tmp.push(slope);
}
slopes = Some(slopes_tmp);
}
let data = x
.iter_data()
.map(|x| match x {
Some(xi) => {
if Value::from(xi) > xp.get(xp.len() - 1) {
right
} else if Value::from(xi) < xp.get(0) {
left
} else {
j = binary_search_ascending_vector(Value::from(xi), &xp);
if j == xp.len() - 1 || xp.get(j) == Value::from(xi) {
fp.get_data(j)
} else {
let slope = match &slopes {
Some(slopes) => slopes[j],
_ => match (
fp.get_data(j + 1),
fp.get_data(j),
xp.get_data(j + 1),
xp.get_data(j),
) {
(Some(fp1), Some(fp2), Some(xp1), Some(xp2)) => {
if xp1 == xp2 {
None
} else {
Some((fp1 - fp2) / (xp1 - xp2))
}
}
_ => None,
},
};
/* If we get nan in one direction, try the other */
let ans = match (slope, xp.get_data(j), fp.get_data(j)) {
(Some(slope), Some(xp), Some(fp)) => Some(slope * (xi - xp) + fp),
_ => None,
};
let ans = match ans {
Some(ans) => Some(ans),
_ => match (slope, xp.get_data(j + 1), fp.get_data(j + 1)) {
(Some(slope), Some(xp), Some(fp)) => {
Some(slope * (xi - xp) + fp)
}
_ => None,
},
};
let ans = match ans {
Some(ans) => Some(ans),
_ => {
if fp.get_data(j) == fp.get_data(j + 1) {
fp.get_data(j)
} else {
None
}
}
};
ans
}
}
}
_ => None,
})
.collect::<Vec<_>>();
res = Float64Vector::from(data);
}
Ok(Arc::new(res) as _)
}
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use datatypes::vectors::{Int32Vector, Int64Vector};

    use super::*;

    /// Checks the leading elements of `vector` against `expected`, position by
    /// position (at most `vector.len()` elements are compared).
    fn assert_floats(vector: &VectorRef, expected: &[f64]) {
        for (i, want) in expected.iter().enumerate().take(vector.len()) {
            assert!(matches!(vector.get(i), Value::Float64(v) if v == *want));
        }
    }

    #[test]
    fn test_basic_interp() {
        let xp = vec![1i32, 2i32, 3i32];
        let fp = vec![3i64, 2i64, 0i64];

        // A single query point between the last two samples.
        let single: Vec<VectorRef> = vec![
            Arc::new(Float64Vector::from_vec(vec![2.5])),
            Arc::new(Int32Vector::from_vec(xp.clone())),
            Arc::new(Int64Vector::from_vec(fp.clone())),
        ];
        let out = interp(&single).unwrap();
        assert_eq!(out.len(), 1);
        assert!(matches!(out.get(0), Value::Float64(v) if v == 1.0));

        // Several query points, including ones outside the sampled range.
        let several: Vec<VectorRef> = vec![
            Arc::new(Float64Vector::from_vec(vec![0.0, 1.0, 1.5, 3.2])),
            Arc::new(Int32Vector::from_vec(xp)),
            Arc::new(Int64Vector::from_vec(fp)),
        ];
        let out = interp(&several).unwrap();
        assert_eq!(4, out.len());
        assert_floats(&out, &[3.0, 3.0, 2.5, 0.0]);
    }

    #[test]
    fn test_left_right() {
        // Explicit fill values: -1 below the sampled range, 2 above it.
        let args: Vec<VectorRef> = vec![
            Arc::new(Float64Vector::from_vec(vec![0.0, 1.0, 1.5, 2.0, 3.0, 4.0])),
            Arc::new(Int32Vector::from_vec(vec![1i32, 2i32, 3i32])),
            Arc::new(Int64Vector::from_vec(vec![3i64, 2i64, 0i64])),
            Arc::new(Int32Vector::from_vec(vec![-1])),
            Arc::new(Int32Vector::from_vec(vec![2])),
        ];
        let out = interp(&args).unwrap();
        assert_floats(&out, &[-1.0, 3.0, 2.5, 2.0, 0.0, 2.0]);
    }

    #[test]
    fn test_scalar_interpolation_point() {
        let xp = vec![0i64, 1, 5];
        let fp = vec![0i64, 1, 5];

        // x=0 output:0
        let whole = 0i64;
        let args: Vec<VectorRef> = vec![
            Arc::new(Int64Vector::from_vec(vec![whole])),
            Arc::new(Int64Vector::from_vec(xp.clone())),
            Arc::new(Int64Vector::from_vec(fp.clone())),
        ];
        let out = interp(&args).unwrap();
        assert!(matches!(out.get(0), Value::Float64(v) if v == whole as f64));

        // x=0.3 output:0.3
        let fractional = 0.3f64;
        let args: Vec<VectorRef> = vec![
            Arc::new(Float64Vector::from_vec(vec![fractional])),
            Arc::new(Int64Vector::from_vec(xp.clone())),
            Arc::new(Int64Vector::from_vec(fp.clone())),
        ];
        let out = interp(&args).unwrap();
        assert!(matches!(out.get(0), Value::Float64(v) if v == fractional));

        // x=None output:Null
        let with_null = Float64Vector::from(vec![None, Some(0.0), Some(0.3)]);
        let args: Vec<VectorRef> = vec![
            Arc::new(with_null),
            Arc::new(Int64Vector::from_vec(xp)),
            Arc::new(Int64Vector::from_vec(fp)),
        ];
        let out = interp(&args).unwrap();
        assert!(matches!(out.get(0), Value::Null));
    }
}