diff --git a/src/common/function/src/scalars/math/pow.rs b/src/common/function/src/scalars/math/pow.rs index 7e85dffd15..0df3739004 100644 --- a/src/common/function/src/scalars/math/pow.rs +++ b/src/common/function/src/scalars/math/pow.rs @@ -4,7 +4,6 @@ use std::sync::Arc; use common_query::prelude::{Signature, Volatility}; use datatypes::data_type::DataType; use datatypes::prelude::ConcreteDataType; -use datatypes::type_id::LogicalTypeId; use datatypes::vectors::VectorRef; use datatypes::with_match_primitive_type_id; use num::traits::Pow; diff --git a/src/common/function/src/scalars/numpy/clip.rs b/src/common/function/src/scalars/numpy/clip.rs index 6cd83b6567..5a57fbc7a9 100644 --- a/src/common/function/src/scalars/numpy/clip.rs +++ b/src/common/function/src/scalars/numpy/clip.rs @@ -5,7 +5,6 @@ use common_query::prelude::{Signature, Volatility}; use datatypes::data_type::ConcreteDataType; use datatypes::data_type::DataType; use datatypes::prelude::{Scalar, VectorRef}; -use datatypes::type_id::LogicalTypeId; use datatypes::with_match_primitive_type_id; use num_traits::AsPrimitive; use paste::paste; diff --git a/src/datatypes/src/macros.rs b/src/datatypes/src/macros.rs index f194cbf482..da385cd4ce 100644 --- a/src/datatypes/src/macros.rs +++ b/src/datatypes/src/macros.rs @@ -64,6 +64,7 @@ macro_rules! with_match_primitive_type_id { }; } + use $crate::type_id::LogicalTypeId; match $key_type { LogicalTypeId::Int8 => __with_ty__! { i8 }, LogicalTypeId::Int16 => __with_ty__! 
{ i16 }, diff --git a/src/datatypes/src/vectors.rs b/src/datatypes/src/vectors.rs index 660151b45e..ee8bf88946 100644 --- a/src/datatypes/src/vectors.rs +++ b/src/datatypes/src/vectors.rs @@ -4,6 +4,7 @@ mod builder; pub mod constant; pub mod date; pub mod datetime; +mod eq; mod helper; mod list; pub mod mutable; @@ -21,6 +22,8 @@ pub use binary::*; pub use boolean::*; pub use builder::VectorBuilder; pub use constant::*; +pub use date::*; +pub use datetime::*; pub use helper::Helper; pub use list::*; pub use mutable::MutableVector; diff --git a/src/datatypes/src/vectors/binary.rs b/src/datatypes/src/vectors/binary.rs index 1a7a3aa724..5985823b2d 100644 --- a/src/datatypes/src/vectors/binary.rs +++ b/src/datatypes/src/vectors/binary.rs @@ -17,7 +17,7 @@ use crate::value::Value; use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef}; /// Vector of binary strings. -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub struct BinaryVector { array: BinaryArray, } diff --git a/src/datatypes/src/vectors/boolean.rs b/src/datatypes/src/vectors/boolean.rs index 2ba04c0be3..85d926a324 100644 --- a/src/datatypes/src/vectors/boolean.rs +++ b/src/datatypes/src/vectors/boolean.rs @@ -16,7 +16,7 @@ use crate::value::Value; use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef}; /// Vector of boolean. 
-#[derive(Debug)] +#[derive(Debug, PartialEq)] pub struct BooleanVector { array: BooleanArray, } diff --git a/src/datatypes/src/vectors/constant.rs b/src/datatypes/src/vectors/constant.rs index 5841fb6213..09dfd197a8 100644 --- a/src/datatypes/src/vectors/constant.rs +++ b/src/datatypes/src/vectors/constant.rs @@ -27,6 +27,7 @@ impl ConstantVector { } Self { vector, length } } + pub fn inner(&self) -> &VectorRef { &self.vector } diff --git a/src/datatypes/src/vectors/date.rs b/src/datatypes/src/vectors/date.rs index 3cddd1d801..b8e125e37c 100644 --- a/src/datatypes/src/vectors/date.rs +++ b/src/datatypes/src/vectors/date.rs @@ -12,7 +12,7 @@ use crate::scalars::ScalarVector; use crate::serialize::Serializable; use crate::vectors::{MutableVector, PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder}; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct DateVector { array: PrimitiveVector, } diff --git a/src/datatypes/src/vectors/datetime.rs b/src/datatypes/src/vectors/datetime.rs index 130a1e6f11..cd52ca8a23 100644 --- a/src/datatypes/src/vectors/datetime.rs +++ b/src/datatypes/src/vectors/datetime.rs @@ -13,7 +13,7 @@ use crate::prelude::{ use crate::serialize::Serializable; use crate::vectors::{PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder}; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct DateTimeVector { array: PrimitiveVector, } diff --git a/src/datatypes/src/vectors/eq.rs b/src/datatypes/src/vectors/eq.rs new file mode 100644 index 0000000000..6afb793e0c --- /dev/null +++ b/src/datatypes/src/vectors/eq.rs @@ -0,0 +1,190 @@ +use std::sync::Arc; + +use crate::data_type::DataType; +use crate::vectors::{ + BinaryVector, BooleanVector, ConstantVector, DateTimeVector, DateVector, ListVector, + PrimitiveVector, StringVector, Vector, +}; +use crate::with_match_primitive_type_id; + +impl Eq for dyn Vector + '_ {} + +impl PartialEq for dyn Vector + '_ { + fn eq(&self, other: &dyn Vector) -> bool { + equal(self, other) 
+    }
+}
+
+impl PartialEq<dyn Vector> for Arc<dyn Vector + '_> {
+    fn eq(&self, other: &dyn Vector) -> bool {
+        equal(&**self, other)
+    }
+}
+
+macro_rules! is_vector_eq {
+    ($VectorType: ident, $lhs: ident, $rhs: ident) => {{
+        let lhs = $lhs.as_any().downcast_ref::<$VectorType>().unwrap();
+        let rhs = $rhs.as_any().downcast_ref::<$VectorType>().unwrap();
+
+        lhs == rhs
+    }};
+}
+
+/// Returns `true` when `lhs` and `rhs` have the same data type, the same
+/// length and equal contents.
+///
+/// Two constant vectors are compared through their inner vectors; a constant
+/// vector is never considered equal to a non-constant one.
+fn equal(lhs: &dyn Vector, rhs: &dyn Vector) -> bool {
+    if lhs.data_type() != rhs.data_type() || lhs.len() != rhs.len() {
+        return false;
+    }
+
+    if lhs.is_const() || rhs.is_const() {
+        // Length has been checked before, so two constant vectors are equal
+        // iff their inner vectors are. Downcast both sides without unwrapping
+        // so a constant/non-constant pair yields `false` instead of a panic.
+        let lhs_const = lhs.as_any().downcast_ref::<ConstantVector>();
+        let rhs_const = rhs.as_any().downcast_ref::<ConstantVector>();
+        return match (lhs_const, rhs_const) {
+            (Some(lhs), Some(rhs)) => equal(&**lhs.inner(), &**rhs.inner()),
+            _ => false,
+        };
+    }
+
+    use crate::data_type::ConcreteDataType::*;
+
+    match lhs.data_type() {
+        Null(_) => true,
+        Boolean(_) => is_vector_eq!(BooleanVector, lhs, rhs),
+        Binary(_) => is_vector_eq!(BinaryVector, lhs, rhs),
+        String(_) => is_vector_eq!(StringVector, lhs, rhs),
+        Date(_) => is_vector_eq!(DateVector, lhs, rhs),
+        DateTime(_) => is_vector_eq!(DateTimeVector, lhs, rhs),
+        List(_) => is_vector_eq!(ListVector, lhs, rhs),
+        other => with_match_primitive_type_id!(other.logical_type_id(), |$T| {
+            let lhs = lhs.as_any().downcast_ref::<PrimitiveVector<$T>>().unwrap();
+            let rhs = rhs.as_any().downcast_ref::<PrimitiveVector<$T>>().unwrap();
+
+            lhs == rhs
+        },
+        {
+            unreachable!()
+        }),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use arrow::array::{Int64Array, ListArray, MutableListArray, MutablePrimitiveArray, TryExtend};
+
+    use super::*;
+    use crate::vectors::{
+        Float32Vector, Float64Vector, Int16Vector, Int32Vector, Int64Vector, Int8Vector,
+        NullVector, UInt16Vector, UInt32Vector, UInt64Vector, UInt8Vector, VectorRef,
+    };
+
+    fn assert_vector_ref_eq(vector: VectorRef) {
+        let rhs = vector.clone();
+        assert_eq!(vector, rhs);
+        assert_dyn_vector_eq(&*vector, &*rhs);
+    }
+
+    fn assert_dyn_vector_eq(lhs: &dyn Vector, rhs: &dyn Vector) {
+
assert_eq!(lhs, rhs); + } + + fn assert_vector_ref_ne(lhs: VectorRef, rhs: VectorRef) { + assert_ne!(lhs, rhs); + } + + #[test] + fn test_vector_eq() { + assert_vector_ref_eq(Arc::new(BinaryVector::from(vec![ + Some(b"hello".to_vec()), + Some(b"world".to_vec()), + ]))); + assert_vector_ref_eq(Arc::new(BooleanVector::from(vec![true, false]))); + assert_vector_ref_eq(Arc::new(ConstantVector::new( + Arc::new(BooleanVector::from(vec![true])), + 5, + ))); + assert_vector_ref_eq(Arc::new(BooleanVector::from(vec![true, false]))); + assert_vector_ref_eq(Arc::new(DateVector::from(vec![Some(100), Some(120)]))); + assert_vector_ref_eq(Arc::new(DateTimeVector::new(Int64Array::from(vec![ + Some(100), + Some(120), + ])))); + + let mut arrow_array = MutableListArray::>::new(); + arrow_array + .try_extend(vec![Some(vec![Some(1), Some(2), Some(3)])]) + .unwrap(); + let arrow_array: ListArray = arrow_array.into(); + assert_vector_ref_eq(Arc::new(ListVector::from(arrow_array))); + + assert_vector_ref_eq(Arc::new(NullVector::new(4))); + assert_vector_ref_eq(Arc::new(StringVector::from(vec![ + Some("hello"), + Some("world"), + ]))); + + assert_vector_ref_eq(Arc::new(Int8Vector::from_slice(&[1, 2, 3, 4]))); + assert_vector_ref_eq(Arc::new(UInt8Vector::from_slice(&[1, 2, 3, 4]))); + assert_vector_ref_eq(Arc::new(Int16Vector::from_slice(&[1, 2, 3, 4]))); + assert_vector_ref_eq(Arc::new(UInt16Vector::from_slice(&[1, 2, 3, 4]))); + assert_vector_ref_eq(Arc::new(Int32Vector::from_slice(&[1, 2, 3, 4]))); + assert_vector_ref_eq(Arc::new(UInt32Vector::from_slice(&[1, 2, 3, 4]))); + assert_vector_ref_eq(Arc::new(Int64Vector::from_slice(&[1, 2, 3, 4]))); + assert_vector_ref_eq(Arc::new(UInt64Vector::from_slice(&[1, 2, 3, 4]))); + assert_vector_ref_eq(Arc::new(Float32Vector::from_slice(&[1.0, 2.0, 3.0, 4.0]))); + assert_vector_ref_eq(Arc::new(Float64Vector::from_slice(&[1.0, 2.0, 3.0, 4.0]))); + } + + #[test] + fn test_vector_ne() { + assert_vector_ref_ne( + Arc::new(Int32Vector::from_slice(&[1, 
2, 3, 4])), + Arc::new(Int32Vector::from_slice(&[1, 2])), + ); + assert_vector_ref_ne( + Arc::new(Int32Vector::from_slice(&[1, 2, 3, 4])), + Arc::new(Int8Vector::from_slice(&[1, 2, 3, 4])), + ); + assert_vector_ref_ne( + Arc::new(Int32Vector::from_slice(&[1, 2, 3, 4])), + Arc::new(BooleanVector::from(vec![true, true])), + ); + assert_vector_ref_ne( + Arc::new(ConstantVector::new( + Arc::new(BooleanVector::from(vec![true])), + 5, + )), + Arc::new(ConstantVector::new( + Arc::new(BooleanVector::from(vec![true])), + 4, + )), + ); + assert_vector_ref_ne( + Arc::new(ConstantVector::new( + Arc::new(BooleanVector::from(vec![true])), + 5, + )), + Arc::new(ConstantVector::new( + Arc::new(BooleanVector::from(vec![true, false])), + 4, + )), + ); + assert_vector_ref_ne( + Arc::new(ConstantVector::new( + Arc::new(BooleanVector::from(vec![true])), + 5, + )), + Arc::new(ConstantVector::new( + Arc::new(Int32Vector::from_slice(vec![1, 2])), + 4, + )), + ); + assert_vector_ref_ne(Arc::new(NullVector::new(5)), Arc::new(NullVector::new(8))); + } +} diff --git a/src/datatypes/src/vectors/list.rs b/src/datatypes/src/vectors/list.rs index 560dd93f8e..a8ed1878f8 100644 --- a/src/datatypes/src/vectors/list.rs +++ b/src/datatypes/src/vectors/list.rs @@ -16,7 +16,7 @@ use crate::vectors::{impl_try_from_arrow_array_for_vector, impl_validity_for_vec type ArrowListArray = ListArray; /// Vector of Lists, basically backed by Arrow's `ListArray`. 
-#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct ListVector { array: ArrowListArray, inner_data_type: ConcreteDataType, @@ -68,6 +68,10 @@ impl Vector for ListVector { } fn get(&self, index: usize) -> Value { + if !self.array.is_valid(index) { + return Value::Null; + } + let array = &self.array.value(index); let vector = VectorHelper::try_into_vector(array).unwrap_or_else(|_| { panic!( @@ -190,13 +194,7 @@ mod tests { )), list_vector.get(0) ); - assert_eq!( - Value::List(ListValue::new( - Some(Box::new(vec![])), - ConcreteDataType::int32_datatype() - )), - list_vector.get(1) - ); + assert_eq!(Value::Null, list_vector.get(1)); assert_eq!( Value::List(ListValue::new( Some(Box::new(vec![ diff --git a/src/datatypes/src/vectors/null.rs b/src/datatypes/src/vectors/null.rs index cb20bdcca0..0f30b5fb47 100644 --- a/src/datatypes/src/vectors/null.rs +++ b/src/datatypes/src/vectors/null.rs @@ -15,6 +15,7 @@ use crate::value::Value; use crate::vectors::impl_try_from_arrow_array_for_vector; use crate::vectors::{Validity, Vector, VectorRef}; +#[derive(PartialEq)] pub struct NullVector { array: NullArray, } diff --git a/src/datatypes/src/vectors/primitive.rs b/src/datatypes/src/vectors/primitive.rs index 851d16ac29..b0a9085197 100644 --- a/src/datatypes/src/vectors/primitive.rs +++ b/src/datatypes/src/vectors/primitive.rs @@ -19,7 +19,7 @@ use crate::value::Value; use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef}; /// Vector for primitive data types. 
-#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct PrimitiveVector { pub(crate) array: PrimitiveArray, } diff --git a/src/datatypes/src/vectors/string.rs b/src/datatypes/src/vectors/string.rs index 424e02ee7c..702ca19688 100644 --- a/src/datatypes/src/vectors/string.rs +++ b/src/datatypes/src/vectors/string.rs @@ -17,7 +17,7 @@ use crate::value::Value; use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef}; /// String array wrapper -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct StringVector { array: StringArray, } diff --git a/src/storage/src/read.rs b/src/storage/src/read.rs index 0f5255a549..d5e3e3f0d3 100644 --- a/src/storage/src/read.rs +++ b/src/storage/src/read.rs @@ -7,7 +7,7 @@ use crate::error::Result; /// Storage internal representation of a batch of rows. // Now the structure of `Batch` is still unstable, all pub fields may be changed. -#[derive(Debug, Default)] +#[derive(Debug, Default, PartialEq)] pub struct Batch { /// Rows organized in columnar format. /// diff --git a/src/storage/src/schema.rs b/src/storage/src/schema.rs index 6b7eee1b6d..1c05c3677e 100644 --- a/src/storage/src/schema.rs +++ b/src/storage/src/schema.rs @@ -840,14 +840,8 @@ mod tests { // (v0, timestamp) let chunk = projected_schema.batch_to_chunk(&batch); assert_eq!(2, chunk.columns.len()); - assert_eq!( - chunk.columns[0].to_arrow_array(), - batch.column(2).to_arrow_array() - ); - assert_eq!( - chunk.columns[1].to_arrow_array(), - batch.column(1).to_arrow_array() - ); + assert_eq!(&chunk.columns[0], batch.column(2)); + assert_eq!(&chunk.columns[1], batch.column(1)); // Test batch_from_parts let keys = batch.columns()[0..2].to_vec(); @@ -858,13 +852,7 @@ mod tests { batch.column(3).clone(), batch.column(4).clone(), ); - assert_eq!(5, created.num_columns()); - for i in 0..5 { - assert_eq!( - batch.column(i).to_arrow_array(), - created.column(i).to_arrow_array() - ); - } + assert_eq!(batch, created); } #[test]