From 819b60ca13f88951aecc782d69a8a1e3a371696a Mon Sep 17 00:00:00 2001 From: elijah <30852919+e1ijah1@users.noreply.github.com> Date: Tue, 7 Mar 2023 19:27:33 +0800 Subject: [PATCH] feat(datatypes): implement VectorOp::take (#1115) * feat: add take index method for VectorOp * chore: make clippy happy * chore: make clippy happy * chore: improve the code * chore: improve the code * chore: add take null test * chore: fix clippy --- src/datatypes/src/vectors/constant.rs | 37 +++- src/datatypes/src/vectors/operations.rs | 25 ++- src/datatypes/src/vectors/operations/take.rs | 203 +++++++++++++++++++ 3 files changed, 260 insertions(+), 5 deletions(-) create mode 100644 src/datatypes/src/vectors/operations/take.rs diff --git a/src/datatypes/src/vectors/constant.rs b/src/datatypes/src/vectors/constant.rs index a2e7bc76dc..750b816b61 100644 --- a/src/datatypes/src/vectors/constant.rs +++ b/src/datatypes/src/vectors/constant.rs @@ -16,14 +16,14 @@ use std::any::Any; use std::fmt; use std::sync::Arc; -use arrow::array::{Array, ArrayRef}; -use snafu::ResultExt; +use arrow::array::{Array, ArrayRef, UInt32Array}; +use snafu::{ensure, ResultExt}; use crate::data_type::ConcreteDataType; -use crate::error::{Result, SerializeSnafu}; +use crate::error::{self, Result, SerializeSnafu}; use crate::serialize::Serializable; use crate::value::{Value, ValueRef}; -use crate::vectors::{BooleanVector, Helper, Validity, Vector, VectorRef}; +use crate::vectors::{BooleanVector, Helper, UInt32Vector, Validity, Vector, VectorRef}; #[derive(Clone)] pub struct ConstantVector { @@ -83,6 +83,35 @@ impl ConstantVector { self.length, ))) } + + pub(crate) fn take_vector(&self, indices: &UInt32Vector) -> Result { + if indices.is_empty() { + return Ok(self.slice(0, 0)); + } + ensure!( + indices.null_count() == 0, + error::UnsupportedOperationSnafu { + op: "taking a null index", + vector_type: self.vector_type_name(), + } + ); + + let len = self.len(); + let arr = indices.to_arrow_array(); + let indices_arr = arr.as_any().downcast_ref::().unwrap(); + if !arrow::compute::min_boolean( + &arrow::compute::lt_scalar(indices_arr, len as u32).unwrap(), + ) + .unwrap() + { + panic!("Array index out of bounds, cannot take index out of the length of the array: {len}"); + } + + Ok(Arc::new(ConstantVector::new( + self.inner().clone(), + indices.len(), + ))) + } } impl Vector for ConstantVector { diff --git a/src/datatypes/src/vectors/operations.rs b/src/datatypes/src/vectors/operations.rs index 11ff506bb8..748bcd3ff5 100644 --- a/src/datatypes/src/vectors/operations.rs +++ b/src/datatypes/src/vectors/operations.rs @@ -16,6 +16,7 @@ mod cast; mod filter; mod find_unique; mod replicate; +mod take; use common_base::BitVec; @@ -24,7 +25,7 @@ use crate::types::LogicalPrimitiveType; use crate::vectors::constant::ConstantVector; use crate::vectors::{ BinaryVector, BooleanVector, ConcreteDataType, ListVector, NullVector, PrimitiveVector, - StringVector, Vector, VectorRef, + StringVector, UInt32Vector, Vector, VectorRef, }; /// Vector compute operations. @@ -63,6 +64,12 @@ pub trait VectorOp { /// /// TODO(dennis) describe behaviors in details. fn cast(&self, to_type: &ConcreteDataType) -> Result; + + /// Take elements from the vector by the given indices. + /// + /// # Panics + /// Panics if an index is out of bounds. + fn take(&self, indices: &UInt32Vector) -> Result; } macro_rules! impl_scalar_vector_op { @@ -84,6 +91,10 @@ macro_rules! impl_scalar_vector_op { fn cast(&self, to_type: &ConcreteDataType) -> Result { cast::cast_non_constant!(self, to_type) } + + fn take(&self, indices: &UInt32Vector) -> Result { + take::take_indices!(self, $VectorType, indices) + } } )+}; } @@ -108,6 +119,10 @@ impl VectorOp for PrimitiveVector { fn cast(&self, to_type: &ConcreteDataType) -> Result { cast::cast_non_constant!(self, to_type) } + + fn take(&self, indices: &UInt32Vector) -> Result { + take::take_indices!(self, PrimitiveVector, indices) + } } impl VectorOp for NullVector { @@ -131,6 +146,10 @@ impl VectorOp for NullVector { } .fail() } + + fn take(&self, indices: &UInt32Vector) -> Result { + take::take_indices!(self, NullVector, indices) + } } impl VectorOp for ConstantVector { @@ -150,4 +169,8 @@ impl VectorOp for ConstantVector { fn cast(&self, to_type: &ConcreteDataType) -> Result { self.cast_vector(to_type) } + + fn take(&self, indices: &UInt32Vector) -> Result { + self.take_vector(indices) + } } diff --git a/src/datatypes/src/vectors/operations/take.rs b/src/datatypes/src/vectors/operations/take.rs new file mode 100644 index 0000000000..d457a1dbe3 --- /dev/null +++ b/src/datatypes/src/vectors/operations/take.rs @@ -0,0 +1,203 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +macro_rules! take_indices { + ($vector: expr, $VectorType: ty, $indices: ident) => {{ + use std::sync::Arc; + + use arrow::compute; + use snafu::ResultExt; + + let arrow_array = $vector.as_arrow(); + let taken = compute::take(arrow_array, $indices.as_arrow(), None) + .context(crate::error::ArrowComputeSnafu)?; + Ok(Arc::new(<$VectorType>::try_from_arrow_array(taken)?)) + }}; +} + +pub(crate) use take_indices; + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use arrow::array::{PrimitiveArray, UInt32Array}; + use common_time::{Date, DateTime}; + + use crate::prelude::VectorRef; + use crate::scalars::ScalarVector; + use crate::timestamp::{ + TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond, + }; + use crate::types::{LogicalPrimitiveType, WrapperType}; + use crate::vectors::operations::VectorOp; + use crate::vectors::{ + BooleanVector, ConstantVector, Int32Vector, NullVector, PrimitiveVector, StringVector, + UInt32Vector, + }; + + fn check_take_primitive( + input: Vec>, + indices: Vec>, + expect: Vec>, + ) where + T: LogicalPrimitiveType, + PrimitiveArray: From>>, + { + let v = PrimitiveVector::::new(PrimitiveArray::::from(input)); + let indices = UInt32Vector::new(UInt32Array::from(indices)); + let output = v.take(&indices).unwrap(); + + let expected: VectorRef = Arc::new(PrimitiveVector::::new(PrimitiveArray::< + T::ArrowPrimitive, + >::from(expect))); + assert_eq!(expected, output); + } + + macro_rules! take_time_like_test { + ($VectorType: ident, $ValueType: ident, $method: ident) => {{ + use $crate::vectors::{$VectorType, VectorRef}; + + let v = $VectorType::from_iterator((0..5).map($ValueType::$method)); + let indices = UInt32Vector::from_slice(&[3, 0, 1, 4]); + let out = v.take(&indices).unwrap(); + + let expect: VectorRef = Arc::new($VectorType::from_iterator( + [3, 0, 1, 4].into_iter().map($ValueType::$method), + )); + assert_eq!(expect, out); + }}; + } + + #[test] + fn test_take_primitive() { + // nullable int32 + check_take_primitive::( + vec![Some(1), None, Some(3), Some(4), Some(-5)], + vec![Some(3), None, Some(0), Some(1), Some(4)], + vec![Some(4), None, Some(1), None, Some(-5)], + ); + + // nullable float32 + check_take_primitive::( + vec![Some(3.24), None, Some(1.34), Some(4.13), Some(5.13)], + vec![Some(3), None, Some(0), Some(1), Some(4)], + vec![Some(4.13), None, Some(3.24), None, Some(5.13)], + ); + + // nullable uint32 + check_take_primitive::( + vec![Some(0), None, Some(2), Some(3), Some(4)], + vec![Some(4), None, Some(2), Some(1), Some(3)], + vec![Some(4), None, Some(2), None, Some(3)], + ); + + // test date like type + take_time_like_test!(DateVector, Date, new); + take_time_like_test!(DateTimeVector, DateTime, new); + take_time_like_test!(TimestampSecondVector, TimestampSecond, from_native); + take_time_like_test!( + TimestampMillisecondVector, + TimestampMillisecond, + from_native + ); + take_time_like_test!( + TimestampMicrosecondVector, + TimestampMicrosecond, + from_native + ); + take_time_like_test!(TimestampNanosecondVector, TimestampNanosecond, from_native); + } + + fn check_take_constant(expect_length: usize, input_length: usize, indices: &[u32]) { + let v = ConstantVector::new(Arc::new(Int32Vector::from_slice([111])), input_length); + let indices = UInt32Vector::from_slice(indices); + let out = v.take(&indices).unwrap(); + + assert!(out.is_const()); + assert_eq!(expect_length, out.len()); + } + + #[test] + fn test_take_constant() { + check_take_constant(2, 5, &[3, 4]); + check_take_constant(3, 10, &[1, 2, 3]); + check_take_constant(4, 10, &[1, 5, 3, 6]); + check_take_constant(5, 10, &[1, 9, 8, 7, 3]); + } + + #[test] + #[should_panic] + fn test_take_constant_out_of_index() { + check_take_constant(2, 5, &[3, 5]); + } + + #[test] + #[should_panic] + fn test_take_out_of_index() { + let v = Int32Vector::from_slice([1, 2, 3, 4, 5]); + let indies = UInt32Vector::from_slice([1, 5, 6]); + v.take(&indies).unwrap(); + } + + #[test] + fn test_take_null() { + let v = NullVector::new(5); + let indices = UInt32Vector::from_slice([1, 3, 2]); + let out = v.take(&indices).unwrap(); + + let expect: VectorRef = Arc::new(NullVector::new(3)); + assert_eq!(expect, out); + } + + #[test] + fn test_take_scalar() { + let v = StringVector::from_slice(&["0", "1", "2", "3"]); + let indices = UInt32Vector::from_slice([1, 3, 2]); + let out = v.take(&indices).unwrap(); + + let expect: VectorRef = Arc::new(StringVector::from_slice(&["1", "3", "2"])); + assert_eq!(expect, out); + } + + #[test] + fn test_take_bool() { + let v = BooleanVector::from_slice(&[false, true, false, true, false, false, true]); + let indices = UInt32Vector::from_slice([1, 3, 5, 6]); + let out = v.take(&indices).unwrap(); + let expected: VectorRef = Arc::new(BooleanVector::from_slice(&[true, true, false, true])); + assert_eq!(out, expected); + + let v = BooleanVector::from(vec![ + Some(true), + None, + Some(false), + Some(true), + Some(false), + Some(false), + Some(true), + None, + ]); + let indices = UInt32Vector::from(vec![Some(1), None, Some(3), Some(5), Some(6)]); + let out = v.take(&indices).unwrap(); + let expected: VectorRef = Arc::new(BooleanVector::from(vec![ + None, + None, + Some(true), + Some(false), + Some(true), + ])); + assert_eq!(out, expected); + } +}