mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-23 16:30:39 +00:00
refactor: remove deprecated find_unique method (#5790)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
@@ -254,10 +254,6 @@ impl VectorOp for DictionaryVector {
|
||||
})
|
||||
}
|
||||
|
||||
fn find_unique(&self, _selected: &mut common_base::BitVec, _prev_vector: Option<&dyn Vector>) {
|
||||
// Method is deprecated.
|
||||
}
|
||||
|
||||
fn filter(&self, filter: &vectors::BooleanVector) -> Result<VectorRef> {
|
||||
let key_array: ArrayRef = Arc::new(self.array.keys().clone());
|
||||
let key_vector = Helper::try_into_vector(&key_array)?;
|
||||
|
||||
@@ -14,14 +14,11 @@
|
||||
|
||||
mod cast;
|
||||
mod filter;
|
||||
mod find_unique;
|
||||
mod replicate;
|
||||
mod take;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_base::BitVec;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::types::LogicalPrimitiveType;
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
@@ -40,23 +37,6 @@ pub trait VectorOp {
|
||||
/// Panics if `offsets.len() != self.len()`.
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef;
|
||||
|
||||
/// Mark `i-th` bit of `selected` to `true` if the `i-th` element of `self` is unique, which
|
||||
/// means there is no elements behind it have same value as it.
|
||||
///
|
||||
/// The caller should ensure
|
||||
/// 1. the length of `selected` bitmap is equal to `vector.len()`.
|
||||
/// 2. `vector` and `prev_vector` are sorted.
|
||||
///
|
||||
/// If there are multiple duplicate elements, this function retains the **first** element.
|
||||
/// The first element is considered as unique if the first element of `self` is different
|
||||
/// from its previous element, that is the last element of `prev_vector`.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if
|
||||
/// - `selected.len() < self.len()`.
|
||||
/// - `prev_vector` and `self` have different data types.
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>);
|
||||
|
||||
/// Filters the vector, returns elements matching the `filter` (i.e. where the values are true).
|
||||
///
|
||||
/// Note that the nulls of `filter` are interpreted as `false` will lead to these elements being masked out.
|
||||
@@ -81,11 +61,6 @@ macro_rules! impl_scalar_vector_op {
|
||||
replicate::replicate_scalar(self, offsets)
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector = prev_vector.map(|pv| pv.as_any().downcast_ref::<$VectorType>().unwrap());
|
||||
find_unique::find_unique_scalar(self, selected, prev_vector);
|
||||
}
|
||||
|
||||
fn filter(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
filter::filter_non_constant!(self, $VectorType, filter)
|
||||
}
|
||||
@@ -121,11 +96,6 @@ impl VectorOp for Decimal128Vector {
|
||||
std::sync::Arc::new(replicate::replicate_decimal128(self, offsets))
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector = prev_vector.and_then(|pv| pv.as_any().downcast_ref::<Decimal128Vector>());
|
||||
find_unique::find_unique_scalar(self, selected, prev_vector);
|
||||
}
|
||||
|
||||
fn filter(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
filter::filter_non_constant!(self, Decimal128Vector, filter)
|
||||
}
|
||||
@@ -144,12 +114,6 @@ impl<T: LogicalPrimitiveType> VectorOp for PrimitiveVector<T> {
|
||||
std::sync::Arc::new(replicate::replicate_primitive(self, offsets))
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector =
|
||||
prev_vector.and_then(|pv| pv.as_any().downcast_ref::<PrimitiveVector<T>>());
|
||||
find_unique::find_unique_scalar(self, selected, prev_vector);
|
||||
}
|
||||
|
||||
fn filter(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
filter::filter_non_constant!(self, PrimitiveVector<T>, filter)
|
||||
}
|
||||
@@ -168,11 +132,6 @@ impl VectorOp for NullVector {
|
||||
replicate::replicate_null(self, offsets)
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector = prev_vector.and_then(|pv| pv.as_any().downcast_ref::<NullVector>());
|
||||
find_unique::find_unique_null(self, selected, prev_vector);
|
||||
}
|
||||
|
||||
fn filter(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
filter::filter_non_constant!(self, NullVector, filter)
|
||||
}
|
||||
@@ -195,11 +154,6 @@ impl VectorOp for ConstantVector {
|
||||
self.replicate_vector(offsets)
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector = prev_vector.and_then(|pv| pv.as_any().downcast_ref::<ConstantVector>());
|
||||
find_unique::find_unique_constant(self, selected, prev_vector);
|
||||
}
|
||||
|
||||
fn filter(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
self.filter_vector(filter)
|
||||
}
|
||||
|
||||
@@ -1,366 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_base::BitVec;
|
||||
|
||||
use crate::scalars::ScalarVector;
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{NullVector, Vector};
|
||||
|
||||
// To implement `find_unique()` correctly, we need to keep in mind that always marks an element as
|
||||
// selected when it is different from the previous one, and leaves the `selected` unchanged
|
||||
// in any other case.
|
||||
pub(crate) fn find_unique_scalar<'a, T: ScalarVector>(
|
||||
vector: &'a T,
|
||||
selected: &'a mut BitVec,
|
||||
prev_vector: Option<&'a T>,
|
||||
) where
|
||||
T::RefItem<'a>: PartialEq,
|
||||
{
|
||||
assert!(selected.len() >= vector.len());
|
||||
|
||||
if vector.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
for ((i, current), next) in vector
|
||||
.iter_data()
|
||||
.enumerate()
|
||||
.zip(vector.iter_data().skip(1))
|
||||
{
|
||||
if current != next {
|
||||
// If next element is a different element, we mark it as selected.
|
||||
selected.set(i + 1, true);
|
||||
}
|
||||
}
|
||||
|
||||
// Marks first element as selected if it is different from previous element, otherwise
|
||||
// keep selected bitmap unchanged.
|
||||
let is_first_not_duplicate = prev_vector
|
||||
.map(|pv| {
|
||||
if pv.is_empty() {
|
||||
true
|
||||
} else {
|
||||
let last = pv.get_data(pv.len() - 1);
|
||||
last != vector.get_data(0)
|
||||
}
|
||||
})
|
||||
.unwrap_or(true);
|
||||
if is_first_not_duplicate {
|
||||
selected.set(0, true);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn find_unique_null(
|
||||
vector: &NullVector,
|
||||
selected: &mut BitVec,
|
||||
prev_vector: Option<&NullVector>,
|
||||
) {
|
||||
if vector.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let is_first_not_duplicate = prev_vector.map(NullVector::is_empty).unwrap_or(true);
|
||||
if is_first_not_duplicate {
|
||||
selected.set(0, true);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn find_unique_constant(
|
||||
vector: &ConstantVector,
|
||||
selected: &mut BitVec,
|
||||
prev_vector: Option<&ConstantVector>,
|
||||
) {
|
||||
if vector.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let is_first_not_duplicate = prev_vector
|
||||
.map(|pv| {
|
||||
if pv.is_empty() {
|
||||
true
|
||||
} else {
|
||||
vector.get_constant_ref() != pv.get_constant_ref()
|
||||
}
|
||||
})
|
||||
.unwrap_or(true);
|
||||
|
||||
if is_first_not_duplicate {
|
||||
selected.set(0, true);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::Date;
|
||||
|
||||
use super::*;
|
||||
use crate::timestamp::*;
|
||||
use crate::vectors::{Int32Vector, StringVector, Vector, VectorOp};
|
||||
|
||||
fn check_bitmap(expect: &[bool], selected: &BitVec) {
|
||||
let actual = selected.iter().collect::<Vec<_>>();
|
||||
assert_eq!(expect, actual);
|
||||
}
|
||||
|
||||
fn check_find_unique_scalar(expect: &[bool], input: &[i32], prev: Option<&[i32]>) {
|
||||
check_find_unique_scalar_opt(expect, input.iter().map(|v| Some(*v)), prev);
|
||||
}
|
||||
|
||||
fn check_find_unique_scalar_opt(
|
||||
expect: &[bool],
|
||||
input: impl Iterator<Item = Option<i32>>,
|
||||
prev: Option<&[i32]>,
|
||||
) {
|
||||
let input = Int32Vector::from(input.collect::<Vec<_>>());
|
||||
let prev = prev.map(Int32Vector::from_slice);
|
||||
|
||||
let mut selected = BitVec::repeat(false, input.len());
|
||||
input.find_unique(&mut selected, prev.as_ref().map(|v| v as _));
|
||||
|
||||
check_bitmap(expect, &selected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_unique_scalar() {
|
||||
check_find_unique_scalar(&[], &[], None);
|
||||
check_find_unique_scalar(&[true], &[1], None);
|
||||
check_find_unique_scalar(&[true, false], &[1, 1], None);
|
||||
check_find_unique_scalar(&[true, true], &[1, 2], None);
|
||||
check_find_unique_scalar(&[true, true, true, true], &[1, 2, 3, 4], None);
|
||||
check_find_unique_scalar(&[true, false, true, false], &[1, 1, 3, 3], None);
|
||||
check_find_unique_scalar(&[true, false, false, false, true], &[2, 2, 2, 2, 3], None);
|
||||
|
||||
check_find_unique_scalar(&[true], &[5], Some(&[]));
|
||||
check_find_unique_scalar(&[true], &[5], Some(&[3]));
|
||||
check_find_unique_scalar(&[false], &[5], Some(&[5]));
|
||||
check_find_unique_scalar(&[false], &[5], Some(&[4, 5]));
|
||||
check_find_unique_scalar(&[false, true], &[5, 6], Some(&[4, 5]));
|
||||
check_find_unique_scalar(&[false, true, false], &[5, 6, 6], Some(&[4, 5]));
|
||||
check_find_unique_scalar(
|
||||
&[false, true, false, true, true],
|
||||
&[5, 6, 6, 7, 8],
|
||||
Some(&[4, 5]),
|
||||
);
|
||||
|
||||
check_find_unique_scalar_opt(
|
||||
&[true, true, false, true, false],
|
||||
[Some(1), Some(2), Some(2), None, None].into_iter(),
|
||||
None,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_unique_scalar_multi_times_with_prev() {
|
||||
let prev = Int32Vector::from_slice([1]);
|
||||
|
||||
let v1 = Int32Vector::from_slice([2, 3, 4]);
|
||||
let mut selected = BitVec::repeat(false, v1.len());
|
||||
v1.find_unique(&mut selected, Some(&prev));
|
||||
|
||||
// Though element in v2 are the same as prev, but we should still keep them.
|
||||
let v2 = Int32Vector::from_slice([1, 1, 1]);
|
||||
v2.find_unique(&mut selected, Some(&prev));
|
||||
|
||||
check_bitmap(&[true, true, true], &selected);
|
||||
}
|
||||
|
||||
fn new_bitmap(bits: &[bool]) -> BitVec {
|
||||
BitVec::from_iter(bits)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_unique_scalar_with_prev() {
|
||||
let prev = Int32Vector::from_slice([1]);
|
||||
|
||||
let mut selected = new_bitmap(&[true, false, true, false]);
|
||||
let v = Int32Vector::from_slice([2, 3, 4, 5]);
|
||||
v.find_unique(&mut selected, Some(&prev));
|
||||
// All elements are different.
|
||||
check_bitmap(&[true, true, true, true], &selected);
|
||||
|
||||
let mut selected = new_bitmap(&[true, false, true, false]);
|
||||
let v = Int32Vector::from_slice([1, 2, 3, 4]);
|
||||
v.find_unique(&mut selected, Some(&prev));
|
||||
// Though first element is duplicate, but we keep the flag unchanged.
|
||||
check_bitmap(&[true, true, true, true], &selected);
|
||||
|
||||
// Same case as above, but now `prev` is None.
|
||||
let mut selected = new_bitmap(&[true, false, true, false]);
|
||||
let v = Int32Vector::from_slice([1, 2, 3, 4]);
|
||||
v.find_unique(&mut selected, None);
|
||||
check_bitmap(&[true, true, true, true], &selected);
|
||||
|
||||
// Same case as above, but now `prev` is empty.
|
||||
let mut selected = new_bitmap(&[true, false, true, false]);
|
||||
let v = Int32Vector::from_slice([1, 2, 3, 4]);
|
||||
v.find_unique(&mut selected, Some(&Int32Vector::from_slice([])));
|
||||
check_bitmap(&[true, true, true, true], &selected);
|
||||
|
||||
let mut selected = new_bitmap(&[false, false, false, false]);
|
||||
let v = Int32Vector::from_slice([2, 2, 4, 5]);
|
||||
v.find_unique(&mut selected, Some(&prev));
|
||||
// only v[1] is duplicate.
|
||||
check_bitmap(&[true, false, true, true], &selected);
|
||||
}
|
||||
|
||||
fn check_find_unique_null(len: usize) {
|
||||
let input = NullVector::new(len);
|
||||
let mut selected = BitVec::repeat(false, input.len());
|
||||
input.find_unique(&mut selected, None);
|
||||
|
||||
let mut expect = vec![false; len];
|
||||
if !expect.is_empty() {
|
||||
expect[0] = true;
|
||||
}
|
||||
check_bitmap(&expect, &selected);
|
||||
|
||||
let mut selected = BitVec::repeat(false, input.len());
|
||||
let prev = Some(NullVector::new(1));
|
||||
input.find_unique(&mut selected, prev.as_ref().map(|v| v as _));
|
||||
let expect = vec![false; len];
|
||||
check_bitmap(&expect, &selected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_unique_null() {
|
||||
for len in 0..5 {
|
||||
check_find_unique_null(len);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_unique_null_with_prev() {
|
||||
let prev = NullVector::new(1);
|
||||
|
||||
// Keep flags unchanged.
|
||||
let mut selected = new_bitmap(&[true, false, true, false]);
|
||||
let v = NullVector::new(4);
|
||||
v.find_unique(&mut selected, Some(&prev));
|
||||
check_bitmap(&[true, false, true, false], &selected);
|
||||
|
||||
// Keep flags unchanged.
|
||||
let mut selected = new_bitmap(&[false, false, true, false]);
|
||||
v.find_unique(&mut selected, Some(&prev));
|
||||
check_bitmap(&[false, false, true, false], &selected);
|
||||
|
||||
// Prev is None, select first element.
|
||||
let mut selected = new_bitmap(&[false, false, true, false]);
|
||||
v.find_unique(&mut selected, None);
|
||||
check_bitmap(&[true, false, true, false], &selected);
|
||||
|
||||
// Prev is empty, select first element.
|
||||
let mut selected = new_bitmap(&[false, false, true, false]);
|
||||
v.find_unique(&mut selected, Some(&NullVector::new(0)));
|
||||
check_bitmap(&[true, false, true, false], &selected);
|
||||
}
|
||||
|
||||
fn check_find_unique_constant(len: usize) {
|
||||
let input = ConstantVector::new(Arc::new(Int32Vector::from_slice([8])), len);
|
||||
let mut selected = BitVec::repeat(false, len);
|
||||
input.find_unique(&mut selected, None);
|
||||
|
||||
let mut expect = vec![false; len];
|
||||
if !expect.is_empty() {
|
||||
expect[0] = true;
|
||||
}
|
||||
check_bitmap(&expect, &selected);
|
||||
|
||||
let mut selected = BitVec::repeat(false, len);
|
||||
let prev = Some(ConstantVector::new(
|
||||
Arc::new(Int32Vector::from_slice([8])),
|
||||
1,
|
||||
));
|
||||
input.find_unique(&mut selected, prev.as_ref().map(|v| v as _));
|
||||
let expect = vec![false; len];
|
||||
check_bitmap(&expect, &selected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_unique_constant() {
|
||||
for len in 0..5 {
|
||||
check_find_unique_constant(len);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_unique_constant_with_prev() {
|
||||
let prev = ConstantVector::new(Arc::new(Int32Vector::from_slice([1])), 1);
|
||||
|
||||
// Keep flags unchanged.
|
||||
let mut selected = new_bitmap(&[true, false, true, false]);
|
||||
let v = ConstantVector::new(Arc::new(Int32Vector::from_slice([1])), 4);
|
||||
v.find_unique(&mut selected, Some(&prev));
|
||||
check_bitmap(&[true, false, true, false], &selected);
|
||||
|
||||
// Keep flags unchanged.
|
||||
let mut selected = new_bitmap(&[false, false, true, false]);
|
||||
v.find_unique(&mut selected, Some(&prev));
|
||||
check_bitmap(&[false, false, true, false], &selected);
|
||||
|
||||
// Prev is None, select first element.
|
||||
let mut selected = new_bitmap(&[false, false, true, false]);
|
||||
v.find_unique(&mut selected, None);
|
||||
check_bitmap(&[true, false, true, false], &selected);
|
||||
|
||||
// Prev is empty, select first element.
|
||||
let mut selected = new_bitmap(&[false, false, true, false]);
|
||||
v.find_unique(
|
||||
&mut selected,
|
||||
Some(&ConstantVector::new(
|
||||
Arc::new(Int32Vector::from_slice([1])),
|
||||
0,
|
||||
)),
|
||||
);
|
||||
check_bitmap(&[true, false, true, false], &selected);
|
||||
|
||||
// Different constant vector.
|
||||
let mut selected = new_bitmap(&[false, false, true, false]);
|
||||
let v = ConstantVector::new(Arc::new(Int32Vector::from_slice([2])), 4);
|
||||
v.find_unique(&mut selected, Some(&prev));
|
||||
check_bitmap(&[true, false, true, false], &selected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_unique_string() {
|
||||
let input = StringVector::from_slice(&["a", "a", "b", "c"]);
|
||||
let mut selected = BitVec::repeat(false, 4);
|
||||
input.find_unique(&mut selected, None);
|
||||
let expect = vec![true, false, true, true];
|
||||
check_bitmap(&expect, &selected);
|
||||
}
|
||||
|
||||
macro_rules! impl_find_unique_date_like_test {
|
||||
($VectorType: ident, $ValueType: ident, $method: ident) => {{
|
||||
use $crate::vectors::$VectorType;
|
||||
|
||||
let v = $VectorType::from_iterator([8, 8, 9, 10].into_iter().map($ValueType::$method));
|
||||
let mut selected = BitVec::repeat(false, 4);
|
||||
v.find_unique(&mut selected, None);
|
||||
let expect = vec![true, false, true, true];
|
||||
check_bitmap(&expect, &selected);
|
||||
}};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_unique_date_like() {
|
||||
impl_find_unique_date_like_test!(DateVector, Date, new);
|
||||
impl_find_unique_date_like_test!(TimestampSecondVector, TimestampSecond, from);
|
||||
impl_find_unique_date_like_test!(TimestampMillisecondVector, TimestampMillisecond, from);
|
||||
impl_find_unique_date_like_test!(TimestampMicrosecondVector, TimestampMicrosecond, from);
|
||||
impl_find_unique_date_like_test!(TimestampNanosecondVector, TimestampNanosecond, from);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user