diff --git a/src/datatypes/src/vectors/operations.rs b/src/datatypes/src/vectors/operations.rs index a35e17cf67..7ec3b547b5 100644 --- a/src/datatypes/src/vectors/operations.rs +++ b/src/datatypes/src/vectors/operations.rs @@ -1,5 +1,5 @@ -mod dedup; mod filter; +mod find_unique; mod replicate; use arrow::bitmap::MutableBitmap; @@ -19,23 +19,22 @@ pub trait VectorOp { /// Panics if `offsets.len() != self.len()`. fn replicate(&self, offsets: &[usize]) -> VectorRef; - /// Dedup elements in `self` and mark `i-th` bit of `selected` to `true` if the `i-th` element - /// of `self` is retained. + /// Mark `i-th` bit of `selected` to `true` if the `i-th` element of `self` is unique, which + /// means no element before it has the same value as it. /// /// The caller should ensure - /// 1. the `selected` bitmap is intialized by setting `[0, vector.len())` - /// bits to false. + /// 1. the length of `selected` bitmap is equal to `vector.len()`. /// 2. `vector` and `prev_vector` are sorted. /// /// If there are multiple duplicate elements, this function retains the **first** element. - /// If the first element of `self` is equal to the last element of `prev_vector`, then that - /// first element is also considered as duplicated and won't be retained. + /// The first element is considered as unique if the first element of `self` is different + /// from its previous element, that is the last element of `prev_vector`. /// /// # Panics /// Panics if /// - `selected.len() < self.len()`. /// - `prev_vector` and `self` have different data types. - fn dedup(&self, selected: &mut MutableBitmap, prev_vector: Option<&dyn Vector>); + fn find_unique(&self, selected: &mut MutableBitmap, prev_vector: Option<&dyn Vector>); /// Filters the vector, returns elements matching the `filter` (i.e. where the values are true). /// @@ -50,9 +49,9 @@ macro_rules! 
impl_scalar_vector_op { replicate::$replicate(self, offsets) } - fn dedup(&self, selected: &mut MutableBitmap, prev_vector: Option<&dyn Vector>) { + fn find_unique(&self, selected: &mut MutableBitmap, prev_vector: Option<&dyn Vector>) { let prev_vector = prev_vector.map(|pv| pv.as_any().downcast_ref::<$VectorType>().unwrap()); - dedup::dedup_scalar(self, selected, prev_vector); + find_unique::find_unique_scalar(self, selected, prev_vector); } fn filter(&self, filter: &BooleanVector) -> Result { @@ -77,9 +76,9 @@ impl VectorOp for ConstantVector { replicate::replicate_constant(self, offsets) } - fn dedup(&self, selected: &mut MutableBitmap, prev_vector: Option<&dyn Vector>) { + fn find_unique(&self, selected: &mut MutableBitmap, prev_vector: Option<&dyn Vector>) { let prev_vector = prev_vector.and_then(|pv| pv.as_any().downcast_ref::()); - dedup::dedup_constant(self, selected, prev_vector); + find_unique::find_unique_constant(self, selected, prev_vector); } fn filter(&self, filter: &BooleanVector) -> Result { @@ -92,9 +91,9 @@ impl VectorOp for NullVector { replicate::replicate_null(self, offsets) } - fn dedup(&self, selected: &mut MutableBitmap, prev_vector: Option<&dyn Vector>) { + fn find_unique(&self, selected: &mut MutableBitmap, prev_vector: Option<&dyn Vector>) { let prev_vector = prev_vector.and_then(|pv| pv.as_any().downcast_ref::()); - dedup::dedup_null(self, selected, prev_vector); + find_unique::find_unique_null(self, selected, prev_vector); } fn filter(&self, filter: &BooleanVector) -> Result { @@ -110,10 +109,10 @@ where replicate::replicate_primitive(self, offsets) } - fn dedup(&self, selected: &mut MutableBitmap, prev_vector: Option<&dyn Vector>) { + fn find_unique(&self, selected: &mut MutableBitmap, prev_vector: Option<&dyn Vector>) { let prev_vector = prev_vector.and_then(|pv| pv.as_any().downcast_ref::>()); - dedup::dedup_scalar(self, selected, prev_vector); + find_unique::find_unique_scalar(self, selected, prev_vector); } fn filter(&self, 
filter: &BooleanVector) -> Result { diff --git a/src/datatypes/src/vectors/operations/dedup.rs b/src/datatypes/src/vectors/operations/dedup.rs deleted file mode 100644 index 33ea0dfbb7..0000000000 --- a/src/datatypes/src/vectors/operations/dedup.rs +++ /dev/null @@ -1,223 +0,0 @@ -use arrow::bitmap::MutableBitmap; - -use crate::scalars::ScalarVector; -use crate::vectors::{ConstantVector, NullVector, Vector}; - -pub(crate) fn dedup_scalar<'a, T: ScalarVector>( - vector: &'a T, - selected: &'a mut MutableBitmap, - prev_vector: Option<&'a T>, -) where - T::RefItem<'a>: PartialEq, -{ - assert!(selected.len() >= vector.len()); - - if vector.is_empty() { - return; - } - - for ((i, current), next) in vector - .iter_data() - .enumerate() - .zip(vector.iter_data().skip(1)) - { - if current != next { - // If next element is a different element, we mark it as selected. - selected.set(i + 1, true); - } - } - - // Always retain the first element. - selected.set(0, true); - - // Then check whether still keep the first element based last element in previous vector. - if let Some(pv) = &prev_vector { - if !pv.is_empty() { - let last = pv.get_data(pv.len() - 1); - if last == vector.get_data(0) { - selected.set(0, false); - } - } - } -} - -pub(crate) fn dedup_null( - vector: &NullVector, - selected: &mut MutableBitmap, - prev_vector: Option<&NullVector>, -) { - if vector.is_empty() { - return; - } - - let no_prev_element = prev_vector.map(|v| v.is_empty()).unwrap_or(true); - if no_prev_element { - // Retain first element if no previous element (we known that it must - // be null). 
- selected.set(0, true); - } -} - -pub(crate) fn dedup_constant( - vector: &ConstantVector, - selected: &mut MutableBitmap, - prev_vector: Option<&ConstantVector>, -) { - if vector.is_empty() { - return; - } - - let equal_to_prev = if let Some(prev) = prev_vector { - !prev.is_empty() && vector.get_constant_ref() == prev.get_constant_ref() - } else { - false - }; - - if !equal_to_prev { - selected.set(0, true); - } -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use super::*; - use crate::vectors::{Int32Vector, StringVector, VectorOp}; - - fn check_bitmap(expect: &[bool], selected: &MutableBitmap) { - assert_eq!(expect.len(), selected.len()); - for (exp, v) in expect.iter().zip(selected.iter()) { - assert_eq!(*exp, v); - } - } - - fn check_dedup_scalar(expect: &[bool], input: &[i32], prev: Option<&[i32]>) { - check_dedup_scalar_opt(expect, input.iter().map(|v| Some(*v)), prev); - } - - fn check_dedup_scalar_opt( - expect: &[bool], - input: impl Iterator>, - prev: Option<&[i32]>, - ) { - let input = Int32Vector::from_iter(input); - let prev = prev.map(Int32Vector::from_slice); - - let mut selected = MutableBitmap::from_len_zeroed(input.len()); - input.dedup(&mut selected, prev.as_ref().map(|v| v as _)); - - check_bitmap(expect, &selected); - } - - #[test] - fn test_dedup_scalar() { - check_dedup_scalar(&[], &[], None); - check_dedup_scalar(&[true], &[1], None); - check_dedup_scalar(&[true, false], &[1, 1], None); - check_dedup_scalar(&[true, true], &[1, 2], None); - check_dedup_scalar(&[true, true, true, true], &[1, 2, 3, 4], None); - check_dedup_scalar(&[true, false, true, false], &[1, 1, 3, 3], None); - check_dedup_scalar(&[true, false, false, false, true], &[2, 2, 2, 2, 3], None); - - check_dedup_scalar(&[true], &[5], Some(&[])); - check_dedup_scalar(&[true], &[5], Some(&[3])); - check_dedup_scalar(&[false], &[5], Some(&[5])); - check_dedup_scalar(&[false], &[5], Some(&[4, 5])); - check_dedup_scalar(&[false, true], &[5, 6], Some(&[4, 5])); - 
check_dedup_scalar(&[false, true, false], &[5, 6, 6], Some(&[4, 5])); - check_dedup_scalar( - &[false, true, false, true, true], - &[5, 6, 6, 7, 8], - Some(&[4, 5]), - ); - - check_dedup_scalar_opt( - &[true, true, false, true, false], - [Some(1), Some(2), Some(2), None, None].into_iter(), - None, - ); - } - - fn check_dedup_null(len: usize) { - let input = NullVector::new(len); - let mut selected = MutableBitmap::from_len_zeroed(input.len()); - input.dedup(&mut selected, None); - - let mut expect = vec![false; len]; - if !expect.is_empty() { - expect[0] = true; - } - check_bitmap(&expect, &selected); - - let mut selected = MutableBitmap::from_len_zeroed(input.len()); - let prev = Some(NullVector::new(1)); - input.dedup(&mut selected, prev.as_ref().map(|v| v as _)); - let expect = vec![false; len]; - check_bitmap(&expect, &selected); - } - - #[test] - fn test_dedup_null() { - for len in 0..5 { - check_dedup_null(len); - } - } - - fn check_dedup_constant(len: usize) { - let input = ConstantVector::new(Arc::new(Int32Vector::from_slice(&[8])), len); - let mut selected = MutableBitmap::from_len_zeroed(len); - input.dedup(&mut selected, None); - - let mut expect = vec![false; len]; - if !expect.is_empty() { - expect[0] = true; - } - check_bitmap(&expect, &selected); - - let mut selected = MutableBitmap::from_len_zeroed(len); - let prev = Some(ConstantVector::new( - Arc::new(Int32Vector::from_slice(&[8])), - 1, - )); - input.dedup(&mut selected, prev.as_ref().map(|v| v as _)); - let expect = vec![false; len]; - check_bitmap(&expect, &selected); - } - - #[test] - fn test_dedup_constant() { - for len in 0..5 { - check_dedup_constant(len); - } - } - - #[test] - fn test_dedup_string() { - let input = StringVector::from_slice(&["a", "a", "b", "c"]); - let mut selected = MutableBitmap::from_len_zeroed(4); - input.dedup(&mut selected, None); - let expect = vec![true, false, true, true]; - check_bitmap(&expect, &selected); - } - - macro_rules! 
impl_dedup_date_like_test { - ($VectorType: ident, $ValueType: ident, $method: ident) => {{ - use common_time::$ValueType; - use $crate::vectors::$VectorType; - - let v = $VectorType::from_iterator([8, 8, 9, 10].into_iter().map($ValueType::$method)); - let mut selected = MutableBitmap::from_len_zeroed(4); - v.dedup(&mut selected, None); - let expect = vec![true, false, true, true]; - check_bitmap(&expect, &selected); - }}; - } - - #[test] - fn test_dedup_date_like() { - impl_dedup_date_like_test!(DateVector, Date, new); - impl_dedup_date_like_test!(DateTimeVector, DateTime, new); - impl_dedup_date_like_test!(TimestampVector, Timestamp, from_millis); - } -} diff --git a/src/datatypes/src/vectors/operations/find_unique.rs b/src/datatypes/src/vectors/operations/find_unique.rs new file mode 100644 index 0000000000..a077c4516f --- /dev/null +++ b/src/datatypes/src/vectors/operations/find_unique.rs @@ -0,0 +1,354 @@ +use arrow::bitmap::MutableBitmap; + +use crate::scalars::ScalarVector; +use crate::vectors::{ConstantVector, NullVector, Vector}; + +// To implement `find_unique()` correctly, we need to keep in mind that it always marks an element as +// selected when it is different from the previous one, and leaves the `selected` unchanged +// in any other case. +pub(crate) fn find_unique_scalar<'a, T: ScalarVector>( + vector: &'a T, + selected: &'a mut MutableBitmap, + prev_vector: Option<&'a T>, +) where + T::RefItem<'a>: PartialEq, +{ + assert!(selected.len() >= vector.len()); + + if vector.is_empty() { + return; + } + + for ((i, current), next) in vector + .iter_data() + .enumerate() + .zip(vector.iter_data().skip(1)) + { + if current != next { + // If next element is a different element, we mark it as selected. + selected.set(i + 1, true); + } + } + + // Marks first element as selected if it is different from previous element, otherwise + // keep selected bitmap unchanged. 
+ let is_first_not_duplicate = prev_vector + .map(|pv| { + if pv.is_empty() { + true + } else { + let last = pv.get_data(pv.len() - 1); + last != vector.get_data(0) + } + }) + .unwrap_or(true); + if is_first_not_duplicate { + selected.set(0, true); + } +} + +pub(crate) fn find_unique_null( + vector: &NullVector, + selected: &mut MutableBitmap, + prev_vector: Option<&NullVector>, +) { + if vector.is_empty() { + return; + } + + let is_first_not_duplicate = prev_vector.map(|pv| pv.is_empty()).unwrap_or(true); + if is_first_not_duplicate { + selected.set(0, true); + } +} + +pub(crate) fn find_unique_constant( + vector: &ConstantVector, + selected: &mut MutableBitmap, + prev_vector: Option<&ConstantVector>, +) { + if vector.is_empty() { + return; + } + + let is_first_not_duplicate = prev_vector + .map(|pv| { + if pv.is_empty() { + true + } else { + vector.get_constant_ref() != pv.get_constant_ref() + } + }) + .unwrap_or(true); + + if is_first_not_duplicate { + selected.set(0, true); + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use super::*; + use crate::vectors::{Int32Vector, StringVector, VectorOp}; + + fn check_bitmap(expect: &[bool], selected: &MutableBitmap) { + let actual = selected.iter().collect::>(); + assert_eq!(expect, actual); + } + + fn check_find_unique_scalar(expect: &[bool], input: &[i32], prev: Option<&[i32]>) { + check_find_unique_scalar_opt(expect, input.iter().map(|v| Some(*v)), prev); + } + + fn check_find_unique_scalar_opt( + expect: &[bool], + input: impl Iterator>, + prev: Option<&[i32]>, + ) { + let input = Int32Vector::from_iter(input); + let prev = prev.map(Int32Vector::from_slice); + + let mut selected = MutableBitmap::from_len_zeroed(input.len()); + input.find_unique(&mut selected, prev.as_ref().map(|v| v as _)); + + check_bitmap(expect, &selected); + } + + #[test] + fn test_find_unique_scalar() { + check_find_unique_scalar(&[], &[], None); + check_find_unique_scalar(&[true], &[1], None); + check_find_unique_scalar(&[true, 
false], &[1, 1], None); + check_find_unique_scalar(&[true, true], &[1, 2], None); + check_find_unique_scalar(&[true, true, true, true], &[1, 2, 3, 4], None); + check_find_unique_scalar(&[true, false, true, false], &[1, 1, 3, 3], None); + check_find_unique_scalar(&[true, false, false, false, true], &[2, 2, 2, 2, 3], None); + + check_find_unique_scalar(&[true], &[5], Some(&[])); + check_find_unique_scalar(&[true], &[5], Some(&[3])); + check_find_unique_scalar(&[false], &[5], Some(&[5])); + check_find_unique_scalar(&[false], &[5], Some(&[4, 5])); + check_find_unique_scalar(&[false, true], &[5, 6], Some(&[4, 5])); + check_find_unique_scalar(&[false, true, false], &[5, 6, 6], Some(&[4, 5])); + check_find_unique_scalar( + &[false, true, false, true, true], + &[5, 6, 6, 7, 8], + Some(&[4, 5]), + ); + + check_find_unique_scalar_opt( + &[true, true, false, true, false], + [Some(1), Some(2), Some(2), None, None].into_iter(), + None, + ); + } + + #[test] + fn test_find_unique_scalar_multi_times_with_prev() { + let prev = Int32Vector::from_slice(&[1]); + + let v1 = Int32Vector::from_slice(&[2, 3, 4]); + let mut selected = MutableBitmap::from_len_zeroed(v1.len()); + v1.find_unique(&mut selected, Some(&prev)); + + // Though element in v2 are the same as prev, but we should still keep them. + let v2 = Int32Vector::from_slice(&[1, 1, 1]); + v2.find_unique(&mut selected, Some(&prev)); + + check_bitmap(&[true, true, true], &selected); + } + + fn new_bitmap(bits: &[bool]) -> MutableBitmap { + let mut bitmap = MutableBitmap::from_len_zeroed(bits.len()); + for (i, bit) in bits.iter().enumerate() { + if *bit { + bitmap.set(i, true); + } + } + + bitmap + } + + #[test] + fn test_find_unique_scalar_with_prev() { + let prev = Int32Vector::from_slice(&[1]); + + let mut selected = new_bitmap(&[true, false, true, false]); + let v = Int32Vector::from_slice(&[2, 3, 4, 5]); + v.find_unique(&mut selected, Some(&prev)); + // All elements are different. 
+ check_bitmap(&[true, true, true, true], &selected); + + let mut selected = new_bitmap(&[true, false, true, false]); + let v = Int32Vector::from_slice(&[1, 2, 3, 4]); + v.find_unique(&mut selected, Some(&prev)); + // Though first element is duplicate, but we keep the flag unchanged. + check_bitmap(&[true, true, true, true], &selected); + + // Same case as above, but now `prev` is None. + let mut selected = new_bitmap(&[true, false, true, false]); + let v = Int32Vector::from_slice(&[1, 2, 3, 4]); + v.find_unique(&mut selected, None); + check_bitmap(&[true, true, true, true], &selected); + + // Same case as above, but now `prev` is empty. + let mut selected = new_bitmap(&[true, false, true, false]); + let v = Int32Vector::from_slice(&[1, 2, 3, 4]); + v.find_unique(&mut selected, Some(&Int32Vector::from_slice(&[]))); + check_bitmap(&[true, true, true, true], &selected); + + let mut selected = new_bitmap(&[false, false, false, false]); + let v = Int32Vector::from_slice(&[2, 2, 4, 5]); + v.find_unique(&mut selected, Some(&prev)); + // only v[1] is duplicate. + check_bitmap(&[true, false, true, true], &selected); + } + + fn check_find_unique_null(len: usize) { + let input = NullVector::new(len); + let mut selected = MutableBitmap::from_len_zeroed(input.len()); + input.find_unique(&mut selected, None); + + let mut expect = vec![false; len]; + if !expect.is_empty() { + expect[0] = true; + } + check_bitmap(&expect, &selected); + + let mut selected = MutableBitmap::from_len_zeroed(input.len()); + let prev = Some(NullVector::new(1)); + input.find_unique(&mut selected, prev.as_ref().map(|v| v as _)); + let expect = vec![false; len]; + check_bitmap(&expect, &selected); + } + + #[test] + fn test_find_unique_null() { + for len in 0..5 { + check_find_unique_null(len); + } + } + + #[test] + fn test_find_unique_null_with_prev() { + let prev = NullVector::new(1); + + // Keep flags unchanged. 
+ let mut selected = new_bitmap(&[true, false, true, false]); + let v = NullVector::new(4); + v.find_unique(&mut selected, Some(&prev)); + check_bitmap(&[true, false, true, false], &selected); + + // Keep flags unchanged. + let mut selected = new_bitmap(&[false, false, true, false]); + v.find_unique(&mut selected, Some(&prev)); + check_bitmap(&[false, false, true, false], &selected); + + // Prev is None, select first element. + let mut selected = new_bitmap(&[false, false, true, false]); + v.find_unique(&mut selected, None); + check_bitmap(&[true, false, true, false], &selected); + + // Prev is empty, select first element. + let mut selected = new_bitmap(&[false, false, true, false]); + v.find_unique(&mut selected, Some(&NullVector::new(0))); + check_bitmap(&[true, false, true, false], &selected); + } + + fn check_find_unique_constant(len: usize) { + let input = ConstantVector::new(Arc::new(Int32Vector::from_slice(&[8])), len); + let mut selected = MutableBitmap::from_len_zeroed(len); + input.find_unique(&mut selected, None); + + let mut expect = vec![false; len]; + if !expect.is_empty() { + expect[0] = true; + } + check_bitmap(&expect, &selected); + + let mut selected = MutableBitmap::from_len_zeroed(len); + let prev = Some(ConstantVector::new( + Arc::new(Int32Vector::from_slice(&[8])), + 1, + )); + input.find_unique(&mut selected, prev.as_ref().map(|v| v as _)); + let expect = vec![false; len]; + check_bitmap(&expect, &selected); + } + + #[test] + fn test_find_unique_constant() { + for len in 0..5 { + check_find_unique_constant(len); + } + } + + #[test] + fn test_find_unique_constant_with_prev() { + let prev = ConstantVector::new(Arc::new(Int32Vector::from_slice(&[1])), 1); + + // Keep flags unchanged. 
+ let mut selected = new_bitmap(&[true, false, true, false]); + let v = ConstantVector::new(Arc::new(Int32Vector::from_slice(&[1])), 4); + v.find_unique(&mut selected, Some(&prev)); + check_bitmap(&[true, false, true, false], &selected); + + // Keep flags unchanged. + let mut selected = new_bitmap(&[false, false, true, false]); + v.find_unique(&mut selected, Some(&prev)); + check_bitmap(&[false, false, true, false], &selected); + + // Prev is None, select first element. + let mut selected = new_bitmap(&[false, false, true, false]); + v.find_unique(&mut selected, None); + check_bitmap(&[true, false, true, false], &selected); + + // Prev is empty, select first element. + let mut selected = new_bitmap(&[false, false, true, false]); + v.find_unique( + &mut selected, + Some(&ConstantVector::new( + Arc::new(Int32Vector::from_slice(&[1])), + 0, + )), + ); + check_bitmap(&[true, false, true, false], &selected); + + // Different constant vector. + let mut selected = new_bitmap(&[false, false, true, false]); + let v = ConstantVector::new(Arc::new(Int32Vector::from_slice(&[2])), 4); + v.find_unique(&mut selected, Some(&prev)); + check_bitmap(&[true, false, true, false], &selected); + } + + #[test] + fn test_find_unique_string() { + let input = StringVector::from_slice(&["a", "a", "b", "c"]); + let mut selected = MutableBitmap::from_len_zeroed(4); + input.find_unique(&mut selected, None); + let expect = vec![true, false, true, true]; + check_bitmap(&expect, &selected); + } + + macro_rules! 
impl_find_unique_date_like_test { + ($VectorType: ident, $ValueType: ident, $method: ident) => {{ + use common_time::$ValueType; + use $crate::vectors::$VectorType; + + let v = $VectorType::from_iterator([8, 8, 9, 10].into_iter().map($ValueType::$method)); + let mut selected = MutableBitmap::from_len_zeroed(4); + v.find_unique(&mut selected, None); + let expect = vec![true, false, true, true]; + check_bitmap(&expect, &selected); + }}; + } + + #[test] + fn test_find_unique_date_like() { + impl_find_unique_date_like_test!(DateVector, Date, new); + impl_find_unique_date_like_test!(DateTimeVector, DateTime, new); + impl_find_unique_date_like_test!(TimestampVector, Timestamp, from_millis); + } +} diff --git a/src/storage/src/read.rs b/src/storage/src/read.rs index 08b91f8077..616d130a82 100644 --- a/src/storage/src/read.rs +++ b/src/storage/src/read.rs @@ -98,20 +98,22 @@ pub trait BatchOp { /// - `left` or `right` has insufficient column num. fn compare_row(&self, left: &Batch, i: usize, right: &Batch, j: usize) -> Ordering; - /// Dedup rows in `batch` by row key. + /// Find unique rows in `batch` by row key. /// /// If `prev` is `Some` and not empty, the last row of `prev` would be used to dedup - /// current `batch`. Set `i-th` bit of `selected` to `true` if we need to keep `i-th` - /// row. So the caller could use `selected` to build a [BooleanVector] to filter the - /// batch. + /// current `batch`. Set `i-th` bit of `selected` to `true` if `i-th` row is unique, + /// which means the row key of `i-th` row is different from that of the previous (`i-1-th`) row. /// - /// The caller must ensure `selected` is initialized by filling `batch.num_rows()` bits + /// The caller could use `selected` to build a [BooleanVector] to filter the + /// batch, and must ensure `selected` is initialized by filling `batch.num_rows()` bits /// to zero. 
/// /// # Panics - /// Panics if `batch` and `prev` have different number of columns (unless `prev` is + /// Panics if + /// - `batch` and `prev` have different number of columns (unless `prev` is /// empty). - fn dedup(&self, batch: &Batch, selected: &mut MutableBitmap, prev: Option<&Batch>); + /// - `selected.len()` is less than the number of rows. + fn find_unique(&self, batch: &Batch, selected: &mut MutableBitmap, prev: Option<&Batch>); /// Filters the `batch`, returns elements matching the `filter` (i.e. where the values /// are true). diff --git a/src/storage/src/read/dedup.rs b/src/storage/src/read/dedup.rs index 6d6f93bdf7..95b3a44703 100644 --- a/src/storage/src/read/dedup.rs +++ b/src/storage/src/read/dedup.rs @@ -39,7 +39,7 @@ impl DedupReader { // but we couldn't zero all bits in the mutable array easily. let mut selected = MutableBitmap::from_len_zeroed(batch.num_rows()); self.schema - .dedup(&batch, &mut selected, self.prev_batch.as_ref()); + .find_unique(&batch, &mut selected, self.prev_batch.as_ref()); // Store current batch to `prev_batch` so we could compare the next batch // with this batch. 
We store batch before filtering it mainly for correctness, as diff --git a/src/storage/src/schema/projected.rs b/src/storage/src/schema/projected.rs index fe0ebce126..a57fb1aab3 100644 --- a/src/storage/src/schema/projected.rs +++ b/src/storage/src/schema/projected.rs @@ -289,7 +289,7 @@ impl BatchOp for ProjectedSchema { }) } - fn dedup(&self, batch: &Batch, selected: &mut MutableBitmap, prev: Option<&Batch>) { + fn find_unique(&self, batch: &Batch, selected: &mut MutableBitmap, prev: Option<&Batch>) { if let Some(prev) = prev { assert_eq!(batch.num_columns(), prev.num_columns()); } @@ -299,7 +299,7 @@ impl BatchOp for ProjectedSchema { batch.column(idx), prev.map(|prev| prev.column(idx).as_ref()), ); - current.dedup(selected, prev_col); + current.find_unique(selected, prev_col); } } @@ -485,18 +485,19 @@ mod tests { } #[test] - fn test_dedup_batch() { + fn test_batch_find_unique() { let schema = read_util::new_projected_schema(); let batch = read_util::new_kv_batch(&[(1000, Some(1)), (2000, Some(2)), (2000, Some(2))]); - let mut selected = MutableBitmap::from_len_zeroed(3); - schema.dedup(&batch, &mut selected, None); + let mut selected = MutableBitmap::from_len_zeroed(3); + schema.find_unique(&batch, &mut selected, None); assert!(selected.get(0)); assert!(selected.get(1)); assert!(!selected.get(2)); + let mut selected = MutableBitmap::from_len_zeroed(3); let prev = read_util::new_kv_batch(&[(1000, Some(1))]); - schema.dedup(&batch, &mut selected, Some(&prev)); + schema.find_unique(&batch, &mut selected, Some(&prev)); assert!(!selected.get(0)); assert!(selected.get(1)); assert!(!selected.get(2));