mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-14 17:23:09 +00:00
refactor: replace some usage of MutableBitmap by BitVec (#610)
This commit is contained in:
@@ -12,8 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use bitvec::prelude as bv;
|
||||
pub use bitvec::prelude;
|
||||
|
||||
// `Lsb0` provides the best codegen for bit manipulation,
|
||||
// see https://github.com/bitvecto-rs/bitvec/blob/main/doc/order/Lsb0.md
|
||||
pub type BitVec = bv::BitVec<u8, bv::Lsb0>;
|
||||
pub type BitVec = prelude::BitVec<u8>;
|
||||
@@ -12,8 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod bitset;
|
||||
pub mod bit_vec;
|
||||
pub mod buffer;
|
||||
pub mod bytes;
|
||||
|
||||
pub use bitset::BitVec;
|
||||
pub use bit_vec::BitVec;
|
||||
|
||||
@@ -18,8 +18,6 @@ use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef, BooleanArray, MutableArray, MutableBooleanArray};
|
||||
use arrow::bitmap::utils::{BitmapIter, ZipValidity};
|
||||
use arrow::bitmap::MutableBitmap;
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
@@ -75,14 +73,6 @@ impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanVector {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<MutableBitmap> for BooleanVector {
|
||||
fn from(bitmap: MutableBitmap) -> BooleanVector {
|
||||
BooleanVector {
|
||||
array: BooleanArray::new(ArrowDataType::Boolean, bitmap.into(), None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for BooleanVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::boolean_datatype()
|
||||
@@ -351,16 +341,4 @@ mod tests {
|
||||
let expect: VectorRef = Arc::new(BooleanVector::from_slice(&[true, false, true]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_mutable_bitmap() {
|
||||
let mut bitmap = MutableBitmap::new();
|
||||
let values = [false, true, true, false, true];
|
||||
for v in values {
|
||||
bitmap.push(v);
|
||||
}
|
||||
let vector = BooleanVector::from(bitmap);
|
||||
let expect = BooleanVector::from_slice(&values);
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ mod filter;
|
||||
mod find_unique;
|
||||
mod replicate;
|
||||
|
||||
use arrow::bitmap::MutableBitmap;
|
||||
use common_base::BitVec;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::types::PrimitiveElement;
|
||||
@@ -50,7 +50,7 @@ pub trait VectorOp {
|
||||
/// Panics if
|
||||
/// - `selected.len() < self.len()`.
|
||||
/// - `prev_vector` and `self` have different data types.
|
||||
fn find_unique(&self, selected: &mut MutableBitmap, prev_vector: Option<&dyn Vector>);
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>);
|
||||
|
||||
/// Filters the vector, returns elements matching the `filter` (i.e. where the values are true).
|
||||
///
|
||||
@@ -65,7 +65,7 @@ macro_rules! impl_scalar_vector_op {
|
||||
replicate::$replicate(self, offsets)
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut MutableBitmap, prev_vector: Option<&dyn Vector>) {
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector = prev_vector.map(|pv| pv.as_any().downcast_ref::<$VectorType>().unwrap());
|
||||
find_unique::find_unique_scalar(self, selected, prev_vector);
|
||||
}
|
||||
@@ -92,7 +92,7 @@ impl VectorOp for ConstantVector {
|
||||
replicate::replicate_constant(self, offsets)
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut MutableBitmap, prev_vector: Option<&dyn Vector>) {
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector = prev_vector.and_then(|pv| pv.as_any().downcast_ref::<ConstantVector>());
|
||||
find_unique::find_unique_constant(self, selected, prev_vector);
|
||||
}
|
||||
@@ -107,7 +107,7 @@ impl VectorOp for NullVector {
|
||||
replicate::replicate_null(self, offsets)
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut MutableBitmap, prev_vector: Option<&dyn Vector>) {
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector = prev_vector.and_then(|pv| pv.as_any().downcast_ref::<NullVector>());
|
||||
find_unique::find_unique_null(self, selected, prev_vector);
|
||||
}
|
||||
@@ -125,7 +125,7 @@ where
|
||||
replicate::replicate_primitive(self, offsets)
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut MutableBitmap, prev_vector: Option<&dyn Vector>) {
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector =
|
||||
prev_vector.and_then(|pv| pv.as_any().downcast_ref::<PrimitiveVector<T>>());
|
||||
find_unique::find_unique_scalar(self, selected, prev_vector);
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use arrow::bitmap::MutableBitmap;
|
||||
use common_base::BitVec;
|
||||
|
||||
use crate::scalars::ScalarVector;
|
||||
use crate::vectors::{ConstantVector, NullVector, Vector};
|
||||
@@ -22,7 +22,7 @@ use crate::vectors::{ConstantVector, NullVector, Vector};
|
||||
// in any other case.
|
||||
pub(crate) fn find_unique_scalar<'a, T: ScalarVector>(
|
||||
vector: &'a T,
|
||||
selected: &'a mut MutableBitmap,
|
||||
selected: &'a mut BitVec,
|
||||
prev_vector: Option<&'a T>,
|
||||
) where
|
||||
T::RefItem<'a>: PartialEq,
|
||||
@@ -63,7 +63,7 @@ pub(crate) fn find_unique_scalar<'a, T: ScalarVector>(
|
||||
|
||||
pub(crate) fn find_unique_null(
|
||||
vector: &NullVector,
|
||||
selected: &mut MutableBitmap,
|
||||
selected: &mut BitVec,
|
||||
prev_vector: Option<&NullVector>,
|
||||
) {
|
||||
if vector.is_empty() {
|
||||
@@ -78,7 +78,7 @@ pub(crate) fn find_unique_null(
|
||||
|
||||
pub(crate) fn find_unique_constant(
|
||||
vector: &ConstantVector,
|
||||
selected: &mut MutableBitmap,
|
||||
selected: &mut BitVec,
|
||||
prev_vector: Option<&ConstantVector>,
|
||||
) {
|
||||
if vector.is_empty() {
|
||||
@@ -107,7 +107,7 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::vectors::{Int32Vector, StringVector, VectorOp};
|
||||
|
||||
fn check_bitmap(expect: &[bool], selected: &MutableBitmap) {
|
||||
fn check_bitmap(expect: &[bool], selected: &BitVec) {
|
||||
let actual = selected.iter().collect::<Vec<_>>();
|
||||
assert_eq!(expect, actual);
|
||||
}
|
||||
@@ -124,7 +124,7 @@ mod tests {
|
||||
let input = Int32Vector::from_iter(input);
|
||||
let prev = prev.map(Int32Vector::from_slice);
|
||||
|
||||
let mut selected = MutableBitmap::from_len_zeroed(input.len());
|
||||
let mut selected = BitVec::repeat(false, input.len());
|
||||
input.find_unique(&mut selected, prev.as_ref().map(|v| v as _));
|
||||
|
||||
check_bitmap(expect, &selected);
|
||||
@@ -164,7 +164,7 @@ mod tests {
|
||||
let prev = Int32Vector::from_slice(&[1]);
|
||||
|
||||
let v1 = Int32Vector::from_slice(&[2, 3, 4]);
|
||||
let mut selected = MutableBitmap::from_len_zeroed(v1.len());
|
||||
let mut selected = BitVec::repeat(false, v1.len());
|
||||
v1.find_unique(&mut selected, Some(&prev));
|
||||
|
||||
// Though element in v2 are the same as prev, but we should still keep them.
|
||||
@@ -174,15 +174,8 @@ mod tests {
|
||||
check_bitmap(&[true, true, true], &selected);
|
||||
}
|
||||
|
||||
fn new_bitmap(bits: &[bool]) -> MutableBitmap {
|
||||
let mut bitmap = MutableBitmap::from_len_zeroed(bits.len());
|
||||
for (i, bit) in bits.iter().enumerate() {
|
||||
if *bit {
|
||||
bitmap.set(i, true);
|
||||
}
|
||||
}
|
||||
|
||||
bitmap
|
||||
fn new_bitmap(bits: &[bool]) -> BitVec {
|
||||
BitVec::from_iter(bits)
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -222,7 +215,7 @@ mod tests {
|
||||
|
||||
fn check_find_unique_null(len: usize) {
|
||||
let input = NullVector::new(len);
|
||||
let mut selected = MutableBitmap::from_len_zeroed(input.len());
|
||||
let mut selected = BitVec::repeat(false, input.len());
|
||||
input.find_unique(&mut selected, None);
|
||||
|
||||
let mut expect = vec![false; len];
|
||||
@@ -231,7 +224,7 @@ mod tests {
|
||||
}
|
||||
check_bitmap(&expect, &selected);
|
||||
|
||||
let mut selected = MutableBitmap::from_len_zeroed(input.len());
|
||||
let mut selected = BitVec::repeat(false, input.len());
|
||||
let prev = Some(NullVector::new(1));
|
||||
input.find_unique(&mut selected, prev.as_ref().map(|v| v as _));
|
||||
let expect = vec![false; len];
|
||||
@@ -273,7 +266,7 @@ mod tests {
|
||||
|
||||
fn check_find_unique_constant(len: usize) {
|
||||
let input = ConstantVector::new(Arc::new(Int32Vector::from_slice(&[8])), len);
|
||||
let mut selected = MutableBitmap::from_len_zeroed(len);
|
||||
let mut selected = BitVec::repeat(false, len);
|
||||
input.find_unique(&mut selected, None);
|
||||
|
||||
let mut expect = vec![false; len];
|
||||
@@ -282,7 +275,7 @@ mod tests {
|
||||
}
|
||||
check_bitmap(&expect, &selected);
|
||||
|
||||
let mut selected = MutableBitmap::from_len_zeroed(len);
|
||||
let mut selected = BitVec::repeat(false, len);
|
||||
let prev = Some(ConstantVector::new(
|
||||
Arc::new(Int32Vector::from_slice(&[8])),
|
||||
1,
|
||||
@@ -340,7 +333,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_find_unique_string() {
|
||||
let input = StringVector::from_slice(&["a", "a", "b", "c"]);
|
||||
let mut selected = MutableBitmap::from_len_zeroed(4);
|
||||
let mut selected = BitVec::repeat(false, 4);
|
||||
input.find_unique(&mut selected, None);
|
||||
let expect = vec![true, false, true, true];
|
||||
check_bitmap(&expect, &selected);
|
||||
@@ -352,7 +345,7 @@ mod tests {
|
||||
use $crate::vectors::$VectorType;
|
||||
|
||||
let v = $VectorType::from_iterator([8, 8, 9, 10].into_iter().map($ValueType::$method));
|
||||
let mut selected = MutableBitmap::from_len_zeroed(4);
|
||||
let mut selected = BitVec::repeat(false, 4);
|
||||
v.find_unique(&mut selected, None);
|
||||
let expect = vec![true, false, true, true];
|
||||
check_bitmap(&expect, &selected);
|
||||
|
||||
@@ -20,7 +20,7 @@ mod merge;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use datatypes::arrow::bitmap::MutableBitmap;
|
||||
use common_base::BitVec;
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::vectors::{BooleanVector, MutableVector, VectorRef};
|
||||
@@ -127,7 +127,7 @@ pub trait BatchOp {
|
||||
/// - `batch` and `prev` have different number of columns (unless `prev` is
|
||||
/// empty).
|
||||
/// - `selected.len()` is less than the number of rows.
|
||||
fn find_unique(&self, batch: &Batch, selected: &mut MutableBitmap, prev: Option<&Batch>);
|
||||
fn find_unique(&self, batch: &Batch, selected: &mut BitVec, prev: Option<&Batch>);
|
||||
|
||||
/// Filters the `batch`, returns elements matching the `filter` (i.e. where the values
|
||||
/// are true).
|
||||
|
||||
@@ -13,7 +13,8 @@
|
||||
// limitations under the License.
|
||||
|
||||
use async_trait::async_trait;
|
||||
use datatypes::arrow::bitmap::MutableBitmap;
|
||||
use common_base::BitVec;
|
||||
use datatypes::prelude::ScalarVector;
|
||||
use datatypes::vectors::BooleanVector;
|
||||
|
||||
use crate::error::Result;
|
||||
@@ -28,6 +29,8 @@ pub struct DedupReader<R> {
|
||||
reader: R,
|
||||
/// Previous batch from the reader.
|
||||
prev_batch: Option<Batch>,
|
||||
/// Reused bitmap buffer.
|
||||
selected: BitVec,
|
||||
}
|
||||
|
||||
impl<R> DedupReader<R> {
|
||||
@@ -36,6 +39,7 @@ impl<R> DedupReader<R> {
|
||||
schema,
|
||||
reader,
|
||||
prev_batch: None,
|
||||
selected: BitVec::default(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,12 +52,11 @@ impl<R> DedupReader<R> {
|
||||
return Ok(batch);
|
||||
}
|
||||
|
||||
// The `arrow` filter needs `BooleanArray` as input so there is no convenient
|
||||
// and efficient way to reuse the bitmap. Though we could use `MutableBooleanArray`,
|
||||
// but we couldn't zero all bits in the mutable array easily.
|
||||
let mut selected = MutableBitmap::from_len_zeroed(batch.num_rows());
|
||||
// Reinitialize the bit map to zeros.
|
||||
self.selected.clear();
|
||||
self.selected.resize(batch.num_rows(), false);
|
||||
self.schema
|
||||
.find_unique(&batch, &mut selected, self.prev_batch.as_ref());
|
||||
.find_unique(&batch, &mut self.selected, self.prev_batch.as_ref());
|
||||
|
||||
// Store current batch to `prev_batch` so we could compare the next batch
|
||||
// with this batch. We store batch before filtering it mainly for correctness, as
|
||||
@@ -66,7 +69,7 @@ impl<R> DedupReader<R> {
|
||||
// TODO(yingwen): To support `DELETE`, we could find all rows whose op_types are equal
|
||||
// to `OpType::Delete`, mark their `selected` to false, then filter the batch.
|
||||
|
||||
let filter = BooleanVector::from(selected);
|
||||
let filter = BooleanVector::from_iterator(self.selected.iter().by_vals());
|
||||
// Filter duplicate rows.
|
||||
self.schema.filter(&batch, &filter)
|
||||
}
|
||||
|
||||
@@ -16,8 +16,8 @@ use std::cmp::Ordering;
|
||||
use std::collections::{BTreeSet, HashMap};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_base::BitVec;
|
||||
use common_error::prelude::*;
|
||||
use datatypes::arrow::bitmap::MutableBitmap;
|
||||
use datatypes::schema::{SchemaBuilder, SchemaRef};
|
||||
use datatypes::vectors::BooleanVector;
|
||||
use store_api::storage::{Chunk, ColumnId};
|
||||
@@ -303,7 +303,7 @@ impl BatchOp for ProjectedSchema {
|
||||
})
|
||||
}
|
||||
|
||||
fn find_unique(&self, batch: &Batch, selected: &mut MutableBitmap, prev: Option<&Batch>) {
|
||||
fn find_unique(&self, batch: &Batch, selected: &mut BitVec, prev: Option<&Batch>) {
|
||||
if let Some(prev) = prev {
|
||||
assert_eq!(batch.num_columns(), prev.num_columns());
|
||||
}
|
||||
@@ -503,18 +503,18 @@ mod tests {
|
||||
let schema = read_util::new_projected_schema();
|
||||
let batch = read_util::new_kv_batch(&[(1000, Some(1)), (2000, Some(2)), (2000, Some(2))]);
|
||||
|
||||
let mut selected = MutableBitmap::from_len_zeroed(3);
|
||||
let mut selected = BitVec::repeat(false, 3);
|
||||
schema.find_unique(&batch, &mut selected, None);
|
||||
assert!(selected.get(0));
|
||||
assert!(selected.get(1));
|
||||
assert!(!selected.get(2));
|
||||
assert!(selected[0]);
|
||||
assert!(selected[1]);
|
||||
assert!(!selected[2]);
|
||||
|
||||
let mut selected = MutableBitmap::from_len_zeroed(3);
|
||||
let mut selected = BitVec::repeat(false, 3);
|
||||
let prev = read_util::new_kv_batch(&[(1000, Some(1))]);
|
||||
schema.find_unique(&batch, &mut selected, Some(&prev));
|
||||
assert!(!selected.get(0));
|
||||
assert!(selected.get(1));
|
||||
assert!(!selected.get(2));
|
||||
assert!(!selected[0]);
|
||||
assert!(selected[1]);
|
||||
assert!(!selected[2]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user