chore: replace bitvec impl (#214)

* chore: replace bitvec impl

* chore: reduce one copy of nullmask

* chore: move bitvec to common_base
This commit is contained in:
fys
2022-08-31 14:13:36 +08:00
committed by GitHub
parent 38d5febafe
commit ba93aa83f2
13 changed files with 46 additions and 268 deletions

View File

@@ -10,8 +10,8 @@ arc-swap = "1.0"
arrow-format = { version = "0.4", features = ["ipc"] }
async-stream = "0.3"
async-trait = "0.1"
bitvec = "1.0"
bytes = "1.1"
common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-runtime = { path = "../common/runtime" }
common-telemetry = { path = "../common/telemetry" }

View File

@@ -5,6 +5,7 @@
use std::io::Read;
use arrow_format::{self, ipc::planus::ReadAsRoot};
use common_base::BitVec;
use datatypes::arrow::{
datatypes::Schema,
error::{ArrowError, Result},
@@ -14,8 +15,6 @@ use datatypes::arrow::{
},
};
use crate::bit_vec;
const CONTINUATION_MARKER: [u8; 4] = [0xff; 4];
pub struct ArrowStreamReader<R: Read> {
@@ -92,7 +91,7 @@ impl<R: Read> ArrowStreamReader<R> {
}
fn valid_metadata(metadata: &StreamMetadata, column_null_mask: &[u8]) -> StreamMetadata {
let column_null_mask = bit_vec::BitVec::from_slice(column_null_mask);
let column_null_mask = BitVec::from_slice(column_null_mask);
let schema = Schema::from(
metadata

View File

@@ -1,5 +0,0 @@
use bitvec::prelude as bv;
// `Lsb0` provides the best codegen for bit manipulation,
// see https://github.com/bitvecto-rs/bitvec/blob/main/doc/order/Lsb0.md
pub type BitVec = bv::BitVec<u8, bv::Lsb0>;

View File

@@ -1,7 +1,6 @@
//! Storage engine implementation.
mod arrow_stream;
mod background;
mod bit_vec;
mod chunk;
pub mod codec;
pub mod config;

View File

@@ -1,10 +1,9 @@
#![allow(clippy::all)]
tonic::include_proto!("greptime.storage.wal.v1");
use crate::{
bit_vec,
write_batch::{Mutation, WriteBatch},
};
use common_base::BitVec;
use crate::write_batch::{Mutation, WriteBatch};
pub fn gen_mutation_extras(write_batch: &WriteBatch) -> Vec<MutationExtra> {
let column_schemas = write_batch.schema().column_schemas();
@@ -18,7 +17,7 @@ pub fn gen_mutation_extras(write_batch: &WriteBatch) -> Vec<MutationExtra> {
column_null_mask: Default::default(),
}
} else {
let mut column_null_mask = bit_vec::BitVec::repeat(false, column_schemas.len());
let mut column_null_mask = BitVec::repeat(false, column_schemas.len());
for (i, cs) in column_schemas.iter().enumerate() {
if put.column_by_name(&cs.name).is_none() {
column_null_mask.set(i, true);

View File

@@ -3,6 +3,7 @@ tonic::include_proto!("greptime.storage.write_batch.v1");
use std::sync::Arc;
use common_base::BitVec;
use common_error::prelude::*;
use datatypes::schema;
use datatypes::{
@@ -20,8 +21,6 @@ use datatypes::{
use paste::paste;
use snafu::OptionExt;
use crate::bit_vec;
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Failed to convert datafusion type: {}", from))]
@@ -178,7 +177,7 @@ macro_rules! gen_columns {
.with_context(|| ConversionSnafu {
from: std::format!("{:?}", vector.as_ref().data_type()),
})?;
let mut bits: Option<bit_vec::BitVec> = None;
let mut bits: Option<BitVec> = None;
vector_ref
.iter_data()
@@ -187,7 +186,7 @@ macro_rules! gen_columns {
Some($vari) => values.[<$key _values>].push($cast),
None => {
if (bits.is_none()) {
bits = Some(bit_vec::BitVec::repeat(false, vector_ref.len()));
bits = Some(BitVec::repeat(false, vector_ref.len()));
}
bits.as_mut().map(|x| x.set(i, true));
}
@@ -237,7 +236,7 @@ macro_rules! gen_put_data {
(0..num_rows)
.for_each(|_| builder.push(vector_iter.next().map(|$vari| $cast)));
} else {
bit_vec::BitVec::from_vec(column.value_null_mask)
BitVec::from_vec(column.value_null_mask)
.into_iter()
.take(num_rows)
.for_each(|is_null| {

View File

@@ -432,6 +432,7 @@ pub mod codec {
use std::{io::Cursor, sync::Arc};
use common_base::BitVec;
use datatypes::{
arrow::{
chunk::Chunk as ArrowChunk,
@@ -454,18 +455,15 @@ pub mod codec {
Error as WriteBatchError, FromProtobufSnafu, Mutation, ParseSchemaSnafu, Result,
ToProtobufSnafu, WriteBatch,
};
use crate::proto::{
wal::{MutationExtra, MutationType},
write_batch::{self, gen_columns, gen_put_data_vector},
};
use crate::write_batch::{DecodeProtobufSnafu, EncodeProtobufSnafu, PutData};
use crate::{
arrow_stream::ArrowStreamReader,
codec::{Decoder, Encoder},
};
use crate::{
bit_vec,
proto::{
wal::{MutationExtra, MutationType},
write_batch::{self, gen_columns, gen_put_data_vector},
},
};
// TODO(jiachun): The codec logic is too complex; maybe we should use protobuf to
// serialize/deserialize all our data.
@@ -523,7 +521,7 @@ pub mod codec {
} else {
let valid_ipc_fields = ipc_fields
.iter()
.zip(bit_vec::BitVec::from_slice(column_null_mask))
.zip(BitVec::from_slice(column_null_mask))
.filter(|(_, is_null)| !*is_null)
.map(|(ipc_field, _)| ipc_field.clone())
.collect::<Vec<_>>();
@@ -644,13 +642,12 @@ pub mod codec {
if ext.column_null_mask.is_empty() {
gen_mutation_put(&column_names)
} else {
let valid_columns =
bit_vec::BitVec::from_slice(&ext.column_null_mask)
.into_iter()
.zip(column_names.iter())
.filter(|(is_null, _)| !*is_null)
.map(|(_, column_name)| column_name.clone())
.collect::<Vec<_>>();
let valid_columns = BitVec::from_slice(&ext.column_null_mask)
.into_iter()
.zip(column_names.iter())
.filter(|(is_null, _)| !*is_null)
.map(|(_, column_name)| column_name.clone())
.collect::<Vec<_>>();
gen_mutation_put(&valid_columns)
}
@@ -765,7 +762,7 @@ pub mod codec {
.map(|column| (column.name.clone(), column.data_type.clone()))
.collect::<Vec<_>>()
} else {
bit_vec::BitVec::from_slice(&ext.column_null_mask)
BitVec::from_slice(&ext.column_null_mask)
.into_iter()
.zip(column_schemas.iter())
.filter(|(is_null, _)| !*is_null)