mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-06-03 13:50:40 +00:00
feat: Impl BatchIterator for btree memtable
feat: Impl MapIterWrapper refactor: Rename RowKey to InnerKey
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
|
||||
mod engine;
|
||||
mod error;
|
||||
mod memtable;
|
||||
pub mod memtable;
|
||||
pub mod metadata;
|
||||
mod region;
|
||||
mod region_writer;
|
||||
|
||||
@@ -20,15 +20,64 @@ pub trait Memtable: Send + Sync {
|
||||
/// Write key/values to the memtable.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panic if the schema of key/value differs from memtable's schema.
|
||||
/// Panics if the schema of key/value differs from memtable's schema.
|
||||
fn write(&self, kvs: &KeyValues) -> Result<()>;
|
||||
|
||||
/// Iterates over the memtable.
|
||||
// TODO(yingwen): Consider passing a projector (does column projection).
|
||||
fn iter(&self, ctx: IterContext) -> Result<BatchIteratorPtr>;
|
||||
|
||||
/// Returns the estimated bytes allocated by this memtable from heap.
|
||||
fn bytes_allocated(&self) -> usize;
|
||||
}
|
||||
|
||||
pub type MemtableRef = Arc<dyn Memtable>;
|
||||
|
||||
/// Context for iterating memtable.
|
||||
#[derive(Debug)]
|
||||
pub struct IterContext {
|
||||
/// The suggested batch size of the iterator.
|
||||
pub batch_size: usize,
|
||||
}
|
||||
|
||||
impl Default for IterContext {
|
||||
fn default() -> Self {
|
||||
Self { batch_size: 256 }
|
||||
}
|
||||
}
|
||||
|
||||
/// The ordering of the iterator output.
|
||||
#[derive(Debug)]
|
||||
pub enum RowOrdering {
|
||||
/// The output rows are unordered.
|
||||
Unordered,
|
||||
|
||||
/// The output rows are ordered by key.
|
||||
Key,
|
||||
}
|
||||
|
||||
pub struct Batch {
|
||||
pub keys: Vec<VectorRef>,
|
||||
pub values: Vec<VectorRef>,
|
||||
}
|
||||
|
||||
/// Iterator of memtable.
|
||||
pub trait BatchIterator: Send {
|
||||
/// Returns the schema of this iterator.
|
||||
fn schema(&self) -> &MemtableSchema;
|
||||
|
||||
/// Returns the ordering of the output rows from this iterator.
|
||||
fn ordering(&self) -> RowOrdering;
|
||||
|
||||
/// Fetch next batch from the memtable.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the iterator has already been exhausted.
|
||||
fn next(&mut self) -> Result<Option<Batch>>;
|
||||
}
|
||||
|
||||
pub type BatchIteratorPtr = Box<dyn BatchIterator>;
|
||||
|
||||
pub trait MemtableBuilder: Send + Sync {
|
||||
fn build(&self, schema: MemtableSchema) -> MemtableRef;
|
||||
}
|
||||
@@ -54,9 +103,7 @@ impl KeyValues {
|
||||
self.keys.clear();
|
||||
self.values.clear();
|
||||
}
|
||||
}
|
||||
|
||||
impl KeyValues {
|
||||
pub fn len(&self) -> usize {
|
||||
self.keys.first().map(|v| v.len()).unwrap_or_default()
|
||||
}
|
||||
|
||||
@@ -1,26 +1,34 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::RwLock;
|
||||
use std::collections::{btree_map, BTreeMap};
|
||||
use std::ops::Bound;
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::VectorBuilder;
|
||||
use datatypes::vectors::VectorRef;
|
||||
use store_api::storage::{SequenceNumber, ValueType};
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::memtable::{KeyValues, Memtable, MemtableSchema};
|
||||
use crate::memtable::{
|
||||
Batch, BatchIterator, BatchIteratorPtr, IterContext, KeyValues, Memtable, MemtableSchema,
|
||||
RowOrdering,
|
||||
};
|
||||
|
||||
type RwLockMap = RwLock<BTreeMap<InnerKey, RowValue>>;
|
||||
|
||||
/// A simple memtable implementation based on std's [`BTreeMap`].
|
||||
///
|
||||
/// Mainly for test purpose.
|
||||
/// Mainly for testing purposes; do not use in production.
|
||||
pub struct BTreeMemtable {
|
||||
schema: MemtableSchema,
|
||||
map: RwLock<BTreeMap<RowKey, RowValue>>,
|
||||
map: Arc<RwLockMap>,
|
||||
}
|
||||
|
||||
impl BTreeMemtable {
|
||||
pub fn new(schema: MemtableSchema) -> BTreeMemtable {
|
||||
BTreeMemtable {
|
||||
schema,
|
||||
map: RwLock::new(BTreeMap::new()),
|
||||
map: Arc::new(RwLock::new(BTreeMap::new())),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -34,18 +42,177 @@ impl Memtable for BTreeMemtable {
|
||||
let mut map = self.map.write().unwrap();
|
||||
|
||||
let iter_row = IterRow::new(kvs);
|
||||
for (row_key, row_value) in iter_row {
|
||||
map.insert(row_key, row_value);
|
||||
for (inner_key, row_value) in iter_row {
|
||||
map.insert(inner_key, row_value);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn iter(&self, ctx: IterContext) -> Result<BatchIteratorPtr> {
|
||||
let iter = BTreeIterator::new(ctx, self.schema.clone(), self.map.clone());
|
||||
|
||||
Ok(Box::new(iter))
|
||||
}
|
||||
|
||||
fn bytes_allocated(&self) -> usize {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
struct BTreeIterator {
|
||||
ctx: IterContext,
|
||||
schema: MemtableSchema,
|
||||
map: Arc<RwLockMap>,
|
||||
last_key: Option<InnerKey>,
|
||||
}
|
||||
|
||||
impl BatchIterator for BTreeIterator {
|
||||
fn schema(&self) -> &MemtableSchema {
|
||||
&self.schema
|
||||
}
|
||||
|
||||
fn ordering(&self) -> RowOrdering {
|
||||
RowOrdering::Key
|
||||
}
|
||||
|
||||
fn next(&mut self) -> Result<Option<Batch>> {
|
||||
Ok(self.next_batch())
|
||||
}
|
||||
}
|
||||
|
||||
impl BTreeIterator {
|
||||
fn new(ctx: IterContext, schema: MemtableSchema, map: Arc<RwLockMap>) -> BTreeIterator {
|
||||
BTreeIterator {
|
||||
ctx,
|
||||
schema,
|
||||
map,
|
||||
last_key: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn next_batch(&mut self) -> Option<Batch> {
|
||||
let map = self.map.read().unwrap();
|
||||
let iter = if let Some(last_key) = &self.last_key {
|
||||
map.range((Bound::Excluded(last_key), Bound::Unbounded))
|
||||
} else {
|
||||
map.range(..)
|
||||
};
|
||||
let iter = MapIterWrapper::new(iter);
|
||||
|
||||
let mut keys = Vec::with_capacity(self.ctx.batch_size);
|
||||
let mut values = Vec::with_capacity(self.ctx.batch_size);
|
||||
for (inner_key, row_value) in iter.take(self.ctx.batch_size) {
|
||||
keys.push(inner_key);
|
||||
values.push(row_value);
|
||||
}
|
||||
|
||||
if keys.is_empty() {
|
||||
return None;
|
||||
}
|
||||
self.last_key = keys.last().map(|k| (*k).clone());
|
||||
|
||||
Some(Batch {
|
||||
keys: Self::keys_to_vectors(&keys),
|
||||
values: Self::values_to_vectors(&values),
|
||||
})
|
||||
}
|
||||
|
||||
// Assumes column num of all row key is equal.
|
||||
fn keys_to_vectors(keys: &[&InnerKey]) -> Vec<VectorRef> {
|
||||
if keys.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let column_num = keys[0].row_key.len();
|
||||
let row_num = keys.len();
|
||||
let mut builders = Vec::with_capacity(column_num);
|
||||
for v in &keys[0].row_key {
|
||||
builders.push(VectorBuilder::with_capacity(v.data_type(), row_num));
|
||||
}
|
||||
|
||||
let mut vectors = Vec::with_capacity(column_num);
|
||||
for (col_idx, builder) in builders.iter_mut().enumerate() {
|
||||
for row_key in keys {
|
||||
let value = &row_key.row_key[col_idx];
|
||||
builder.push(value);
|
||||
}
|
||||
|
||||
vectors.push(builder.finish());
|
||||
}
|
||||
|
||||
vectors
|
||||
}
|
||||
|
||||
// Assumes column num of all row value is equal.
|
||||
fn values_to_vectors(values: &[&RowValue]) -> Vec<VectorRef> {
|
||||
if values.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let column_num = values[0].values.len();
|
||||
let row_num = values.len();
|
||||
let mut builders = Vec::with_capacity(column_num);
|
||||
for v in &values[0].values {
|
||||
builders.push(VectorBuilder::with_capacity(v.data_type(), row_num));
|
||||
}
|
||||
|
||||
let mut vectors = Vec::with_capacity(column_num);
|
||||
for (col_idx, builder) in builders.iter_mut().enumerate() {
|
||||
for row_value in values {
|
||||
let value = &row_value.values[col_idx];
|
||||
builder.push(value);
|
||||
}
|
||||
|
||||
vectors.push(builder.finish());
|
||||
}
|
||||
|
||||
vectors
|
||||
}
|
||||
}
|
||||
|
||||
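/// `MapIterWrapper` skips entries whose row key equals that of the previously yielded entry, so only the first (newest-sequence) entry of each row key is returned.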
|
||||
struct MapIterWrapper<'a, InnerKey, RowValue> {
|
||||
iter: btree_map::Range<'a, InnerKey, RowValue>,
|
||||
prev_key: Option<InnerKey>,
|
||||
}
|
||||
|
||||
impl<'a> MapIterWrapper<'a, InnerKey, RowValue> {
|
||||
fn new(
|
||||
iter: btree_map::Range<'a, InnerKey, RowValue>,
|
||||
) -> MapIterWrapper<'a, InnerKey, RowValue> {
|
||||
MapIterWrapper {
|
||||
iter,
|
||||
prev_key: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for MapIterWrapper<'a, InnerKey, RowValue> {
    type Item = (&'a InnerKey, &'a RowValue);

    /// Yields the next entry whose row key differs from the previously yielded one.
    ///
    /// Returns `None` as soon as the underlying range runs out, including while
    /// skipping entries that repeat the previous row key.
    fn next(&mut self) -> Option<(&'a InnerKey, &'a RowValue)> {
        let mut entry = self.iter.next()?;

        // `take()` empties `prev_key` while skipping, so it stays `None` if the
        // range is exhausted inside the loop.
        if let Some(yielded) = self.prev_key.take() {
            while yielded.is_row_key_equal(entry.0) {
                entry = self.iter.next()?;
            }
        }

        self.prev_key = Some(entry.0.clone());
        Some(entry)
    }
}
|
||||
|
||||
struct IterRow<'a> {
|
||||
kvs: &'a KeyValues,
|
||||
index: usize,
|
||||
@@ -61,22 +228,22 @@ impl<'a> IterRow<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn fetch_row(&mut self) -> (RowKey, RowValue) {
|
||||
let keys = self
|
||||
fn fetch_row(&mut self) -> (InnerKey, RowValue) {
|
||||
let row_key = self
|
||||
.kvs
|
||||
.keys
|
||||
.iter()
|
||||
.map(|vector| vector.get(self.index))
|
||||
.collect();
|
||||
let row_key = RowKey {
|
||||
keys,
|
||||
let inner_key = InnerKey {
|
||||
row_key,
|
||||
sequence: self.kvs.sequence,
|
||||
index_in_batch: self.kvs.start_index_in_batch + self.index,
|
||||
value_type: self.kvs.value_type,
|
||||
};
|
||||
|
||||
let row_value = RowValue {
|
||||
_values: self
|
||||
values: self
|
||||
.kvs
|
||||
.values
|
||||
.iter()
|
||||
@@ -84,14 +251,14 @@ impl<'a> IterRow<'a> {
|
||||
.collect(),
|
||||
};
|
||||
|
||||
(row_key, row_value)
|
||||
(inner_key, row_value)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for IterRow<'a> {
|
||||
type Item = (RowKey, RowValue);
|
||||
type Item = (InnerKey, RowValue);
|
||||
|
||||
fn next(&mut self) -> Option<(RowKey, RowValue)> {
|
||||
fn next(&mut self) -> Option<(InnerKey, RowValue)> {
|
||||
if self.index >= self.len {
|
||||
return None;
|
||||
}
|
||||
@@ -104,33 +271,39 @@ impl<'a> Iterator for IterRow<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(yingwen): Actually the version and timestamp may order desc.
|
||||
#[derive(PartialEq, Eq)]
|
||||
struct RowKey {
|
||||
keys: Vec<Value>,
|
||||
#[derive(Clone, PartialEq, Eq)]
|
||||
struct InnerKey {
|
||||
row_key: Vec<Value>,
|
||||
sequence: SequenceNumber,
|
||||
index_in_batch: usize,
|
||||
value_type: ValueType,
|
||||
}
|
||||
|
||||
impl Ord for RowKey {
|
||||
fn cmp(&self, other: &RowKey) -> Ordering {
|
||||
// Order by (keys asc, sequence desc, index_in_batch desc, value type desc), though (key,
|
||||
impl Ord for InnerKey {
|
||||
fn cmp(&self, other: &InnerKey) -> Ordering {
|
||||
// Order by (row_key asc, sequence desc, index_in_batch desc, value type desc), though (key,
|
||||
// sequence, index_in_batch) should be enough to disambiguate.
|
||||
self.keys
|
||||
.cmp(&other.keys)
|
||||
self.row_key
|
||||
.cmp(&other.row_key)
|
||||
.then_with(|| other.sequence.cmp(&self.sequence))
|
||||
.then_with(|| other.index_in_batch.cmp(&self.index_in_batch))
|
||||
.then_with(|| other.value_type.cmp(&self.value_type))
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for RowKey {
|
||||
fn partial_cmp(&self, other: &RowKey) -> Option<Ordering> {
|
||||
impl PartialOrd for InnerKey {
|
||||
fn partial_cmp(&self, other: &InnerKey) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
struct RowValue {
|
||||
_values: Vec<Value>,
|
||||
impl InnerKey {
|
||||
fn is_row_key_equal(&self, other: &InnerKey) -> bool {
|
||||
self.row_key == other.row_key
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct RowValue {
|
||||
values: Vec<Value>,
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use crate::metadata::{ColumnMetadata, ColumnsRowKeyMetadataRef};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct MemtableSchema {
|
||||
columns_row_key: ColumnsRowKeyMetadataRef,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user