feat(store-api): Prototype of storage engine api (#33)

This commit is contained in:
evenyag
2022-05-20 18:51:51 +08:00
committed by GitHub
parent e75a54b766
commit 1594da337f
11 changed files with 213 additions and 1 deletions

4
Cargo.lock generated
View File

@@ -2812,6 +2812,10 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "store-api"
version = "0.1.0"
dependencies = [
"common-error",
"datatypes",
]
[[package]]
name = "streaming-decompression"

View File

@@ -1,7 +1,7 @@
#![feature(generic_associated_types)]
pub mod arrow_array;
mod data_type;
pub mod data_type;
pub mod deserialize;
pub mod error;
pub mod prelude;

View File

@@ -6,3 +6,5 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
common-error = { path = "../common/error" }
datatypes = { path = "../datatypes" }

View File

@@ -1 +1,22 @@
//! Storage APIs.
// Private submodules; the items they declare are re-exported below so that
// callers only need to import from this module.
mod column_family;
mod descriptors;
mod engine;
mod region;
mod requests;
mod responses;
mod snapshot;
// Data type and schema types re-exported from the `datatypes` crate.
pub use datatypes::data_type::ConcretDataType;
pub use datatypes::schema::SchemaRef;
// Traits and types declared by the submodules above.
pub use self::column_family::ColumnFamily;
pub use self::descriptors::{
ColumnDescriptor, ColumnFamilyDescriptor, KeyDescriptor, RegionDescriptor,
};
pub use self::engine::{EngineContext, StorageEngine};
pub use self::region::{Region, WriteContext};
pub use self::requests::{GetRequest, ScanRequest, WriteRequest};
pub use self::responses::{GetResponse, ScanResponse, WriteResponse};
pub use self::snapshot::{ReadContext, Snapshot};

View File

@@ -0,0 +1,4 @@
/// A named group of value columns.
///
/// Implementors must be `Clone`, `Send` and `Sync`.
pub trait ColumnFamily: Clone + Send + Sync {
    /// Returns the name of this column family.
    fn name(&self) -> &str;
}

View File

@@ -0,0 +1,39 @@
use crate::storage::ConcretDataType;
/// A [ColumnDescriptor] contains information about a column.
#[derive(Debug)]
pub struct ColumnDescriptor {
/// Name of the column.
pub name: String,
/// Data type of the column.
pub data_type: ConcretDataType,
/// Whether the column is nullable.
pub is_nullable: bool,
}
/// A [KeyDescriptor] contains information about a row key.
#[derive(Debug)]
pub struct KeyDescriptor {
/// Descriptors of the key columns.
pub columns: Vec<ColumnDescriptor>,
/// Descriptor of the timestamp column.
pub timestamp: ColumnDescriptor,
/// Enable version column in row key if this field is true.
///
/// The default value is true.
// NOTE(review): this struct has no `Default` impl or constructor in view, so
// the default mentioned above is not enforced here; confirm where it is applied.
pub enable_version_column: bool,
}
/// A [ColumnFamilyDescriptor] contains information about a column family.
#[derive(Debug)]
pub struct ColumnFamilyDescriptor {
/// Name of the column family.
pub name: String,
/// Descriptors of columns in this column family.
pub columns: Vec<ColumnDescriptor>,
}
/// A [RegionDescriptor] contains information about a region.
///
/// It is made of a row key descriptor, one default column family and a list
/// of extra column families.
#[derive(Debug)]
pub struct RegionDescriptor {
/// Row key descriptor of this region.
pub key: KeyDescriptor,
/// Default column family.
pub default_cf: ColumnFamilyDescriptor,
/// Extra column families defined by user.
pub extra_cfs: Vec<ColumnFamilyDescriptor>,
}

View File

@@ -0,0 +1,43 @@
//! Storage Engine traits.
//!
//! [`StorageEngine`] is the abstraction over a multi-region, schematized data storage system.
//! A [`StorageEngine`] instance manages a set of storage units called [`Region`]s, each of which
//! holds chunks of rows and supports operations like PUT/DELETE/SCAN.
use common_error::ext::ErrorExt;
use crate::storage::descriptors::RegionDescriptor;
use crate::storage::region::Region;
/// Primitive operations for storing and accessing data, organised around the
/// lifecycle of regions (create, open, look up, close, drop).
///
/// Every operation receives an `EngineContext` and reports failures through
/// the implementation-defined `Error` type.
pub trait StorageEngine: Clone + Send + Sync {
    /// Error type returned by every engine operation.
    type Error: ErrorExt + Send + Sync;
    /// Region type managed by this engine.
    type Region: Region;

    /// Creates a new region from `descriptor` and returns it.
    fn create_region(
        &self,
        ctx: &EngineContext,
        descriptor: RegionDescriptor,
    ) -> Result<Self::Region, Self::Error>;

    /// Opens the existing region called `name`.
    fn open_region(&self, ctx: &EngineContext, name: &str) -> Result<Self::Region, Self::Error>;

    /// Returns the already opened region called `name`.
    ///
    /// The result is optional; presumably `None` means no region with that
    /// name is currently open (to be confirmed by implementations).
    fn get_region(
        &self,
        ctx: &EngineContext,
        name: &str,
    ) -> Result<Option<Self::Region>, Self::Error>;

    /// Closes the given `region`, consuming the handle.
    fn close_region(&self, ctx: &EngineContext, region: Self::Region) -> Result<(), Self::Error>;

    /// Drops the given `region`, consuming the handle.
    fn drop_region(&self, ctx: &EngineContext, region: Self::Region) -> Result<(), Self::Error>;
}
/// Storage engine context, passed to every `StorageEngine` operation.
///
/// It carries no data yet. `Default` is derived so callers can build one with
/// `EngineContext::default()` instead of spelling out its fields.
#[derive(Debug, Clone, Default)]
pub struct EngineContext {}

View File

@@ -0,0 +1,54 @@
//! A region holds chunks of rows stored in the storage engine, but does not require that
//! rows have a contiguous primary key range; that is implementation specific.
//!
//! Regions support operations like PUT/DELETE/SCAN that most key-value stores provide.
//! However, unlike a key-value store, data stored in a region has a data model like:
//!
//! ```text
//! colk-1, ..., colk-m, timestamp, version -> colv-1, ..., colv-n
//! ```
//!
//! The data model requires that each row
//! - has 0 ~ m key columns
//! - **MUST** have a timestamp column
//! - has a version column
//! - has 0 ~ n value columns
//!
//! Each row is identified by (values of key columns, timestamp, version), which forms
//! a row key. Note that the implementation may allow multiple rows to have the same row
//! key (like ClickHouse), which is useful in analytic scenarios.
use common_error::ext::ErrorExt;
use crate::storage::column_family::ColumnFamily;
use crate::storage::requests::WriteRequest;
use crate::storage::responses::WriteResponse;
use crate::storage::snapshot::{ReadContext, Snapshot};
use crate::storage::SchemaRef;
/// A handle to chunks of rows kept in the storage engine.
///
/// Reads go through a snapshot obtained from `snapshot`; updates are applied
/// with `write`.
pub trait Region: Clone + Send + Sync {
    /// Error type returned by region operations.
    type Error: ErrorExt + Send + Sync;
    /// Request type accepted by `write`.
    type WriteRequest: WriteRequest;
    /// Column family type returned by `list_cf`.
    type ColumnFamily: ColumnFamily;
    /// Snapshot type produced by `snapshot`.
    type Snapshot: Snapshot;

    /// Returns a reference to the schema of this region.
    fn schema(&self) -> &SchemaRef;

    /// Lists all column families.
    fn list_cf(&self) -> Result<Vec<Self::ColumnFamily>, Self::Error>;

    /// Creates a snapshot of this region for reading.
    fn snapshot(&self, ctx: &ReadContext) -> Result<Self::Snapshot, Self::Error>;

    /// Writes the updates carried by `request` to this region.
    fn write(
        &self,
        ctx: &WriteContext,
        request: Self::WriteRequest,
    ) -> Result<WriteResponse, Self::Error>;
}
/// Context for write operations.
///
/// It carries no data yet. `Default` is derived so callers can build one with
/// `WriteContext::default()` instead of spelling out its fields.
#[derive(Debug, Clone, Default)]
pub struct WriteContext {}

View File

@@ -0,0 +1,12 @@
use crate::storage::column_family::ColumnFamily;
/// A collection of updates to apply to a region.
///
/// Implementors must be `Send`.
pub trait WriteRequest: Send {
    /// Column family type associated with this request.
    type ColumnFamily: ColumnFamily;
}
/// Request passed to `Snapshot::scan`.
///
/// Currently a placeholder with no fields.
#[derive(Debug)]
pub struct ScanRequest {}
/// Request passed to `Snapshot::get`.
///
/// Currently a placeholder with no fields.
#[derive(Debug)]
pub struct GetRequest {}

View File

@@ -0,0 +1,8 @@
/// Response returned by `Region::write`.
///
/// Currently a placeholder with no fields.
#[derive(Debug)]
pub struct WriteResponse {}
/// Response returned by `Snapshot::scan`.
///
/// Currently a placeholder with no fields.
#[derive(Debug)]
pub struct ScanResponse {}
/// Response returned by `Snapshot::get`.
///
/// Currently a placeholder with no fields.
#[derive(Debug)]
pub struct GetResponse {}

View File

@@ -0,0 +1,25 @@
use common_error::ext::ErrorExt;
use datatypes::schema::SchemaRef;
use crate::storage::column_family::ColumnFamily;
use crate::storage::requests::{GetRequest, ScanRequest};
use crate::storage::responses::{GetResponse, ScanResponse};
/// A consistent, read-only view of a region.
pub trait Snapshot: Sync + Send {
    /// Error type returned by snapshot operations.
    type Error: ErrorExt + Send + Sync;
    /// Column family type returned by `list_cf`.
    type ColumnFamily: ColumnFamily;

    /// Returns a reference to the schema of this snapshot.
    fn schema(&self) -> &SchemaRef;

    /// Lists all column families.
    fn list_cf(&self) -> Result<Vec<Self::ColumnFamily>, Self::Error>;

    /// Handles a get `request` and returns the corresponding response.
    fn get(&self, ctx: &ReadContext, request: GetRequest) -> Result<GetResponse, Self::Error>;

    /// Handles a scan `request` and returns the corresponding response.
    fn scan(&self, ctx: &ReadContext, request: ScanRequest) -> Result<ScanResponse, Self::Error>;
}
/// Context for read operations.
///
/// It carries no data yet. `Default` is derived so callers can build one with
/// `ReadContext::default()` instead of spelling out its fields.
#[derive(Debug, Clone, Default)]
pub struct ReadContext {}