From 1594da337f8aeec5815e26f4780fec80b1d370ca Mon Sep 17 00:00:00 2001 From: evenyag Date: Fri, 20 May 2022 18:51:51 +0800 Subject: [PATCH] feat(store-api): Prototype of storage engine api (#33) --- Cargo.lock | 4 ++ src/datatypes/src/lib.rs | 2 +- src/store-api/Cargo.toml | 2 + src/store-api/src/storage.rs | 21 +++++++++ src/store-api/src/storage/column_family.rs | 4 ++ src/store-api/src/storage/descriptors.rs | 39 ++++++++++++++++ src/store-api/src/storage/engine.rs | 43 +++++++++++++++++ src/store-api/src/storage/region.rs | 54 ++++++++++++++++++++++ src/store-api/src/storage/requests.rs | 12 +++++ src/store-api/src/storage/responses.rs | 8 ++++ src/store-api/src/storage/snapshot.rs | 25 ++++++++++ 11 files changed, 213 insertions(+), 1 deletion(-) create mode 100644 src/store-api/src/storage/column_family.rs create mode 100644 src/store-api/src/storage/descriptors.rs create mode 100644 src/store-api/src/storage/engine.rs create mode 100644 src/store-api/src/storage/region.rs create mode 100644 src/store-api/src/storage/requests.rs create mode 100644 src/store-api/src/storage/responses.rs create mode 100644 src/store-api/src/storage/snapshot.rs diff --git a/Cargo.lock b/Cargo.lock index ccffc55011..10bd083294 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2812,6 +2812,10 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "store-api" version = "0.1.0" +dependencies = [ + "common-error", + "datatypes", +] [[package]] name = "streaming-decompression" diff --git a/src/datatypes/src/lib.rs b/src/datatypes/src/lib.rs index 8e8e1e0cc2..7877f342fd 100644 --- a/src/datatypes/src/lib.rs +++ b/src/datatypes/src/lib.rs @@ -1,7 +1,7 @@ #![feature(generic_associated_types)] pub mod arrow_array; -mod data_type; +pub mod data_type; pub mod deserialize; pub mod error; pub mod prelude; diff --git a/src/store-api/Cargo.toml b/src/store-api/Cargo.toml index ed15b54c0b..99cab0b2b6 100644 --- a/src/store-api/Cargo.toml +++ 
b/src/store-api/Cargo.toml @@ -6,3 +6,5 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +common-error = { path = "../common/error" } +datatypes = { path = "../datatypes" } diff --git a/src/store-api/src/storage.rs b/src/store-api/src/storage.rs index 04aeedce33..76f64fb9c6 100644 --- a/src/store-api/src/storage.rs +++ b/src/store-api/src/storage.rs @@ -1 +1,22 @@ //! Storage APIs. + +mod column_family; +mod descriptors; +mod engine; +mod region; +mod requests; +mod responses; +mod snapshot; + +pub use datatypes::data_type::ConcretDataType; +pub use datatypes::schema::SchemaRef; + +pub use self::column_family::ColumnFamily; +pub use self::descriptors::{ + ColumnDescriptor, ColumnFamilyDescriptor, KeyDescriptor, RegionDescriptor, +}; +pub use self::engine::{EngineContext, StorageEngine}; +pub use self::region::{Region, WriteContext}; +pub use self::requests::{GetRequest, ScanRequest, WriteRequest}; +pub use self::responses::{GetResponse, ScanResponse, WriteResponse}; +pub use self::snapshot::{ReadContext, Snapshot}; diff --git a/src/store-api/src/storage/column_family.rs b/src/store-api/src/storage/column_family.rs new file mode 100644 index 0000000000..f26a371b39 --- /dev/null +++ b/src/store-api/src/storage/column_family.rs @@ -0,0 +1,4 @@ +/// A group of value columns. +pub trait ColumnFamily: Send + Sync + Clone { + fn name(&self) -> &str; +} diff --git a/src/store-api/src/storage/descriptors.rs b/src/store-api/src/storage/descriptors.rs new file mode 100644 index 0000000000..7195b3b66c --- /dev/null +++ b/src/store-api/src/storage/descriptors.rs @@ -0,0 +1,39 @@ +use crate::storage::ConcretDataType; + +/// A [ColumnDescriptor] contains information about a column. +#[derive(Debug)] +pub struct ColumnDescriptor { + pub name: String, + pub data_type: ConcretDataType, + pub is_nullable: bool, +} + +/// A [KeyDescriptor] contains information about a row key. 
+#[derive(Debug)] +pub struct KeyDescriptor { + pub columns: Vec, + pub timestamp: ColumnDescriptor, + /// Enable version column in row key if this field is true. + /// + /// The default value is true. + pub enable_version_column: bool, +} + +/// A [ColumnFamilyDescriptor] contains information about a column family. +#[derive(Debug)] +pub struct ColumnFamilyDescriptor { + pub name: String, + /// Descriptors of columns in this column family. + pub columns: Vec, +} + +/// A [RegionDescriptor] contains information about a region. +#[derive(Debug)] +pub struct RegionDescriptor { + /// Row key descriptor of this region. + pub key: KeyDescriptor, + /// Default column family. + pub default_cf: ColumnFamilyDescriptor, + /// Extra column families defined by user. + pub extra_cfs: Vec, +} diff --git a/src/store-api/src/storage/engine.rs b/src/store-api/src/storage/engine.rs new file mode 100644 index 0000000000..42e45813ff --- /dev/null +++ b/src/store-api/src/storage/engine.rs @@ -0,0 +1,43 @@ +//! Storage Engine traits. +//! +//! [`StorageEngine`] is the abstraction over a multi-region, schematized data storage system, +//! a [`StorageEngine`] instance manages a bunch of storage units called [`Region`], which hold +//! chunks of rows and support operations like PUT/DELETE/SCAN. + +use common_error::ext::ErrorExt; + +use crate::storage::descriptors::RegionDescriptor; +use crate::storage::region::Region; + +/// Storage engine provides primitive operations to store and access data. +pub trait StorageEngine: Send + Sync + Clone { + type Error: ErrorExt + Send + Sync; + type Region: Region; + + /// Open an existing region. + fn open_region(&self, ctx: &EngineContext, name: &str) -> Result; + + /// Close given region. + fn close_region(&self, ctx: &EngineContext, region: Self::Region) -> Result<(), Self::Error>; + + /// Create and return a new region. + fn create_region( + &self, + ctx: &EngineContext, + descriptor: RegionDescriptor, + ) -> Result; + + /// Drop given region. 
+ fn drop_region(&self, ctx: &EngineContext, region: Self::Region) -> Result<(), Self::Error>; + + /// Return the opened region with given name. + fn get_region( + &self, + ctx: &EngineContext, + name: &str, + ) -> Result, Self::Error>; +} + +/// Storage engine context. +#[derive(Debug, Clone)] +pub struct EngineContext {} diff --git a/src/store-api/src/storage/region.rs b/src/store-api/src/storage/region.rs new file mode 100644 index 0000000000..81e149d0ea --- /dev/null +++ b/src/store-api/src/storage/region.rs @@ -0,0 +1,54 @@ +//! Region holds chunks of rows stored in the storage engine, but does not require that +//! rows must have continuous primary key range, which is implementation specific. +//! +//! Regions support operations like PUT/DELETE/SCAN that most key-value stores provide. +//! However, unlike key-value store, data stored in region has data model like: +//! +//! ```text +//! colk-1, ..., colk-m, timestamp, version -> colv-1, ..., colv-n +//! ``` +//! +//! The data model requires each row +//! - has 0 ~ m key column +//! - **MUST** have a timestamp column +//! - has a version column +//! - has 0 ~ n value column +//! +//! Each row is identified by (value of key columns, timestamp, version), which forms +//! a row key. Note that the implementation may allow multiple rows to have the same row +//! key (like ClickHouse), which is useful in analytic scenarios. + +use common_error::ext::ErrorExt; + +use crate::storage::column_family::ColumnFamily; +use crate::storage::requests::WriteRequest; +use crate::storage::responses::WriteResponse; +use crate::storage::snapshot::{ReadContext, Snapshot}; +use crate::storage::SchemaRef; + +/// Chunks of rows in storage engine. +pub trait Region: Send + Sync + Clone { + type Error: ErrorExt + Send + Sync; + type WriteRequest: WriteRequest; + type ColumnFamily: ColumnFamily; + type Snapshot: Snapshot; + + fn schema(&self) -> &SchemaRef; + + /// List all column families. 
+ fn list_cf(&self) -> Result, Self::Error>; + + /// Write updates to region. + fn write( + &self, + ctx: &WriteContext, + request: Self::WriteRequest, + ) -> Result; + + /// Create a snapshot for read. + fn snapshot(&self, ctx: &ReadContext) -> Result; +} + +/// Context for write operations. +#[derive(Debug, Clone)] +pub struct WriteContext {} diff --git a/src/store-api/src/storage/requests.rs b/src/store-api/src/storage/requests.rs new file mode 100644 index 0000000000..264ff6192a --- /dev/null +++ b/src/store-api/src/storage/requests.rs @@ -0,0 +1,12 @@ +use crate::storage::column_family::ColumnFamily; + +/// Write request holds a collection of updates to apply to a region. +pub trait WriteRequest: Send { + type ColumnFamily: ColumnFamily; +} + +#[derive(Debug)] +pub struct ScanRequest {} + +#[derive(Debug)] +pub struct GetRequest {} diff --git a/src/store-api/src/storage/responses.rs b/src/store-api/src/storage/responses.rs new file mode 100644 index 0000000000..823eb060d9 --- /dev/null +++ b/src/store-api/src/storage/responses.rs @@ -0,0 +1,8 @@ +#[derive(Debug)] +pub struct WriteResponse {} + +#[derive(Debug)] +pub struct ScanResponse {} + +#[derive(Debug)] +pub struct GetResponse {} diff --git a/src/store-api/src/storage/snapshot.rs b/src/store-api/src/storage/snapshot.rs new file mode 100644 index 0000000000..6913d8b96e --- /dev/null +++ b/src/store-api/src/storage/snapshot.rs @@ -0,0 +1,25 @@ +use common_error::ext::ErrorExt; +use datatypes::schema::SchemaRef; + +use crate::storage::column_family::ColumnFamily; +use crate::storage::requests::{GetRequest, ScanRequest}; +use crate::storage::responses::{GetResponse, ScanResponse}; + +/// A consistent read-only view of region. 
+pub trait Snapshot: Send + Sync { + type Error: ErrorExt + Send + Sync; + type ColumnFamily: ColumnFamily; + + fn schema(&self) -> &SchemaRef; + + fn scan(&self, ctx: &ReadContext, request: ScanRequest) -> Result; + + fn get(&self, ctx: &ReadContext, request: GetRequest) -> Result; + + /// List all column families. + fn list_cf(&self) -> Result, Self::Error>; +} + +/// Context for read. +#[derive(Debug, Clone)] +pub struct ReadContext {}