mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-31 20:30:37 +00:00
feat: Prototype of the storage engine (#107)
* feat: memtable flush (#63) * wip: memtable flush * optimize schema conversion * remove unnecessary import * add parquet file verification * add backtrace to error * chore: upgrade opendal to 0.9 and fixed some problems * rename error * fix: error description Co-authored-by: Dennis Zhuang <killme2008@gmail.com> * feat: region manifest service (#57) * feat: adds Manifest API * feat: impl region manifest service * refactor: by CR comments * fix: storage error mod test * fix: tweak storage cargo * fix: tweak storage cargo * refactor: by CR comments * refactor: rename current_version * feat: add wal writer (#60) * feat: add Wal * upgrade engine for wal * fix: unit test for wal * feat: wal into region * fix: unix test * fix clippy * chore: by cr * chore: by cr * chore: prevent test data pollution * chore: by cr * minor fix * chore: by cr * feat: Implement flush (#65) * feat: Flush framework - feat: Add id to memtable - refactor: Rename MemtableSet/MutableMemtables to MemtableVersion/MemtableSet - feat: Freeze memtable - feat: Trigger flush - feat: Background job pool - feat: flush job - feat: Sst access layer - feat: Custom Deserialize for StringBytes - feat: Use RegionWriter to apply file metas - feat: Apply version edit - chore: Remove unused imports refactor: Use ParquetWriter to replace FlushTask refactor: FsAccessLayer takes object store as param chore: Remove todo from doc comments feat: Move wal to WriterContext chore: Fix clippy chore: Add backtrace to WriteWal error * feat: adds manifest to region and refactor sst/manifest dir config (#72) * feat: adds manifest to region and refactor sst/manifest dir with EngineConfig * refactor: ensure path ends with '/' in ManifestLogStorage * fix: style * refactor: normalize storage directory path and minor changes by CR * refactor: doesn't need slash any more * feat: Implement apply_edit() and add timestamp index to schema (#73) * feat: Implement VersionControl::apply_edit() * feat: Add timestamp index to schema * feat: 
Implement Schema::timestamp_column() * feat: persist region metadata to manifest (#74) * feat: persist metadata when creating region or sst files * fix: revert FileMeta comment * feat: resolve todo * fix: clippy warning * fix: revert files_to_remove type in RegionEdit * feat: impl SizeBasedStrategy for flush (#76) * feat: impl SizeBasedStrategy for flush * doc: get_mutable_limitation * fix: code style and comment * feat: align timestamp (#75) * feat: align timestamps in write batch * fix cr comments * fix timestamp overflow * simplify overflow check * fix cr comments * fix clippy issues * test: Fix region tests (comment out some unsupported tests) (#82) * feat: flush job (#80) * feat: flush job * fix cr comments * move file name instead of clone * comment log file test (#84) * feat: improve MemtableVersion (#78) * feat: improve MemtableVersion * feat: remove flushed immutable memtables and test MemtableVersion * refactor: by CR comments * refactor: clone kv in iterator * fix: clippy warning * refactor: Make BatchIterator supertrait of Iterator (#85) * refactor: rename Version to ManifestVersion and move out manifest from ShareData (#83) * feat: Insert multiple memtables by time range (#77) * feat: memtable::Inserter supports insert multiple memtables by time range * chore: Update timestamp comment * test: Add tests for Inserter * test: Fix region tests (comment out some unsupported tests) * refactor: align_timestamp() use TimestampMillis::aligned_by_bucket() * chore: rename aligned_by_bucket to align_by_bucket * fix: Fix compile errors * fix: sst and manifest dir (#86) * Set RowKeyDescriptor::enable_version_column to false by default * feat: Implement write stall (#90) * feat: Implement write stall * chore: Update comments * feat: Support reading multiple memtables (#93) * feat: Support reading multiple memtables * test: uncomment tests rely on snapshot read * feat: wal format (#70) * feat: wal codec * chore: minor fix * chore: comment * chore: by cr * chore: 
write_batch_codec mod * chore: by cr * chore: upgrade proto * chore: by cr * fix failing test * fix failing test * feat: manifest to wal (#100) * feat: write manifest to wal * chore: sequence into wal * chore: by cr * chore: by cr * refactor: create log store (#104) Co-authored-by: dennis zhuang <killme2008@gmail.com> Co-authored-by: Lei, Huang <6406592+v0y4g3r@users.noreply.github.com> Co-authored-by: fariygirl <clickmetoday@163.com> Co-authored-by: Jiachun Feng <jiachun_feng@proton.me> Co-authored-by: Lei, HUANG <mrsatangel@gmail.com> * chore: Fix clippy Co-authored-by: Lei, Huang <6406592+v0y4g3r@users.noreply.github.com> Co-authored-by: Dennis Zhuang <killme2008@gmail.com> Co-authored-by: Jiachun Feng <jiachun_feng@proton.me> Co-authored-by: fariygirl <clickmetoday@163.com> Co-authored-by: Lei, HUANG <mrsatangel@gmail.com>
This commit is contained in:
@@ -10,8 +10,11 @@ async-trait = "0.1"
|
||||
bytes = "1.1"
|
||||
common-base = { path = "../common/base" }
|
||||
common-error = { path = "../common/error" }
|
||||
common-time = { path = "../common/time" }
|
||||
datatypes = { path = "../datatypes" }
|
||||
futures = "0.3"
|
||||
object-store = { path = "../object-store" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
//! Storage related APIs
|
||||
|
||||
pub mod logstore;
|
||||
pub mod manifest;
|
||||
pub mod storage;
|
||||
|
||||
@@ -12,8 +12,8 @@ pub mod namespace;
|
||||
|
||||
/// `LogStore` serves as a Write-Ahead-Log for storage engine.
|
||||
#[async_trait::async_trait]
|
||||
pub trait LogStore {
|
||||
type Error: ErrorExt + Send + Sync;
|
||||
pub trait LogStore: Send + Sync + 'static {
|
||||
type Error: ErrorExt + Send + Sync + 'static;
|
||||
type Namespace: Namespace;
|
||||
type Entry: Entry;
|
||||
type AppendResponse: AppendResponse;
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
pub trait Namespace: Send + Sync + Clone {
|
||||
fn new(name: &str, id: u64) -> Self;
|
||||
|
||||
fn name(&self) -> &str;
|
||||
}
|
||||
|
||||
45
src/store-api/src/manifest.rs
Normal file
45
src/store-api/src/manifest.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
//! metadata service
|
||||
mod storage;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_error::ext::ErrorExt;
|
||||
use object_store::ObjectStore;
|
||||
use serde::{de::DeserializeOwned, Serialize};
|
||||
pub use storage::*;
|
||||
|
||||
pub type ManifestVersion = u64;
|
||||
pub const MIN_VERSION: u64 = 0;
|
||||
pub const MAX_VERSION: u64 = u64::MAX;
|
||||
|
||||
pub trait Metadata: Clone {}
|
||||
|
||||
pub trait MetadataId: Clone + Copy {}
|
||||
|
||||
/// The action to apply on metadata
|
||||
pub trait MetaAction: Serialize + DeserializeOwned {
|
||||
type MetadataId: MetadataId;
|
||||
|
||||
/// Returns the metadata id of the action
|
||||
fn metadata_id(&self) -> Self::MetadataId;
|
||||
}
|
||||
|
||||
/// Manifest service
|
||||
#[async_trait]
|
||||
pub trait Manifest: Send + Sync + Clone + 'static {
|
||||
type Error: ErrorExt + Send + Sync;
|
||||
type MetaAction: MetaAction;
|
||||
type MetadataId: MetadataId;
|
||||
type Metadata: Metadata;
|
||||
|
||||
fn new(id: Self::MetadataId, manifest_dir: &str, object_store: ObjectStore) -> Self;
|
||||
|
||||
/// Update metadata by the action
|
||||
async fn update(&self, action: Self::MetaAction) -> Result<ManifestVersion, Self::Error>;
|
||||
|
||||
/// Retrieve the latest metadata
|
||||
async fn load(&self) -> Result<Option<Self::Metadata>, Self::Error>;
|
||||
|
||||
async fn checkpoint(&self) -> Result<ManifestVersion, Self::Error>;
|
||||
|
||||
fn metadata_id(&self) -> Self::MetadataId;
|
||||
}
|
||||
41
src/store-api/src/manifest/storage.rs
Normal file
41
src/store-api/src/manifest/storage.rs
Normal file
@@ -0,0 +1,41 @@
|
||||
use async_trait::async_trait;
|
||||
use common_error::ext::ErrorExt;
|
||||
|
||||
use crate::manifest::ManifestVersion;
|
||||
|
||||
#[async_trait]
|
||||
pub trait LogIterator: Send + Sync {
|
||||
type Error: ErrorExt + Send + Sync;
|
||||
|
||||
async fn next_log(&mut self) -> Result<Option<(ManifestVersion, Vec<u8>)>, Self::Error>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait ManifestLogStorage {
|
||||
type Error: ErrorExt + Send + Sync;
|
||||
type Iter: LogIterator<Error = Self::Error>;
|
||||
|
||||
/// Scan the logs in [start, end)
|
||||
async fn scan(
|
||||
&self,
|
||||
start: ManifestVersion,
|
||||
end: ManifestVersion,
|
||||
) -> Result<Self::Iter, Self::Error>;
|
||||
|
||||
/// Save a log
|
||||
async fn save(&self, version: ManifestVersion, bytes: &[u8]) -> Result<(), Self::Error>;
|
||||
|
||||
/// Delete logs in [start, end)
|
||||
async fn delete(&self, start: ManifestVersion, end: ManifestVersion)
|
||||
-> Result<(), Self::Error>;
|
||||
|
||||
/// Save a checkpoint
|
||||
async fn save_checkpoint(
|
||||
&self,
|
||||
version: ManifestVersion,
|
||||
bytes: &[u8],
|
||||
) -> Result<(), Self::Error>;
|
||||
|
||||
/// Load the latest checkpoint
|
||||
async fn load_checkpoint(&self) -> Result<Option<(ManifestVersion, Vec<u8>)>, Self::Error>;
|
||||
}
|
||||
@@ -29,6 +29,12 @@ pub const VERSION_COLUMN_NAME: &str = "__version";
|
||||
// Names for default column family.
|
||||
pub const DEFAULT_CF_NAME: &str = "default";
|
||||
|
||||
// Name for reserved column: sequence
|
||||
pub const SEQUENCE_COLUMN_NAME: &str = "__sequence";
|
||||
|
||||
// Name for reserved column: value_type
|
||||
pub const VALUE_TYPE_COLUMN_NAME: &str = "__value_type";
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// ---------- Default options --------------------------------------------------
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
use datatypes::value::Value;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::manifest::MetadataId;
|
||||
use crate::storage::{consts, ColumnSchema, ConcreteDataType};
|
||||
|
||||
/// Id of column, unique in each region.
|
||||
@@ -7,6 +9,7 @@ pub type ColumnId = u32;
|
||||
/// Id of column family, unique in each region.
|
||||
pub type ColumnFamilyId = u32;
|
||||
pub type RegionId = u32;
|
||||
impl MetadataId for RegionId {}
|
||||
/// Default region name prefix
|
||||
pub const REGION_PREFIX: &str = "r_";
|
||||
|
||||
@@ -17,7 +20,7 @@ pub fn gen_region_name(id: RegionId) -> String {
|
||||
|
||||
// TODO(yingwen): Validate default value has same type with column, and name is a valid column name.
|
||||
/// A [ColumnDescriptor] contains information to create a column.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ColumnDescriptor {
|
||||
pub id: ColumnId,
|
||||
pub name: String,
|
||||
@@ -131,7 +134,7 @@ impl RowKeyDescriptorBuilder {
|
||||
Self {
|
||||
columns: Vec::new(),
|
||||
timestamp,
|
||||
enable_version_column: true,
|
||||
enable_version_column: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -254,7 +257,7 @@ mod tests {
|
||||
|
||||
let desc = RowKeyDescriptorBuilder::new(timestamp.clone()).build();
|
||||
assert!(desc.columns.is_empty());
|
||||
assert!(desc.enable_version_column);
|
||||
assert!(!desc.enable_version_column);
|
||||
|
||||
let desc = RowKeyDescriptorBuilder::new(timestamp.clone())
|
||||
.columns_capacity(1)
|
||||
@@ -266,7 +269,7 @@ mod tests {
|
||||
)
|
||||
.build();
|
||||
assert_eq!(2, desc.columns.len());
|
||||
assert!(desc.enable_version_column);
|
||||
assert!(!desc.enable_version_column);
|
||||
|
||||
let desc = RowKeyDescriptorBuilder::new(timestamp)
|
||||
.enable_version_column(false)
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_time::RangeMillis;
|
||||
use datatypes::schema::SchemaRef;
|
||||
use datatypes::vectors::VectorRef;
|
||||
|
||||
@@ -12,6 +15,11 @@ pub trait WriteRequest: Send {
|
||||
fn new(schema: SchemaRef) -> Self;
|
||||
|
||||
fn put(&mut self, put: Self::PutOp) -> Result<(), Self::Error>;
|
||||
|
||||
/// Returns all possible time ranges that contain the timestamp in this batch.
|
||||
///
|
||||
/// Each time range is aligned to given `duration`.
|
||||
fn time_ranges(&self, duration: Duration) -> Result<Vec<RangeMillis>, Self::Error>;
|
||||
}
|
||||
|
||||
/// Put multiple rows.
|
||||
|
||||
Reference in New Issue
Block a user