feat: Prototype of the storage engine (#107)

* feat: memtable flush (#63)

* wip: memtable flush

* optimize schema conversion

* remove unnecessary import

* add parquet file verfication

* add backtrace to error

* chore: upgrade opendal to 0.9 and fixed some problems

* rename error

* fix: error description

Co-authored-by: Dennis Zhuang <killme2008@gmail.com>

* feat: region manifest service (#57)

* feat: adds Manifest API

* feat: impl region manifest service

* refactor: by CR comments

* fix: storage error mod test

* fix: tweak storage cargo

* fix: tweak storage cargo

* refactor: by CR comments

* refactor: rename current_version

* feat: add wal writer (#60)

* feat: add Wal

* upgrade engine for wal

* fix: unit test for wal

* feat: wal into region

* fix: unix test

* fix clippy

* chore: by cr

* chore: by cr

* chore: prevent test data polution

* chore: by cr

* minor fix

* chore: by cr

* feat: Implement flush (#65)

* feat: Flush framework

- feat: Add id to memtable
- refactor: Rename MemtableSet/MutableMemtables to MemtableVersion/MemtableSet
- feat: Freeze memtable
- feat: Trigger flush
- feat: Background job pool
- feat: flush job
- feat: Sst access layer
- feat: Custom Deserialize for StringBytes
- feat: Use RegionWriter to apply file metas
- feat: Apply version edit
- chore: Remove unused imports

refactor: Use ParquetWriter to replace FlushTask

refactor: FsAccessLayer takes object store as param

chore: Remove todo from doc comments

feat: Move wal to WriterContext

chore: Fix clippy

chore: Add backtrace to WriteWal error

* feat: adds manifest to region and refactor sst/manifest dir config (#72)

* feat: adds manifest to region and refactor sst/manifest dir with EngineConfig

* refactor: ensure path ends with '/' in ManifestLogStorage

* fix: style

* refactor: normalize storage directory path and minor changes by CR

* refactor: doesn't need slash any more

* feat: Implement apply_edit() and add timestamp index to schema (#73)

* feat: Implement VersionControl::apply_edit()

* feat: Add timestamp index to schema

* feat: Implement Schema::timestamp_column()

* feat: persist region metadata to manifest (#74)

* feat: persist metadata when creating region or sst files

* fix: revert FileMeta comment

* feat: resolve todo

* fix: clippy warning

* fix: revert files_to_remove type in RegionEdit

* feat: impl SizeBasedStrategy for flush (#76)

* feat: impl SizeBasedStrategy for flush

* doc: get_mutable_limitation

* fix: code style and comment

* feat: align timestamp (#75)

* feat: align timestamps in write batch

* fix cr comments

* fix timestamp overflow

* simplify overflow check

* fix cr comments

* fix clippy issues

* test: Fix region tests (comment out some unsupported tests) (#82)

* feat: flush job (#80)

* feat: flush job

* fix cr comments

* move file name instead of clone

* comment log file test (#84)

* feat: improve MemtableVersion (#78)

* feat: improve MemtableVersion

* feat: remove flushed immutable memtables and test MemtableVersion

* refactor: by CR comments

* refactor: clone kv in iterator

* fix: clippy warning

* refactor: Make BatchIterator supertrait of Iterator (#85)

* refactor: rename Version to ManifestVersion and move out manifest from ShareData (#83)

* feat: Insert multiple memtables by time range (#77)

* feat: memtable::Inserter supports insert multiple memtables by time range

* chore: Update timestamp comment

* test: Add tests for Inserter

* test: Fix region tests (comment out some unsupported tests)

* refactor: align_timestamp() use TimestampMillis::aligned_by_bucket()

* chore: rename aligned_by_bucket to align_by_bucket

* fix: Fix compile errors

* fix: sst and manifest dir (#86)

* Set RowKeyDescriptor::enable_version_column to false by default

* feat: Implement write stall (#90)

* feat: Implement write stall

* chore: Update comments

* feat: Support reading multiple memtables (#93)

* feat: Support reading multiple memtables

* test: uncomment tests rely on snapshot read

* feat: wal format (#70)

* feat: wal codec

* chore: minor fix

* chore: comment

* chore: by cr

* chore: write_batch_codec mod

* chore: by cr

* chore: upgrade proto

* chore: by cr

* fix failing test

* fix failing test

* feat: manifest to wal (#100)

* feat: write manifest to wal

* chore: sequence into wal

* chore: by cr

* chore: by cr

* refactor: create log store (#104)

Co-authored-by: dennis zhuang <killme2008@gmail.com>
Co-authored-by: Lei, Huang <6406592+v0y4g3r@users.noreply.github.com>
Co-authored-by: fariygirl <clickmetoday@163.com>
Co-authored-by: Jiachun Feng <jiachun_feng@proton.me>
Co-authored-by: Lei, HUANG <mrsatangel@gmail.com>

* chore: Fix clippy

Co-authored-by: Lei, Huang <6406592+v0y4g3r@users.noreply.github.com>
Co-authored-by: Dennis Zhuang <killme2008@gmail.com>
Co-authored-by: Jiachun Feng <jiachun_feng@proton.me>
Co-authored-by: fariygirl <clickmetoday@163.com>
Co-authored-by: Lei, HUANG <mrsatangel@gmail.com>
This commit is contained in:
evenyag
2022-07-25 15:26:00 +08:00
committed by GitHub
parent 2b064265bf
commit bf5975ca3e
95 changed files with 5675 additions and 543 deletions

View File

@@ -14,6 +14,7 @@ common-telemetry = {path = "../common/telemetry" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git" , branch = "arrow2"}
datatypes = { path = "../datatypes" }
futures = "0.3"
log-store = { path = "../log-store" }
snafu = { version = "0.7", features = ["backtraces"] }
storage ={ path = "../storage" }
store-api ={ path = "../store-api" }
@@ -21,4 +22,5 @@ table = { path = "../table" }
[dev-dependencies]
datatypes = { path = "../datatypes" }
tokio = { version = "1.18", features = ["full"] }
tempdir = "0.3"
tokio = { version = "1.18", features = ["full"] }

View File

@@ -194,8 +194,8 @@ mod tests {
use crate::table::test;
#[tokio::test]
async fn test_creat_table_insert_scan() {
let (_engine, table, schema) = test::setup_test_engine_and_table().await;
async fn test_create_table_insert_scan() {
let (_engine, table, schema, _dir) = test::setup_test_engine_and_table().await;
assert_eq!(TableType::Base, table.table_type());
assert_eq!(schema, table.schema());

View File

@@ -3,14 +3,23 @@ use std::sync::Arc;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::SchemaRef;
use datatypes::schema::{ColumnSchema, Schema};
use log_store::fs::noop::NoopLogStore;
use storage::config::EngineConfig;
use storage::EngineImpl;
use table::engine::{EngineContext, TableEngine};
use table::engine::EngineContext;
use table::engine::TableEngine;
use table::requests::CreateTableRequest;
use table::TableRef;
use tempdir::TempDir;
use crate::engine::MitoEngine;
pub async fn setup_test_engine_and_table() -> (MitoEngine<EngineImpl>, TableRef, SchemaRef) {
pub async fn setup_test_engine_and_table() -> (
MitoEngine<EngineImpl<NoopLogStore>>,
TableRef,
SchemaRef,
TempDir,
) {
let column_schemas = vec![
ColumnSchema::new("host", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("ts", ConcreteDataType::int64_datatype(), true),
@@ -18,10 +27,22 @@ pub async fn setup_test_engine_and_table() -> (MitoEngine<EngineImpl>, TableRef,
ColumnSchema::new("memory", ConcreteDataType::float64_datatype(), true),
];
let table_engine = MitoEngine::<EngineImpl>::new(EngineImpl::new());
let dir = TempDir::new("setup_test_engine_and_table").unwrap();
let store_dir = dir.path().to_string_lossy();
let table_engine = MitoEngine::<EngineImpl<NoopLogStore>>::new(
EngineImpl::new(
EngineConfig::with_store_dir(&store_dir),
Arc::new(NoopLogStore::default()),
)
.await
.unwrap(),
);
let table_name = "demo";
let schema = Arc::new(Schema::new(column_schemas));
let schema = Arc::new(
Schema::with_timestamp_index(column_schemas, 1).expect("ts must be timestamp column"),
);
let table = table_engine
.create_table(
&EngineContext::default(),
@@ -34,5 +55,5 @@ pub async fn setup_test_engine_and_table() -> (MitoEngine<EngineImpl>, TableRef,
.await
.unwrap();
(table_engine, table, schema)
(table_engine, table, schema, dir)
}