mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-27 08:29:59 +00:00
feat: Prototype of the storage engine (#107)
* feat: memtable flush (#63) * wip: memtable flush * optimize schema conversion * remove unnecessary import * add parquet file verfication * add backtrace to error * chore: upgrade opendal to 0.9 and fixed some problems * rename error * fix: error description Co-authored-by: Dennis Zhuang <killme2008@gmail.com> * feat: region manifest service (#57) * feat: adds Manifest API * feat: impl region manifest service * refactor: by CR comments * fix: storage error mod test * fix: tweak storage cargo * fix: tweak storage cargo * refactor: by CR comments * refactor: rename current_version * feat: add wal writer (#60) * feat: add Wal * upgrade engine for wal * fix: unit test for wal * feat: wal into region * fix: unix test * fix clippy * chore: by cr * chore: by cr * chore: prevent test data polution * chore: by cr * minor fix * chore: by cr * feat: Implement flush (#65) * feat: Flush framework - feat: Add id to memtable - refactor: Rename MemtableSet/MutableMemtables to MemtableVersion/MemtableSet - feat: Freeze memtable - feat: Trigger flush - feat: Background job pool - feat: flush job - feat: Sst access layer - feat: Custom Deserialize for StringBytes - feat: Use RegionWriter to apply file metas - feat: Apply version edit - chore: Remove unused imports refactor: Use ParquetWriter to replace FlushTask refactor: FsAccessLayer takes object store as param chore: Remove todo from doc comments feat: Move wal to WriterContext chore: Fix clippy chore: Add backtrace to WriteWal error * feat: adds manifest to region and refactor sst/manifest dir config (#72) * feat: adds manifest to region and refactor sst/manifest dir with EngineConfig * refactor: ensure path ends with '/' in ManifestLogStorage * fix: style * refactor: normalize storage directory path and minor changes by CR * refactor: doesn't need slash any more * feat: Implement apply_edit() and add timestamp index to schema (#73) * feat: Implement VersionControl::apply_edit() * feat: Add timestamp index to schema * feat: Implement Schema::timestamp_column() * feat: persist region metadata to manifest (#74) * feat: persist metadata when creating region or sst files * fix: revert FileMeta comment * feat: resolve todo * fix: clippy warning * fix: revert files_to_remove type in RegionEdit * feat: impl SizeBasedStrategy for flush (#76) * feat: impl SizeBasedStrategy for flush * doc: get_mutable_limitation * fix: code style and comment * feat: align timestamp (#75) * feat: align timestamps in write batch * fix cr comments * fix timestamp overflow * simplify overflow check * fix cr comments * fix clippy issues * test: Fix region tests (comment out some unsupported tests) (#82) * feat: flush job (#80) * feat: flush job * fix cr comments * move file name instead of clone * comment log file test (#84) * feat: improve MemtableVersion (#78) * feat: improve MemtableVersion * feat: remove flushed immutable memtables and test MemtableVersion * refactor: by CR comments * refactor: clone kv in iterator * fix: clippy warning * refactor: Make BatchIterator supertrait of Iterator (#85) * refactor: rename Version to ManifestVersion and move out manifest from ShareData (#83) * feat: Insert multiple memtables by time range (#77) * feat: memtable::Inserter supports insert multiple memtables by time range * chore: Update timestamp comment * test: Add tests for Inserter * test: Fix region tests (comment out some unsupported tests) * refactor: align_timestamp() use TimestampMillis::aligned_by_bucket() * chore: rename aligned_by_bucket to align_by_bucket * fix: Fix compile errors * fix: sst and manifest dir (#86) * Set RowKeyDescriptor::enable_version_column to false by default * feat: Implement write stall (#90) * feat: Implement write stall * chore: Update comments * feat: Support reading multiple memtables (#93) * feat: Support reading multiple memtables * test: uncomment tests rely on snapshot read * feat: wal format (#70) * feat: wal codec * chore: minor fix * chore: comment * chore: by cr * chore: write_batch_codec mod * chore: by cr * chore: upgrade proto * chore: by cr * fix failing test * fix failing test * feat: manifest to wal (#100) * feat: write manifest to wal * chore: sequence into wal * chore: by cr * chore: by cr * refactor: create log store (#104) Co-authored-by: dennis zhuang <killme2008@gmail.com> Co-authored-by: Lei, Huang <6406592+v0y4g3r@users.noreply.github.com> Co-authored-by: fariygirl <clickmetoday@163.com> Co-authored-by: Jiachun Feng <jiachun_feng@proton.me> Co-authored-by: Lei, HUANG <mrsatangel@gmail.com> * chore: Fix clippy Co-authored-by: Lei, Huang <6406592+v0y4g3r@users.noreply.github.com> Co-authored-by: Dennis Zhuang <killme2008@gmail.com> Co-authored-by: Jiachun Feng <jiachun_feng@proton.me> Co-authored-by: fariygirl <clickmetoday@163.com> Co-authored-by: Lei, HUANG <mrsatangel@gmail.com>
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -8,6 +8,9 @@
|
|||||||
# These are backup files generated by rustfmt
|
# These are backup files generated by rustfmt
|
||||||
**/*.rs.bk
|
**/*.rs.bk
|
||||||
|
|
||||||
|
# Mac DS_Store
|
||||||
|
**/*.DS_Store
|
||||||
|
|
||||||
debug/
|
debug/
|
||||||
|
|
||||||
# MSVC Windows builds of rustc generate these, which store debugging information
|
# MSVC Windows builds of rustc generate these, which store debugging information
|
||||||
|
|||||||
301
Cargo.lock
generated
301
Cargo.lock
generated
@@ -142,6 +142,17 @@ dependencies = [
|
|||||||
"strength_reduce",
|
"strength_reduce",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "async-channel"
|
||||||
|
version = "1.6.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2114d64672151c0c5eaa5e131ec84a74f06e1e559830dabba01ca30605d66319"
|
||||||
|
dependencies = [
|
||||||
|
"concurrent-queue",
|
||||||
|
"event-listener",
|
||||||
|
"futures-core",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-compat"
|
name = "async-compat"
|
||||||
version = "0.2.1"
|
version = "0.2.1"
|
||||||
@@ -308,6 +319,18 @@ dependencies = [
|
|||||||
"tower-service",
|
"tower-service",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "backon"
|
||||||
|
version = "0.0.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f334d8b7d003e7d4e17844b81ffbfcd24ad955777997440701c08a834e407105"
|
||||||
|
dependencies = [
|
||||||
|
"futures",
|
||||||
|
"pin-project",
|
||||||
|
"rand 0.8.5",
|
||||||
|
"tokio",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "backtrace"
|
name = "backtrace"
|
||||||
version = "0.3.65"
|
version = "0.3.65"
|
||||||
@@ -329,6 +352,12 @@ version = "0.13.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
|
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bit-vec"
|
||||||
|
version = "0.6.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bitflags"
|
name = "bitflags"
|
||||||
version = "1.3.2"
|
version = "1.3.2"
|
||||||
@@ -446,6 +475,15 @@ name = "bytes"
|
|||||||
version = "1.1.0"
|
version = "1.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
|
checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cache-padded"
|
||||||
|
version = "1.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c1db59621ec70f09c5e9b597b220c7a2b43611f4710dc03ceb8748637775692c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cast"
|
name = "cast"
|
||||||
@@ -456,6 +494,12 @@ dependencies = [
|
|||||||
"rustc_version",
|
"rustc_version",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "castaway"
|
||||||
|
version = "0.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a2698f953def977c68f935bb0dfa959375ad4638570e969e2f1e9f433cbf1af6"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cc"
|
name = "cc"
|
||||||
version = "1.0.73"
|
version = "1.0.73"
|
||||||
@@ -671,7 +715,7 @@ version = "0.1.0"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"common-error",
|
"common-error",
|
||||||
"common-telemetry",
|
"common-telemetry",
|
||||||
"metrics",
|
"metrics 0.18.1",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"paste",
|
"paste",
|
||||||
"snafu",
|
"snafu",
|
||||||
@@ -686,7 +730,7 @@ dependencies = [
|
|||||||
"backtrace",
|
"backtrace",
|
||||||
"common-error",
|
"common-error",
|
||||||
"console-subscriber",
|
"console-subscriber",
|
||||||
"metrics",
|
"metrics 0.18.1",
|
||||||
"metrics-exporter-prometheus",
|
"metrics-exporter-prometheus",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"opentelemetry",
|
"opentelemetry",
|
||||||
@@ -705,6 +749,15 @@ dependencies = [
|
|||||||
name = "common-time"
|
name = "common-time"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "concurrent-queue"
|
||||||
|
version = "1.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "30ed07550be01594c6026cff2a1d7fe9c8f683caa798e12b68694ac9e88286a3"
|
||||||
|
dependencies = [
|
||||||
|
"cache-padded",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "console-api"
|
name = "console-api"
|
||||||
version = "0.2.0"
|
version = "0.2.0"
|
||||||
@@ -949,6 +1002,37 @@ dependencies = [
|
|||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "curl"
|
||||||
|
version = "0.4.43"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "37d855aeef205b43f65a5001e0997d81f8efca7badad4fad7d897aa7f0d0651f"
|
||||||
|
dependencies = [
|
||||||
|
"curl-sys",
|
||||||
|
"libc",
|
||||||
|
"openssl-probe",
|
||||||
|
"openssl-sys",
|
||||||
|
"schannel",
|
||||||
|
"socket2",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "curl-sys"
|
||||||
|
version = "0.4.55+curl-7.83.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "23734ec77368ec583c2e61dd3f0b0e5c98b93abe6d2a004ca06b91dd7e3e2762"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"libc",
|
||||||
|
"libnghttp2-sys",
|
||||||
|
"libz-sys",
|
||||||
|
"openssl-sys",
|
||||||
|
"pkg-config",
|
||||||
|
"vcpkg",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "datafusion"
|
name = "datafusion"
|
||||||
version = "7.0.0"
|
version = "7.0.0"
|
||||||
@@ -1041,7 +1125,8 @@ dependencies = [
|
|||||||
"common-telemetry",
|
"common-telemetry",
|
||||||
"datatypes",
|
"datatypes",
|
||||||
"hyper",
|
"hyper",
|
||||||
"metrics",
|
"log-store",
|
||||||
|
"metrics 0.18.1",
|
||||||
"query",
|
"query",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
@@ -1051,6 +1136,7 @@ dependencies = [
|
|||||||
"store-api",
|
"store-api",
|
||||||
"table",
|
"table",
|
||||||
"table-engine",
|
"table-engine",
|
||||||
|
"tempdir",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-stream",
|
"tokio-stream",
|
||||||
"tonic",
|
"tonic",
|
||||||
@@ -1146,6 +1232,12 @@ dependencies = [
|
|||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "event-listener"
|
||||||
|
version = "2.5.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "77f3309417938f28bf8228fcff79a4a37103981e3e186d2ccd19c74b38f4eb71"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fallible-streaming-iterator"
|
name = "fallible-streaming-iterator"
|
||||||
version = "0.1.9"
|
version = "0.1.9"
|
||||||
@@ -1264,6 +1356,21 @@ version = "0.3.21"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b"
|
checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-lite"
|
||||||
|
version = "1.12.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7694489acd39452c77daa48516b894c153f192c3578d5a839b62c58099fcbf48"
|
||||||
|
dependencies = [
|
||||||
|
"fastrand",
|
||||||
|
"futures-core",
|
||||||
|
"futures-io",
|
||||||
|
"memchr",
|
||||||
|
"parking",
|
||||||
|
"pin-project-lite",
|
||||||
|
"waker-fn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-macro"
|
name = "futures-macro"
|
||||||
version = "0.3.21"
|
version = "0.3.21"
|
||||||
@@ -1594,6 +1701,33 @@ dependencies = [
|
|||||||
"nom",
|
"nom",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "isahc"
|
||||||
|
version = "1.7.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "334e04b4d781f436dc315cb1e7515bd96826426345d498149e4bde36b67f8ee9"
|
||||||
|
dependencies = [
|
||||||
|
"async-channel",
|
||||||
|
"castaway",
|
||||||
|
"crossbeam-utils",
|
||||||
|
"curl",
|
||||||
|
"curl-sys",
|
||||||
|
"encoding_rs",
|
||||||
|
"event-listener",
|
||||||
|
"futures-lite",
|
||||||
|
"http",
|
||||||
|
"log",
|
||||||
|
"mime",
|
||||||
|
"once_cell",
|
||||||
|
"polling",
|
||||||
|
"slab",
|
||||||
|
"sluice",
|
||||||
|
"tracing",
|
||||||
|
"tracing-futures",
|
||||||
|
"url",
|
||||||
|
"waker-fn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "itertools"
|
name = "itertools"
|
||||||
version = "0.10.3"
|
version = "0.10.3"
|
||||||
@@ -1723,6 +1857,28 @@ version = "0.2.125"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5916d2ae698f6de9bfb891ad7a8d65c09d232dc58cc4ac433c7da3b2fd84bc2b"
|
checksum = "5916d2ae698f6de9bfb891ad7a8d65c09d232dc58cc4ac433c7da3b2fd84bc2b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libnghttp2-sys"
|
||||||
|
version = "0.1.7+1.45.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "57ed28aba195b38d5ff02b9170cbff627e336a20925e43b4945390401c5dc93f"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libz-sys"
|
||||||
|
version = "1.1.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9702761c3935f8cc2f101793272e202c72b99da8f4224a19ddcf1279a6450bbf"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"libc",
|
||||||
|
"pkg-config",
|
||||||
|
"vcpkg",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lock_api"
|
name = "lock_api"
|
||||||
version = "0.4.7"
|
version = "0.4.7"
|
||||||
@@ -1870,6 +2026,16 @@ dependencies = [
|
|||||||
"metrics-macros",
|
"metrics-macros",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "metrics"
|
||||||
|
version = "0.19.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "142c53885123b68d94108295a09d4afe1a1388ed95b54d5dacd9a454753030f2"
|
||||||
|
dependencies = [
|
||||||
|
"ahash",
|
||||||
|
"metrics-macros",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "metrics-exporter-prometheus"
|
name = "metrics-exporter-prometheus"
|
||||||
version = "0.9.0"
|
version = "0.9.0"
|
||||||
@@ -1877,7 +2043,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "8b93b470b04c005178058e18ac8bb2eb3fda562cf87af5ea05ba8d44190d458c"
|
checksum = "8b93b470b04c005178058e18ac8bb2eb3fda562cf87af5ea05ba8d44190d458c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"indexmap",
|
"indexmap",
|
||||||
"metrics",
|
"metrics 0.18.1",
|
||||||
"metrics-util",
|
"metrics-util",
|
||||||
"parking_lot 0.11.2",
|
"parking_lot 0.11.2",
|
||||||
"quanta",
|
"quanta",
|
||||||
@@ -1905,7 +2071,7 @@ dependencies = [
|
|||||||
"crossbeam-epoch",
|
"crossbeam-epoch",
|
||||||
"crossbeam-utils",
|
"crossbeam-utils",
|
||||||
"hashbrown 0.11.2",
|
"hashbrown 0.11.2",
|
||||||
"metrics",
|
"metrics 0.18.1",
|
||||||
"num_cpus",
|
"num_cpus",
|
||||||
"parking_lot 0.11.2",
|
"parking_lot 0.11.2",
|
||||||
"quanta",
|
"quanta",
|
||||||
@@ -2178,9 +2344,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "opendal"
|
name = "opendal"
|
||||||
version = "0.6.2"
|
version = "0.9.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3649ace5a99d388ac9d02459135ad0425941e8cf6c33f418c4ded80483563ce3"
|
checksum = "e9e982034fd0b4f142efba461604f5ccb1fb1f962c4e84c8e44ce369f0e3d1f2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-compat",
|
"async-compat",
|
||||||
@@ -2193,15 +2359,14 @@ dependencies = [
|
|||||||
"hyper-tls",
|
"hyper-tls",
|
||||||
"log",
|
"log",
|
||||||
"md5",
|
"md5",
|
||||||
"metrics",
|
"metrics 0.19.0",
|
||||||
"minitrace",
|
"minitrace",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"parking_lot 0.12.0",
|
"parking_lot 0.12.0",
|
||||||
|
"percent-encoding",
|
||||||
"pin-project",
|
"pin-project",
|
||||||
"quick-xml",
|
"quick-xml",
|
||||||
"reqsign",
|
"reqsign",
|
||||||
"reqwest",
|
|
||||||
"roxmltree",
|
|
||||||
"serde",
|
"serde",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"time 0.3.9",
|
"time 0.3.9",
|
||||||
@@ -2323,6 +2488,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "96bcbab4bfea7a59c2c0fe47211a1ac4e3e96bea6eb446d704f310bc5c732ae2"
|
checksum = "96bcbab4bfea7a59c2c0fe47211a1ac4e3e96bea6eb446d704f310bc5c732ae2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"num-traits",
|
"num-traits",
|
||||||
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2341,6 +2507,12 @@ version = "6.0.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64"
|
checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "parking"
|
||||||
|
version = "2.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "parking_lot"
|
name = "parking_lot"
|
||||||
version = "0.11.2"
|
version = "0.11.2"
|
||||||
@@ -2577,6 +2749,19 @@ dependencies = [
|
|||||||
"plotters-backend",
|
"plotters-backend",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "polling"
|
||||||
|
version = "2.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "685404d509889fade3e86fe3a5803bca2ec09b0c0778d5ada6ec8bf7a8de5259"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"libc",
|
||||||
|
"log",
|
||||||
|
"wepoll-ffi",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ppv-lite86"
|
name = "ppv-lite86"
|
||||||
version = "0.2.16"
|
version = "0.2.16"
|
||||||
@@ -2585,9 +2770,9 @@ checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "prettyplease"
|
name = "prettyplease"
|
||||||
version = "0.1.14"
|
version = "0.1.16"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c3662417e650bd6af740f5b8b3501776aa10c3d5cbd10b40263ed250d3770884"
|
checksum = "da6ffbe862780245013cb1c0a48c4e44b7d665548088f91f6b90876d0625e4c2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"syn",
|
"syn",
|
||||||
@@ -2714,7 +2899,7 @@ dependencies = [
|
|||||||
"datatypes",
|
"datatypes",
|
||||||
"futures",
|
"futures",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"metrics",
|
"metrics 0.18.1",
|
||||||
"num",
|
"num",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"rand 0.8.5",
|
"rand 0.8.5",
|
||||||
@@ -2727,9 +2912,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "quick-xml"
|
name = "quick-xml"
|
||||||
version = "0.22.0"
|
version = "0.23.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8533f14c8382aaad0d592c812ac3b826162128b65662331e1127b45c3d18536b"
|
checksum = "9279fbdacaad3baf559d8cabe0acc3d06e30ea14931af31af79578ac0946decc"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"memchr",
|
"memchr",
|
||||||
"serde",
|
"serde",
|
||||||
@@ -2910,12 +3095,12 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "reqsign"
|
name = "reqsign"
|
||||||
version = "0.0.3"
|
version = "0.1.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8931679eac96ffc8eee4e5507c4b91fbc0799f29a6535707ee3ef89c0d0de426"
|
checksum = "9a6b48d7d1f390bcb0149b4d7a3022f5a927fca173c19413ba17e74936716e39"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-trait",
|
"backon",
|
||||||
"base64",
|
"base64",
|
||||||
"bytes",
|
"bytes",
|
||||||
"dirs",
|
"dirs",
|
||||||
@@ -2923,18 +3108,17 @@ dependencies = [
|
|||||||
"hex",
|
"hex",
|
||||||
"hmac",
|
"hmac",
|
||||||
"http",
|
"http",
|
||||||
|
"isahc",
|
||||||
"jsonwebtoken",
|
"jsonwebtoken",
|
||||||
"log",
|
"log",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
"reqwest",
|
"quick-xml",
|
||||||
"roxmltree",
|
|
||||||
"rust-ini",
|
"rust-ini",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"sha2",
|
"sha2",
|
||||||
"time 0.3.9",
|
"time 0.3.9",
|
||||||
"tokio",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2996,15 +3180,6 @@ dependencies = [
|
|||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "roxmltree"
|
|
||||||
version = "0.14.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "921904a62e410e37e215c40381b7117f830d9d89ba60ab5236170541dd25646b"
|
|
||||||
dependencies = [
|
|
||||||
"xmlparser",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rust-ini"
|
name = "rust-ini"
|
||||||
version = "0.18.0"
|
version = "0.18.0"
|
||||||
@@ -3216,10 +3391,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32"
|
checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "smallvec"
|
name = "sluice"
|
||||||
version = "1.8.0"
|
version = "0.5.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83"
|
checksum = "6d7400c0eff44aa2fcb5e31a5f24ba9716ed90138769e4977a2ba6014ae63eb5"
|
||||||
|
dependencies = [
|
||||||
|
"async-channel",
|
||||||
|
"futures-core",
|
||||||
|
"futures-io",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "smallvec"
|
||||||
|
version = "1.9.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "snafu"
|
name = "snafu"
|
||||||
@@ -3295,16 +3481,35 @@ name = "storage"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arc-swap",
|
"arc-swap",
|
||||||
|
"arrow-format",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"atomic_float",
|
"atomic_float",
|
||||||
|
"bit-vec",
|
||||||
|
"bytes",
|
||||||
"common-error",
|
"common-error",
|
||||||
|
"common-runtime",
|
||||||
"common-telemetry",
|
"common-telemetry",
|
||||||
|
"common-time",
|
||||||
"criterion",
|
"criterion",
|
||||||
"datatypes",
|
"datatypes",
|
||||||
|
"futures",
|
||||||
|
"futures-util",
|
||||||
|
"lazy_static",
|
||||||
|
"log-store",
|
||||||
|
"object-store",
|
||||||
|
"planus",
|
||||||
|
"prost",
|
||||||
"rand 0.8.5",
|
"rand 0.8.5",
|
||||||
|
"regex",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
"snafu",
|
"snafu",
|
||||||
"store-api",
|
"store-api",
|
||||||
|
"tempdir",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
"tonic",
|
||||||
|
"tonic-build",
|
||||||
|
"uuid",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -3316,8 +3521,11 @@ dependencies = [
|
|||||||
"bytes",
|
"bytes",
|
||||||
"common-base",
|
"common-base",
|
||||||
"common-error",
|
"common-error",
|
||||||
|
"common-time",
|
||||||
"datatypes",
|
"datatypes",
|
||||||
"futures",
|
"futures",
|
||||||
|
"object-store",
|
||||||
|
"serde",
|
||||||
"snafu",
|
"snafu",
|
||||||
"tokio",
|
"tokio",
|
||||||
]
|
]
|
||||||
@@ -3422,10 +3630,12 @@ dependencies = [
|
|||||||
"datafusion-common",
|
"datafusion-common",
|
||||||
"datatypes",
|
"datatypes",
|
||||||
"futures",
|
"futures",
|
||||||
|
"log-store",
|
||||||
"snafu",
|
"snafu",
|
||||||
"storage",
|
"storage",
|
||||||
"store-api",
|
"store-api",
|
||||||
"table",
|
"table",
|
||||||
|
"tempdir",
|
||||||
"tokio",
|
"tokio",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -4004,9 +4214,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "uuid"
|
name = "uuid"
|
||||||
version = "1.0.0"
|
version = "1.1.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8cfcd319456c4d6ea10087ed423473267e1a071f3bc0aa89f80d60997843c6f0"
|
checksum = "dd6469f4314d5f1ffec476e05f17cc9a78bc7a27a6a857842170bdf8d6f98d2f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"getrandom",
|
"getrandom",
|
||||||
]
|
]
|
||||||
@@ -4029,6 +4239,12 @@ version = "0.9.4"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "waker-fn"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "walkdir"
|
name = "walkdir"
|
||||||
version = "2.3.2"
|
version = "2.3.2"
|
||||||
@@ -4144,6 +4360,15 @@ dependencies = [
|
|||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wepoll-ffi"
|
||||||
|
version = "0.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d743fdedc5c64377b5fc2bc036b01c7fd642205a0d96356034ae3404d49eb7fb"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "which"
|
name = "which"
|
||||||
version = "4.2.5"
|
version = "4.2.5"
|
||||||
@@ -4238,12 +4463,6 @@ dependencies = [
|
|||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "xmlparser"
|
|
||||||
version = "0.13.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "114ba2b24d2167ef6d67d7d04c8cc86522b87f490025f39f0303b7db5bf5e3d8"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zstd"
|
name = "zstd"
|
||||||
version = "0.10.0+zstd.1.5.2"
|
version = "0.10.0+zstd.1.5.2"
|
||||||
|
|||||||
@@ -5,11 +5,11 @@ members = [
|
|||||||
"src/common/base",
|
"src/common/base",
|
||||||
"src/common/error",
|
"src/common/error",
|
||||||
"src/common/function",
|
"src/common/function",
|
||||||
|
"src/common/query",
|
||||||
|
"src/common/recordbatch",
|
||||||
"src/common/runtime",
|
"src/common/runtime",
|
||||||
"src/common/telemetry",
|
"src/common/telemetry",
|
||||||
"src/common/time",
|
"src/common/time",
|
||||||
"src/common/query",
|
|
||||||
"src/common/recordbatch",
|
|
||||||
"src/cmd",
|
"src/cmd",
|
||||||
"src/datanode",
|
"src/datanode",
|
||||||
"src/datatypes",
|
"src/datatypes",
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use datanode::{Datanode, DatanodeOptions};
|
use datanode::datanode::{Datanode, DatanodeOptions};
|
||||||
use snafu::ResultExt;
|
use snafu::ResultExt;
|
||||||
|
|
||||||
use crate::error::{Result, StartDatanodeSnafu};
|
use crate::error::{Result, StartDatanodeSnafu};
|
||||||
@@ -40,6 +40,7 @@ struct StartCommand {
|
|||||||
impl StartCommand {
|
impl StartCommand {
|
||||||
async fn run(self) -> Result<()> {
|
async fn run(self) -> Result<()> {
|
||||||
Datanode::new(self.into())
|
Datanode::new(self.into())
|
||||||
|
.await
|
||||||
.context(StartDatanodeSnafu)?
|
.context(StartDatanodeSnafu)?
|
||||||
.start()
|
.start()
|
||||||
.await
|
.await
|
||||||
@@ -52,6 +53,7 @@ impl From<StartCommand> for DatanodeOptions {
|
|||||||
DatanodeOptions {
|
DatanodeOptions {
|
||||||
http_addr: cmd.http_addr,
|
http_addr: cmd.http_addr,
|
||||||
rpc_addr: cmd.rpc_addr,
|
rpc_addr: cmd.rpc_addr,
|
||||||
|
..Default::default()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ version = "0.1.0"
|
|||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
bytes = "1.1"
|
bytes = { version = "1.1", features = ["serde"] }
|
||||||
common-error = { path = "../error" }
|
common-error = { path = "../error" }
|
||||||
paste = "1.0"
|
paste = "1.0"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
||||||
use serde::{Serialize, Serializer};
|
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||||
|
|
||||||
/// Bytes buffer.
|
/// Bytes buffer.
|
||||||
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord)]
|
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize)]
|
||||||
pub struct Bytes(bytes::Bytes);
|
pub struct Bytes(bytes::Bytes);
|
||||||
|
|
||||||
impl From<bytes::Bytes> for Bytes {
|
impl From<bytes::Bytes> for Bytes {
|
||||||
@@ -56,15 +56,6 @@ impl PartialEq<Bytes> for [u8] {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Serialize for Bytes {
|
|
||||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
|
||||||
where
|
|
||||||
S: Serializer,
|
|
||||||
{
|
|
||||||
self.0.serialize(serializer)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// String buffer that can hold arbitrary encoding string (only support UTF-8 now).
|
/// String buffer that can hold arbitrary encoding string (only support UTF-8 now).
|
||||||
///
|
///
|
||||||
/// Now this buffer is restricted to only hold valid UTF-8 string (only allow constructing `StringBytes`
|
/// Now this buffer is restricted to only hold valid UTF-8 string (only allow constructing `StringBytes`
|
||||||
@@ -128,6 +119,17 @@ impl Serialize for StringBytes {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Custom Deserialize to ensure UTF-8 check is always done.
|
||||||
|
impl<'de> Deserialize<'de> for StringBytes {
|
||||||
|
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||||
|
where
|
||||||
|
D: Deserializer<'de>,
|
||||||
|
{
|
||||||
|
let s = String::deserialize(deserializer)?;
|
||||||
|
Ok(StringBytes::from(s))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|||||||
@@ -34,6 +34,11 @@ pub enum StatusCode {
|
|||||||
TableNotFound,
|
TableNotFound,
|
||||||
TableColumnNotFound,
|
TableColumnNotFound,
|
||||||
// ====== End of catalog related status code =======
|
// ====== End of catalog related status code =======
|
||||||
|
|
||||||
|
// ====== Begin of storage related status code =====
|
||||||
|
/// Storage is temporarily unable to handle the request
|
||||||
|
StorageUnavailable,
|
||||||
|
// ====== End of storage related status code =======
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for StatusCode {
|
impl fmt::Display for StatusCode {
|
||||||
|
|||||||
@@ -9,4 +9,4 @@ pub use global::{
|
|||||||
spawn_read, spawn_write, write_runtime,
|
spawn_read, spawn_write, write_runtime,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub use crate::runtime::{Builder, JoinHandle, Runtime};
|
pub use crate::runtime::{Builder, JoinError, JoinHandle, Runtime};
|
||||||
|
|||||||
@@ -6,13 +6,13 @@ use metrics::{decrement_gauge, increment_gauge};
|
|||||||
use snafu::ResultExt;
|
use snafu::ResultExt;
|
||||||
use tokio::runtime::{Builder as RuntimeBuilder, Handle};
|
use tokio::runtime::{Builder as RuntimeBuilder, Handle};
|
||||||
use tokio::sync::oneshot;
|
use tokio::sync::oneshot;
|
||||||
pub use tokio::task::JoinHandle;
|
pub use tokio::task::{JoinError, JoinHandle};
|
||||||
|
|
||||||
use crate::error::*;
|
use crate::error::*;
|
||||||
use crate::metric::*;
|
use crate::metric::*;
|
||||||
|
|
||||||
/// A runtime to run future tasks
|
/// A runtime to run future tasks
|
||||||
#[derive(Clone)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct Runtime {
|
pub struct Runtime {
|
||||||
handle: Handle,
|
handle: Handle,
|
||||||
// Used to receive a drop signal when dropper is dropped, inspired by databend
|
// Used to receive a drop signal when dropper is dropped, inspired by databend
|
||||||
@@ -20,6 +20,7 @@ pub struct Runtime {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Dropping the dropper will cause runtime to shutdown.
|
/// Dropping the dropper will cause runtime to shutdown.
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct Dropper {
|
pub struct Dropper {
|
||||||
close: Option<oneshot::Sender<()>>,
|
close: Option<oneshot::Sender<()>>,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ pub struct TimeRange<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<T> TimeRange<T> {
|
impl<T> TimeRange<T> {
|
||||||
/// Create a new range that contains timestamp in `[start, end)`.
|
/// Creates a new range that contains timestamp in `[start, end)`.
|
||||||
///
|
///
|
||||||
/// Returns `None` if `start` > `end`.
|
/// Returns `None` if `start` > `end`.
|
||||||
pub fn new<U: PartialOrd + Into<T>>(start: U, end: U) -> Option<TimeRange<T>> {
|
pub fn new<U: PartialOrd + Into<T>>(start: U, end: U) -> Option<TimeRange<T>> {
|
||||||
@@ -23,6 +23,14 @@ impl<T> TimeRange<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Given a value, creates an empty time range that `start == end == value`.
|
||||||
|
pub fn empty_with_value<U: Clone + Into<T>>(value: U) -> TimeRange<T> {
|
||||||
|
TimeRange {
|
||||||
|
start: value.clone().into(),
|
||||||
|
end: value.into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the lower bound of the range (inclusive).
|
/// Returns the lower bound of the range (inclusive).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn start(&self) -> &T {
|
pub fn start(&self) -> &T {
|
||||||
@@ -71,6 +79,10 @@ mod tests {
|
|||||||
assert_eq!(range_eq.start(), range_eq.end());
|
assert_eq!(range_eq.start(), range_eq.end());
|
||||||
|
|
||||||
assert_eq!(None, RangeMillis::new(1, 0));
|
assert_eq!(None, RangeMillis::new(1, 0));
|
||||||
|
|
||||||
|
let range = RangeMillis::empty_with_value(1024);
|
||||||
|
assert_eq!(range.start(), range.end());
|
||||||
|
assert_eq!(1024, *range.start());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
|
|
||||||
/// Unix timestamp in millisecond resolution.
|
/// Unix timestamp in millisecond resolution.
|
||||||
|
///
|
||||||
|
/// Negative timestamp is allowed, which represents timestamp before '1970-01-01T00:00:00'.
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
pub struct TimestampMillis(i64);
|
pub struct TimestampMillis(i64);
|
||||||
|
|
||||||
@@ -18,6 +20,29 @@ impl TimestampMillis {
|
|||||||
pub const fn new(ms: i64) -> TimestampMillis {
|
pub const fn new(ms: i64) -> TimestampMillis {
|
||||||
TimestampMillis(ms)
|
TimestampMillis(ms)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the timestamp aligned by `bucket_duration` in milliseconds or
|
||||||
|
/// `None` if overflow occurred.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
/// Panics if `bucket_duration <= 0`.
|
||||||
|
pub fn align_by_bucket(self, bucket_duration: i64) -> Option<TimestampMillis> {
|
||||||
|
assert!(bucket_duration > 0);
|
||||||
|
|
||||||
|
let ts = if self.0 >= 0 {
|
||||||
|
self.0
|
||||||
|
} else {
|
||||||
|
// `bucket_duration > 0` implies `bucket_duration - 1` won't overflow.
|
||||||
|
self.0.checked_sub(bucket_duration - 1)?
|
||||||
|
};
|
||||||
|
|
||||||
|
Some(TimestampMillis(ts / bucket_duration * bucket_duration))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the timestamp value as i64.
|
||||||
|
pub fn as_i64(&self) -> i64 {
|
||||||
|
self.0
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<i64> for TimestampMillis {
|
impl From<i64> for TimestampMillis {
|
||||||
@@ -60,6 +85,7 @@ mod tests {
|
|||||||
let timestamp = TimestampMillis::from(ts);
|
let timestamp = TimestampMillis::from(ts);
|
||||||
assert_eq!(timestamp, ts);
|
assert_eq!(timestamp, ts);
|
||||||
assert_eq!(ts, timestamp);
|
assert_eq!(ts, timestamp);
|
||||||
|
assert_eq!(ts, timestamp.as_i64());
|
||||||
|
|
||||||
assert_ne!(TimestampMillis::new(0), timestamp);
|
assert_ne!(TimestampMillis::new(0), timestamp);
|
||||||
assert!(TimestampMillis::new(-123) < TimestampMillis::new(0));
|
assert!(TimestampMillis::new(-123) < TimestampMillis::new(0));
|
||||||
@@ -70,4 +96,28 @@ mod tests {
|
|||||||
assert_eq!(i64::MAX - 1, TimestampMillis::MAX);
|
assert_eq!(i64::MAX - 1, TimestampMillis::MAX);
|
||||||
assert_eq!(i64::MIN, TimestampMillis::MIN);
|
assert_eq!(i64::MIN, TimestampMillis::MIN);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_align_by_bucket() {
|
||||||
|
let bucket = 100;
|
||||||
|
assert_eq!(0, TimestampMillis::new(0).align_by_bucket(bucket).unwrap());
|
||||||
|
assert_eq!(0, TimestampMillis::new(1).align_by_bucket(bucket).unwrap());
|
||||||
|
assert_eq!(0, TimestampMillis::new(99).align_by_bucket(bucket).unwrap());
|
||||||
|
assert_eq!(
|
||||||
|
100,
|
||||||
|
TimestampMillis::new(100).align_by_bucket(bucket).unwrap()
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
100,
|
||||||
|
TimestampMillis::new(199).align_by_bucket(bucket).unwrap()
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(0, TimestampMillis::MAX.align_by_bucket(i64::MAX).unwrap());
|
||||||
|
assert_eq!(
|
||||||
|
i64::MAX,
|
||||||
|
TimestampMillis::INF.align_by_bucket(i64::MAX).unwrap()
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(None, TimestampMillis::MIN.align_by_bucket(bucket));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ common-recordbatch = { path = "../common/recordbatch" }
|
|||||||
common-telemetry = { path = "../common/telemetry" }
|
common-telemetry = { path = "../common/telemetry" }
|
||||||
datatypes = { path = "../datatypes"}
|
datatypes = { path = "../datatypes"}
|
||||||
hyper = { version = "0.14", features = ["full"] }
|
hyper = { version = "0.14", features = ["full"] }
|
||||||
|
log-store = { path = "../log-store" }
|
||||||
metrics = "0.18"
|
metrics = "0.18"
|
||||||
query = { path = "../query" }
|
query = { path = "../query" }
|
||||||
serde = "1.0"
|
serde = "1.0"
|
||||||
@@ -34,6 +35,7 @@ tower-http = { version ="0.3", features = ["full"]}
|
|||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
axum-test-helper = "0.1"
|
axum-test-helper = "0.1"
|
||||||
common-query = { path = "../common/query" }
|
common-query = { path = "../common/query" }
|
||||||
|
tempdir = "0.3"
|
||||||
|
|
||||||
[dev-dependencies.arrow]
|
[dev-dependencies.arrow]
|
||||||
package = "arrow2"
|
package = "arrow2"
|
||||||
|
|||||||
@@ -8,11 +8,23 @@ use crate::error::{NewCatalogSnafu, Result};
|
|||||||
use crate::instance::{Instance, InstanceRef};
|
use crate::instance::{Instance, InstanceRef};
|
||||||
use crate::server::Services;
|
use crate::server::Services;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct DatanodeOptions {
|
pub struct DatanodeOptions {
|
||||||
pub http_addr: String,
|
pub http_addr: String,
|
||||||
pub rpc_addr: String,
|
pub rpc_addr: String,
|
||||||
|
pub wal_dir: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Default for DatanodeOptions {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
http_addr: Default::default(),
|
||||||
|
rpc_addr: Default::default(),
|
||||||
|
wal_dir: "/tmp/wal".to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Datanode service.
|
/// Datanode service.
|
||||||
pub struct Datanode {
|
pub struct Datanode {
|
||||||
opts: DatanodeOptions,
|
opts: DatanodeOptions,
|
||||||
@@ -22,9 +34,9 @@ pub struct Datanode {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Datanode {
|
impl Datanode {
|
||||||
pub fn new(opts: DatanodeOptions) -> Result<Datanode> {
|
pub async fn new(opts: DatanodeOptions) -> Result<Datanode> {
|
||||||
let catalog_list = memory::new_memory_catalog_list().context(NewCatalogSnafu)?;
|
let catalog_list = memory::new_memory_catalog_list().context(NewCatalogSnafu)?;
|
||||||
let instance = Arc::new(Instance::new(catalog_list.clone()));
|
let instance = Arc::new(Instance::new(&opts, catalog_list.clone()).await?);
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
opts,
|
opts,
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ use std::any::Any;
|
|||||||
use common_error::ext::BoxedError;
|
use common_error::ext::BoxedError;
|
||||||
use common_error::prelude::*;
|
use common_error::prelude::*;
|
||||||
use datatypes::prelude::ConcreteDataType;
|
use datatypes::prelude::ConcreteDataType;
|
||||||
|
use storage::error::Error as StorageError;
|
||||||
use table::error::Error as TableError;
|
use table::error::Error as TableError;
|
||||||
use table_engine::error::Error as TableEngineError;
|
use table_engine::error::Error as TableEngineError;
|
||||||
|
|
||||||
@@ -92,6 +93,15 @@ pub enum Error {
|
|||||||
|
|
||||||
#[snafu(display("Fail to start gRPC server, source: {}", source))]
|
#[snafu(display("Fail to start gRPC server, source: {}", source))]
|
||||||
StartGrpc { source: tonic::transport::Error },
|
StartGrpc { source: tonic::transport::Error },
|
||||||
|
|
||||||
|
#[snafu(display("Failed to create directory {}, source: {}", dir, source))]
|
||||||
|
CreateDir { dir: String, source: std::io::Error },
|
||||||
|
|
||||||
|
#[snafu(display("Failed to open log store, source: {}", source))]
|
||||||
|
OpenLogStore { source: log_store::error::Error },
|
||||||
|
|
||||||
|
#[snafu(display("Failed to storage engine, source: {}", source))]
|
||||||
|
OpenStorageEngine { source: StorageError },
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, Error>;
|
pub type Result<T> = std::result::Result<T, Error>;
|
||||||
@@ -112,7 +122,10 @@ impl ErrorExt for Error {
|
|||||||
Error::StartHttp { .. }
|
Error::StartHttp { .. }
|
||||||
| Error::ParseAddr { .. }
|
| Error::ParseAddr { .. }
|
||||||
| Error::TcpBind { .. }
|
| Error::TcpBind { .. }
|
||||||
| Error::StartGrpc { .. } => StatusCode::Internal,
|
| Error::StartGrpc { .. }
|
||||||
|
| Error::CreateDir { .. } => StatusCode::Internal,
|
||||||
|
Error::OpenLogStore { source } => source.status_code(),
|
||||||
|
Error::OpenStorageEngine { source } => source.status_code(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,21 +1,24 @@
|
|||||||
use std::sync::Arc;
|
use std::{fs, path, sync::Arc};
|
||||||
|
|
||||||
|
use common_telemetry::logging::info;
|
||||||
use datatypes::prelude::ConcreteDataType;
|
use datatypes::prelude::ConcreteDataType;
|
||||||
use datatypes::schema::{ColumnSchema, Schema};
|
use datatypes::schema::{ColumnSchema, Schema};
|
||||||
|
use log_store::fs::{config::LogConfig, log::LocalFileLogStore};
|
||||||
use query::catalog::{CatalogListRef, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
use query::catalog::{CatalogListRef, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||||
use query::query_engine::{Output, QueryEngineFactory, QueryEngineRef};
|
use query::query_engine::{Output, QueryEngineFactory, QueryEngineRef};
|
||||||
use snafu::ResultExt;
|
use snafu::ResultExt;
|
||||||
use sql::statements::statement::Statement;
|
use sql::statements::statement::Statement;
|
||||||
use storage::EngineImpl;
|
use storage::{config::EngineConfig, EngineImpl};
|
||||||
use table::engine::EngineContext;
|
use table::engine::EngineContext;
|
||||||
use table::engine::TableEngine;
|
use table::engine::TableEngine;
|
||||||
use table::requests::CreateTableRequest;
|
use table::requests::CreateTableRequest;
|
||||||
use table_engine::engine::MitoEngine;
|
use table_engine::engine::MitoEngine;
|
||||||
|
|
||||||
use crate::error::{CreateTableSnafu, ExecuteSqlSnafu, Result};
|
use crate::datanode::DatanodeOptions;
|
||||||
|
use crate::error::{self, CreateTableSnafu, ExecuteSqlSnafu, Result};
|
||||||
use crate::sql::SqlHandler;
|
use crate::sql::SqlHandler;
|
||||||
|
|
||||||
type DefaultEngine = MitoEngine<EngineImpl>;
|
type DefaultEngine = MitoEngine<EngineImpl<LocalFileLogStore>>;
|
||||||
|
|
||||||
// An abstraction to read/write services.
|
// An abstraction to read/write services.
|
||||||
pub struct Instance {
|
pub struct Instance {
|
||||||
@@ -30,17 +33,22 @@ pub struct Instance {
|
|||||||
pub type InstanceRef = Arc<Instance>;
|
pub type InstanceRef = Arc<Instance>;
|
||||||
|
|
||||||
impl Instance {
|
impl Instance {
|
||||||
pub fn new(catalog_list: CatalogListRef) -> Self {
|
pub async fn new(opts: &DatanodeOptions, catalog_list: CatalogListRef) -> Result<Self> {
|
||||||
|
let log_store = create_local_file_log_store(opts).await?;
|
||||||
let factory = QueryEngineFactory::new(catalog_list.clone());
|
let factory = QueryEngineFactory::new(catalog_list.clone());
|
||||||
let query_engine = factory.query_engine().clone();
|
let query_engine = factory.query_engine().clone();
|
||||||
let table_engine = DefaultEngine::new(EngineImpl::new());
|
let table_engine = DefaultEngine::new(
|
||||||
|
EngineImpl::new(EngineConfig::default(), Arc::new(log_store))
|
||||||
|
.await
|
||||||
|
.context(error::OpenStorageEngineSnafu)?,
|
||||||
|
);
|
||||||
|
|
||||||
Self {
|
Ok(Self {
|
||||||
query_engine,
|
query_engine,
|
||||||
sql_handler: SqlHandler::new(table_engine.clone()),
|
sql_handler: SqlHandler::new(table_engine.clone()),
|
||||||
table_engine,
|
table_engine,
|
||||||
catalog_list,
|
catalog_list,
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn execute_sql(&self, sql: &str) -> Result<Output> {
|
pub async fn execute_sql(&self, sql: &str) -> Result<Output> {
|
||||||
@@ -95,7 +103,10 @@ impl Instance {
|
|||||||
CreateTableRequest {
|
CreateTableRequest {
|
||||||
name: table_name.to_string(),
|
name: table_name.to_string(),
|
||||||
desc: Some(" a test table".to_string()),
|
desc: Some(" a test table".to_string()),
|
||||||
schema: Arc::new(Schema::new(column_schemas)),
|
schema: Arc::new(
|
||||||
|
Schema::with_timestamp_index(column_schemas, 3)
|
||||||
|
.expect("ts is expected to be timestamp column"),
|
||||||
|
),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
@@ -116,6 +127,25 @@ impl Instance {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn create_local_file_log_store(opts: &DatanodeOptions) -> Result<LocalFileLogStore> {
|
||||||
|
// create WAL directory
|
||||||
|
fs::create_dir_all(path::Path::new(&opts.wal_dir))
|
||||||
|
.context(error::CreateDirSnafu { dir: &opts.wal_dir })?;
|
||||||
|
|
||||||
|
info!("The WAL directory is: {}", &opts.wal_dir);
|
||||||
|
|
||||||
|
let log_config = LogConfig {
|
||||||
|
log_file_dir: opts.wal_dir.clone(),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
let log_store = LocalFileLogStore::open(&log_config)
|
||||||
|
.await
|
||||||
|
.context(error::OpenLogStoreSnafu)?;
|
||||||
|
|
||||||
|
Ok(log_store)
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use arrow::array::UInt64Array;
|
use arrow::array::UInt64Array;
|
||||||
@@ -123,12 +153,13 @@ mod tests {
|
|||||||
use query::catalog::memory;
|
use query::catalog::memory;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::test_util;
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_execute_insert() {
|
async fn test_execute_insert() {
|
||||||
let catalog_list = memory::new_memory_catalog_list().unwrap();
|
let catalog_list = memory::new_memory_catalog_list().unwrap();
|
||||||
|
let (opts, _tmp_dir) = test_util::create_tmp_dir_and_datanode_opts();
|
||||||
let instance = Instance::new(catalog_list);
|
let instance = Instance::new(&opts, catalog_list).await.unwrap();
|
||||||
instance.start().await.unwrap();
|
instance.start().await.unwrap();
|
||||||
|
|
||||||
let output = instance
|
let output = instance
|
||||||
@@ -147,8 +178,8 @@ mod tests {
|
|||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_execute_query() {
|
async fn test_execute_query() {
|
||||||
let catalog_list = memory::new_memory_catalog_list().unwrap();
|
let catalog_list = memory::new_memory_catalog_list().unwrap();
|
||||||
|
let (opts, _tmp_dir) = test_util::create_tmp_dir_and_datanode_opts();
|
||||||
let instance = Instance::new(catalog_list);
|
let instance = Instance::new(&opts, catalog_list).await.unwrap();
|
||||||
|
|
||||||
let output = instance
|
let output = instance
|
||||||
.execute_sql("select sum(number) from numbers limit 20")
|
.execute_sql("select sum(number) from numbers limit 20")
|
||||||
|
|||||||
@@ -6,5 +6,7 @@ mod metric;
|
|||||||
pub mod server;
|
pub mod server;
|
||||||
mod sql;
|
mod sql;
|
||||||
|
|
||||||
pub use crate::datanode::Datanode;
|
#[cfg(test)]
|
||||||
pub use crate::datanode::DatanodeOptions;
|
pub mod test_util;
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests;
|
||||||
|
|||||||
@@ -48,6 +48,7 @@ mod tests {
|
|||||||
use super::*;
|
use super::*;
|
||||||
use crate::instance::Instance;
|
use crate::instance::Instance;
|
||||||
use crate::server::http::JsonOutput;
|
use crate::server::http::JsonOutput;
|
||||||
|
use crate::test_util;
|
||||||
|
|
||||||
fn create_params() -> Query<HashMap<String, String>> {
|
fn create_params() -> Query<HashMap<String, String>> {
|
||||||
let mut map = HashMap::new();
|
let mut map = HashMap::new();
|
||||||
@@ -58,15 +59,16 @@ mod tests {
|
|||||||
Query(map)
|
Query(map)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn create_extension() -> Extension<InstanceRef> {
|
async fn create_extension() -> Extension<InstanceRef> {
|
||||||
let catalog_list = memory::new_memory_catalog_list().unwrap();
|
let catalog_list = memory::new_memory_catalog_list().unwrap();
|
||||||
let instance = Arc::new(Instance::new(catalog_list));
|
let (opts, _tmp_dir) = test_util::create_tmp_dir_and_datanode_opts();
|
||||||
|
let instance = Arc::new(Instance::new(&opts, catalog_list).await.unwrap());
|
||||||
Extension(instance)
|
Extension(instance)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_sql_not_provided() {
|
async fn test_sql_not_provided() {
|
||||||
let extension = create_extension();
|
let extension = create_extension().await;
|
||||||
|
|
||||||
let json = sql(extension, Query(HashMap::default())).await;
|
let json = sql(extension, Query(HashMap::default())).await;
|
||||||
match json {
|
match json {
|
||||||
@@ -82,7 +84,7 @@ mod tests {
|
|||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_sql_output_rows() {
|
async fn test_sql_output_rows() {
|
||||||
let query = create_params();
|
let query = create_params();
|
||||||
let extension = create_extension();
|
let extension = create_extension().await;
|
||||||
|
|
||||||
let json = sql(extension, query).await;
|
let json = sql(extension, query).await;
|
||||||
|
|
||||||
@@ -110,7 +112,7 @@ mod tests {
|
|||||||
counter!("test_metrics", 1);
|
counter!("test_metrics", 1);
|
||||||
|
|
||||||
let query = create_params();
|
let query = create_params();
|
||||||
let extension = create_extension();
|
let extension = create_extension().await;
|
||||||
let text = metrics(extension, query).await;
|
let text = metrics(extension, query).await;
|
||||||
|
|
||||||
match text {
|
match text {
|
||||||
|
|||||||
@@ -63,14 +63,17 @@ mod tests {
|
|||||||
use datatypes::prelude::ConcreteDataType;
|
use datatypes::prelude::ConcreteDataType;
|
||||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||||
use datatypes::value::Value;
|
use datatypes::value::Value;
|
||||||
|
use log_store::fs::noop::NoopLogStore;
|
||||||
use query::catalog::memory;
|
use query::catalog::memory;
|
||||||
use query::catalog::schema::SchemaProvider;
|
use query::catalog::schema::SchemaProvider;
|
||||||
use query::error::Result as QueryResult;
|
use query::error::Result as QueryResult;
|
||||||
use query::QueryEngineFactory;
|
use query::QueryEngineFactory;
|
||||||
|
use storage::config::EngineConfig;
|
||||||
use storage::EngineImpl;
|
use storage::EngineImpl;
|
||||||
use table::error::Result as TableResult;
|
use table::error::Result as TableResult;
|
||||||
use table::{Table, TableRef};
|
use table::{Table, TableRef};
|
||||||
use table_engine::engine::MitoEngine;
|
use table_engine::engine::MitoEngine;
|
||||||
|
use tempdir::TempDir;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
@@ -90,7 +93,7 @@ mod tests {
|
|||||||
ColumnSchema::new("ts", ConcreteDataType::int64_datatype(), true),
|
ColumnSchema::new("ts", ConcreteDataType::int64_datatype(), true),
|
||||||
];
|
];
|
||||||
|
|
||||||
Arc::new(Schema::new(column_schemas))
|
Arc::new(Schema::with_timestamp_index(column_schemas, 3).unwrap())
|
||||||
}
|
}
|
||||||
async fn scan(
|
async fn scan(
|
||||||
&self,
|
&self,
|
||||||
@@ -129,8 +132,11 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[tokio::test]
|
||||||
fn test_statement_to_request() {
|
async fn test_statement_to_request() {
|
||||||
|
let dir = TempDir::new("setup_test_engine_and_table").unwrap();
|
||||||
|
let store_dir = dir.path().to_string_lossy();
|
||||||
|
|
||||||
let catalog_list = memory::new_memory_catalog_list().unwrap();
|
let catalog_list = memory::new_memory_catalog_list().unwrap();
|
||||||
let factory = QueryEngineFactory::new(catalog_list);
|
let factory = QueryEngineFactory::new(catalog_list);
|
||||||
let query_engine = factory.query_engine().clone();
|
let query_engine = factory.query_engine().clone();
|
||||||
@@ -140,7 +146,14 @@ mod tests {
|
|||||||
('host2', 88.8, 333.3, 1655276558000)
|
('host2', 88.8, 333.3, 1655276558000)
|
||||||
"#;
|
"#;
|
||||||
|
|
||||||
let table_engine = MitoEngine::<EngineImpl>::new(EngineImpl::new());
|
let table_engine = MitoEngine::<EngineImpl<NoopLogStore>>::new(
|
||||||
|
EngineImpl::new(
|
||||||
|
EngineConfig::with_store_dir(&store_dir),
|
||||||
|
Arc::new(NoopLogStore::default()),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap(),
|
||||||
|
);
|
||||||
let sql_handler = SqlHandler::new(table_engine);
|
let sql_handler = SqlHandler::new(table_engine);
|
||||||
|
|
||||||
let stmt = query_engine.sql_to_statement(sql).unwrap();
|
let stmt = query_engine.sql_to_statement(sql).unwrap();
|
||||||
|
|||||||
17
src/datanode/src/test_util.rs
Normal file
17
src/datanode/src/test_util.rs
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
use tempdir::TempDir;
|
||||||
|
|
||||||
|
use crate::datanode::DatanodeOptions;
|
||||||
|
|
||||||
|
/// Create a tmp dir(will be deleted once it goes out of scope.) and a default `DatanodeOptions`,
|
||||||
|
/// Only for test.
|
||||||
|
///
|
||||||
|
/// TODO: Add a test feature
|
||||||
|
pub fn create_tmp_dir_and_datanode_opts() -> (DatanodeOptions, TempDir) {
|
||||||
|
let tmp_dir = TempDir::new("/tmp/greptimedb_test").unwrap();
|
||||||
|
let opts = DatanodeOptions {
|
||||||
|
wal_dir: tmp_dir.path().to_str().unwrap().to_string(),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
(opts, tmp_dir)
|
||||||
|
}
|
||||||
1
src/datanode/src/tests.rs
Normal file
1
src/datanode/src/tests.rs
Normal file
@@ -0,0 +1 @@
|
|||||||
|
mod http_test;
|
||||||
@@ -5,12 +5,16 @@ use std::sync::Arc;
|
|||||||
use axum::http::StatusCode;
|
use axum::http::StatusCode;
|
||||||
use axum::Router;
|
use axum::Router;
|
||||||
use axum_test_helper::TestClient;
|
use axum_test_helper::TestClient;
|
||||||
use datanode::{instance::Instance, server::http::HttpServer};
|
|
||||||
use query::catalog::memory;
|
use query::catalog::memory;
|
||||||
|
|
||||||
fn make_test_app() -> Router {
|
use crate::instance::Instance;
|
||||||
|
use crate::server::http::HttpServer;
|
||||||
|
use crate::test_util;
|
||||||
|
|
||||||
|
async fn make_test_app() -> Router {
|
||||||
let catalog_list = memory::new_memory_catalog_list().unwrap();
|
let catalog_list = memory::new_memory_catalog_list().unwrap();
|
||||||
let instance = Arc::new(Instance::new(catalog_list));
|
let (opts, _tmp_dir) = test_util::create_tmp_dir_and_datanode_opts();
|
||||||
|
let instance = Arc::new(Instance::new(&opts, catalog_list).await.unwrap());
|
||||||
let http_server = HttpServer::new(instance);
|
let http_server = HttpServer::new(instance);
|
||||||
http_server.make_app()
|
http_server.make_app()
|
||||||
}
|
}
|
||||||
@@ -18,7 +22,7 @@ fn make_test_app() -> Router {
|
|||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_sql_api() {
|
async fn test_sql_api() {
|
||||||
common_telemetry::init_default_ut_logging();
|
common_telemetry::init_default_ut_logging();
|
||||||
let app = make_test_app();
|
let app = make_test_app().await;
|
||||||
let client = TestClient::new(app);
|
let client = TestClient::new(app);
|
||||||
let res = client.get("/sql").send().await;
|
let res = client.get("/sql").send().await;
|
||||||
assert_eq!(res.status(), StatusCode::OK);
|
assert_eq!(res.status(), StatusCode::OK);
|
||||||
@@ -46,7 +50,7 @@ async fn test_sql_api() {
|
|||||||
async fn test_metrics_api() {
|
async fn test_metrics_api() {
|
||||||
common_telemetry::init_default_ut_logging();
|
common_telemetry::init_default_ut_logging();
|
||||||
common_telemetry::init_default_metrics_recorder();
|
common_telemetry::init_default_metrics_recorder();
|
||||||
let app = make_test_app();
|
let app = make_test_app().await;
|
||||||
let client = TestClient::new(app);
|
let client = TestClient::new(app);
|
||||||
|
|
||||||
// Send a sql
|
// Send a sql
|
||||||
@@ -13,10 +13,10 @@ common-base = { path = "../common/base" }
|
|||||||
common-error = { path = "../common/error" }
|
common-error = { path = "../common/error" }
|
||||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git" , branch = "arrow2" }
|
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git" , branch = "arrow2" }
|
||||||
enum_dispatch = "0.3"
|
enum_dispatch = "0.3"
|
||||||
ordered-float = "3.0"
|
|
||||||
paste = "1.0"
|
|
||||||
num = "0.4"
|
num = "0.4"
|
||||||
num-traits = "0.2"
|
num-traits = "0.2"
|
||||||
serde = { version = "1.0.136", features = ["derive"] }
|
ordered-float = { version = "3.0", features = ["serde"]}
|
||||||
|
paste = "1.0"
|
||||||
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
snafu = { version = "0.7", features = ["backtraces"] }
|
snafu = { version = "0.7", features = ["backtraces"] }
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ use std::sync::Arc;
|
|||||||
|
|
||||||
use arrow::datatypes::DataType as ArrowDataType;
|
use arrow::datatypes::DataType as ArrowDataType;
|
||||||
use paste::paste;
|
use paste::paste;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::error::{self, Error, Result};
|
use crate::error::{self, Error, Result};
|
||||||
use crate::type_id::LogicalTypeId;
|
use crate::type_id::LogicalTypeId;
|
||||||
@@ -11,7 +12,7 @@ use crate::types::{
|
|||||||
};
|
};
|
||||||
use crate::value::Value;
|
use crate::value::Value;
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||||
#[enum_dispatch::enum_dispatch(DataType)]
|
#[enum_dispatch::enum_dispatch(DataType)]
|
||||||
pub enum ConcreteDataType {
|
pub enum ConcreteDataType {
|
||||||
Null(NullType),
|
Null(NullType),
|
||||||
@@ -72,6 +73,10 @@ impl ConcreteDataType {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn is_timestamp(&self) -> bool {
|
||||||
|
matches!(self, ConcreteDataType::Int64(_))
|
||||||
|
}
|
||||||
|
|
||||||
pub fn numerics() -> Vec<ConcreteDataType> {
|
pub fn numerics() -> Vec<ConcreteDataType> {
|
||||||
vec![
|
vec![
|
||||||
ConcreteDataType::int8_datatype(),
|
ConcreteDataType::int8_datatype(),
|
||||||
|
|||||||
@@ -30,6 +30,20 @@ pub enum Error {
|
|||||||
arrow_type: arrow::datatypes::DataType,
|
arrow_type: arrow::datatypes::DataType,
|
||||||
backtrace: Backtrace,
|
backtrace: Backtrace,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
#[snafu(display(
|
||||||
|
"Failed to parse index in schema meta, value: {}, source: {}",
|
||||||
|
value,
|
||||||
|
source
|
||||||
|
))]
|
||||||
|
ParseSchemaIndex {
|
||||||
|
value: String,
|
||||||
|
source: std::num::ParseIntError,
|
||||||
|
backtrace: Backtrace,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Invalid timestamp index: {}", index))]
|
||||||
|
InvalidTimestampIndex { index: usize, backtrace: Backtrace },
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ErrorExt for Error {
|
impl ErrorExt for Error {
|
||||||
|
|||||||
@@ -1,15 +1,19 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::{BTreeMap, HashMap};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use arrow::datatypes::{Field, Schema as ArrowSchema};
|
use arrow::datatypes::{Field, Metadata, Schema as ArrowSchema};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use snafu::{ensure, ResultExt};
|
||||||
|
|
||||||
use crate::data_type::{ConcreteDataType, DataType};
|
use crate::data_type::{ConcreteDataType, DataType};
|
||||||
use crate::error::{Error, Result};
|
use crate::error::{self, Error, Result};
|
||||||
|
|
||||||
|
const TIMESTAMP_INDEX_KEY: &str = "greptime:timestamp_index";
|
||||||
|
|
||||||
// TODO(yingwen): consider assign a version to schema so compare schema can be
|
// TODO(yingwen): consider assign a version to schema so compare schema can be
|
||||||
// done by compare version.
|
// done by compare version.
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct ColumnSchema {
|
pub struct ColumnSchema {
|
||||||
pub name: String,
|
pub name: String,
|
||||||
pub data_type: ConcreteDataType,
|
pub data_type: ConcreteDataType,
|
||||||
@@ -30,31 +34,49 @@ impl ColumnSchema {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct Schema {
|
pub struct Schema {
|
||||||
column_schemas: Vec<ColumnSchema>,
|
column_schemas: Vec<ColumnSchema>,
|
||||||
name_to_index: HashMap<String, usize>,
|
name_to_index: HashMap<String, usize>,
|
||||||
arrow_schema: Arc<ArrowSchema>,
|
arrow_schema: Arc<ArrowSchema>,
|
||||||
|
/// Index of the timestamp key column.
|
||||||
|
///
|
||||||
|
/// Timestamp key column is the column holds the timestamp and forms part of
|
||||||
|
/// the primary key. None means there is no timestamp key column.
|
||||||
|
timestamp_index: Option<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Schema {
|
impl Schema {
|
||||||
pub fn new(column_schemas: Vec<ColumnSchema>) -> Schema {
|
pub fn new(column_schemas: Vec<ColumnSchema>) -> Schema {
|
||||||
let mut fields = Vec::with_capacity(column_schemas.len());
|
let (arrow_schema, name_to_index) = collect_column_schemas(&column_schemas);
|
||||||
let mut name_to_index = HashMap::with_capacity(column_schemas.len());
|
|
||||||
for (index, column_schema) in column_schemas.iter().enumerate() {
|
|
||||||
let field = Field::from(column_schema);
|
|
||||||
fields.push(field);
|
|
||||||
name_to_index.insert(column_schema.name.clone(), index);
|
|
||||||
}
|
|
||||||
let arrow_schema = Arc::new(ArrowSchema::from(fields));
|
|
||||||
|
|
||||||
Schema {
|
Schema {
|
||||||
column_schemas,
|
column_schemas,
|
||||||
name_to_index,
|
name_to_index,
|
||||||
arrow_schema,
|
arrow_schema: Arc::new(arrow_schema),
|
||||||
|
timestamp_index: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn with_timestamp_index(
|
||||||
|
column_schemas: Vec<ColumnSchema>,
|
||||||
|
timestamp_index: usize,
|
||||||
|
) -> Result<Schema> {
|
||||||
|
let (arrow_schema, name_to_index) = collect_column_schemas(&column_schemas);
|
||||||
|
let mut metadata = BTreeMap::new();
|
||||||
|
metadata.insert(TIMESTAMP_INDEX_KEY.to_string(), timestamp_index.to_string());
|
||||||
|
let arrow_schema = Arc::new(arrow_schema.with_metadata(metadata));
|
||||||
|
|
||||||
|
validate_timestamp_index(&column_schemas, timestamp_index)?;
|
||||||
|
|
||||||
|
Ok(Schema {
|
||||||
|
column_schemas,
|
||||||
|
name_to_index,
|
||||||
|
arrow_schema,
|
||||||
|
timestamp_index: Some(timestamp_index),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
pub fn arrow_schema(&self) -> &Arc<ArrowSchema> {
|
pub fn arrow_schema(&self) -> &Arc<ArrowSchema> {
|
||||||
&self.arrow_schema
|
&self.arrow_schema
|
||||||
}
|
}
|
||||||
@@ -68,6 +90,55 @@ impl Schema {
|
|||||||
.get(name)
|
.get(name)
|
||||||
.map(|index| &self.column_schemas[*index])
|
.map(|index| &self.column_schemas[*index])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn num_columns(&self) -> usize {
|
||||||
|
self.column_schemas.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns index of the timestamp key column.
|
||||||
|
#[inline]
|
||||||
|
pub fn timestamp_index(&self) -> Option<usize> {
|
||||||
|
self.timestamp_index
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn timestamp_column(&self) -> Option<&ColumnSchema> {
|
||||||
|
self.timestamp_index.map(|idx| &self.column_schemas[idx])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn collect_column_schemas(
|
||||||
|
column_schemas: &[ColumnSchema],
|
||||||
|
) -> (ArrowSchema, HashMap<String, usize>) {
|
||||||
|
let mut fields = Vec::with_capacity(column_schemas.len());
|
||||||
|
let mut name_to_index = HashMap::with_capacity(column_schemas.len());
|
||||||
|
for (index, column_schema) in column_schemas.iter().enumerate() {
|
||||||
|
let field = Field::from(column_schema);
|
||||||
|
fields.push(field);
|
||||||
|
name_to_index.insert(column_schema.name.clone(), index);
|
||||||
|
}
|
||||||
|
|
||||||
|
(ArrowSchema::from(fields), name_to_index)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn validate_timestamp_index(column_schemas: &[ColumnSchema], timestamp_index: usize) -> Result<()> {
|
||||||
|
ensure!(
|
||||||
|
timestamp_index < column_schemas.len(),
|
||||||
|
error::InvalidTimestampIndexSnafu {
|
||||||
|
index: timestamp_index,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
let column_schema = &column_schemas[timestamp_index];
|
||||||
|
ensure!(
|
||||||
|
column_schema.data_type.is_timestamp(),
|
||||||
|
error::InvalidTimestampIndexSnafu {
|
||||||
|
index: timestamp_index,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type SchemaRef = Arc<Schema>;
|
pub type SchemaRef = Arc<Schema>;
|
||||||
@@ -108,14 +179,32 @@ impl TryFrom<Arc<ArrowSchema>> for Schema {
|
|||||||
column_schemas.push(column_schema);
|
column_schemas.push(column_schema);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let timestamp_index = try_parse_index(&arrow_schema.metadata, TIMESTAMP_INDEX_KEY)?;
|
||||||
|
if let Some(index) = timestamp_index {
|
||||||
|
validate_timestamp_index(&column_schemas, index)?;
|
||||||
|
}
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
column_schemas,
|
column_schemas,
|
||||||
name_to_index,
|
name_to_index,
|
||||||
arrow_schema,
|
arrow_schema,
|
||||||
|
timestamp_index,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn try_parse_index(metadata: &Metadata, key: &str) -> Result<Option<usize>> {
|
||||||
|
if let Some(value) = metadata.get(key) {
|
||||||
|
let index = value
|
||||||
|
.parse()
|
||||||
|
.context(error::ParseSchemaIndexSnafu { value })?;
|
||||||
|
|
||||||
|
Ok(Some(index))
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use arrow::datatypes::DataType as ArrowDataType;
|
use arrow::datatypes::DataType as ArrowDataType;
|
||||||
@@ -135,13 +224,17 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_schema() {
|
fn test_schema_no_timestamp() {
|
||||||
let column_schemas = vec![
|
let column_schemas = vec![
|
||||||
ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), false),
|
ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), false),
|
||||||
ColumnSchema::new("col2", ConcreteDataType::float64_datatype(), true),
|
ColumnSchema::new("col2", ConcreteDataType::float64_datatype(), true),
|
||||||
];
|
];
|
||||||
let schema = Schema::new(column_schemas.clone());
|
let schema = Schema::new(column_schemas.clone());
|
||||||
|
|
||||||
|
assert_eq!(2, schema.num_columns());
|
||||||
|
assert!(schema.timestamp_index().is_none());
|
||||||
|
assert!(schema.timestamp_column().is_none());
|
||||||
|
|
||||||
for column_schema in &column_schemas {
|
for column_schema in &column_schemas {
|
||||||
let found = schema.column_schema_by_name(&column_schema.name).unwrap();
|
let found = schema.column_schema_by_name(&column_schema.name).unwrap();
|
||||||
assert_eq!(column_schema, found);
|
assert_eq!(column_schema, found);
|
||||||
@@ -158,4 +251,31 @@ mod tests {
|
|||||||
assert_eq!(arrow_schema, *schema.arrow_schema());
|
assert_eq!(arrow_schema, *schema.arrow_schema());
|
||||||
assert_eq!(arrow_schema, *new_schema.arrow_schema());
|
assert_eq!(arrow_schema, *new_schema.arrow_schema());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_schema_with_timestamp() {
|
||||||
|
let column_schemas = vec![
|
||||||
|
ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
|
||||||
|
ColumnSchema::new("ts", ConcreteDataType::int64_datatype(), false),
|
||||||
|
];
|
||||||
|
let schema = Schema::with_timestamp_index(column_schemas.clone(), 1).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(1, schema.timestamp_index().unwrap());
|
||||||
|
assert_eq!(&column_schemas[1], schema.timestamp_column().unwrap());
|
||||||
|
|
||||||
|
let new_schema = Schema::try_from(schema.arrow_schema().clone()).unwrap();
|
||||||
|
assert_eq!(1, schema.timestamp_index().unwrap());
|
||||||
|
assert_eq!(schema, new_schema);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_schema_wrong_timestamp() {
|
||||||
|
let column_schemas = vec![
|
||||||
|
ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
|
||||||
|
ColumnSchema::new("col2", ConcreteDataType::float64_datatype(), false),
|
||||||
|
];
|
||||||
|
assert!(Schema::with_timestamp_index(column_schemas.clone(), 0).is_err());
|
||||||
|
assert!(Schema::with_timestamp_index(column_schemas.clone(), 1).is_err());
|
||||||
|
assert!(Schema::with_timestamp_index(column_schemas, 2).is_err());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,12 +2,13 @@ use std::sync::Arc;
|
|||||||
|
|
||||||
use arrow::datatypes::DataType as ArrowDataType;
|
use arrow::datatypes::DataType as ArrowDataType;
|
||||||
use common_base::bytes::StringBytes;
|
use common_base::bytes::StringBytes;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::data_type::{DataType, DataTypeRef};
|
use crate::data_type::{DataType, DataTypeRef};
|
||||||
use crate::type_id::LogicalTypeId;
|
use crate::type_id::LogicalTypeId;
|
||||||
use crate::value::Value;
|
use crate::value::Value;
|
||||||
|
|
||||||
#[derive(Debug, Default, Clone, PartialEq)]
|
#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct BinaryType;
|
pub struct BinaryType;
|
||||||
|
|
||||||
impl BinaryType {
|
impl BinaryType {
|
||||||
|
|||||||
@@ -1,12 +1,13 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use arrow::datatypes::DataType as ArrowDataType;
|
use arrow::datatypes::DataType as ArrowDataType;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::data_type::{DataType, DataTypeRef};
|
use crate::data_type::{DataType, DataTypeRef};
|
||||||
use crate::type_id::LogicalTypeId;
|
use crate::type_id::LogicalTypeId;
|
||||||
use crate::value::Value;
|
use crate::value::Value;
|
||||||
|
|
||||||
#[derive(Debug, Default, Clone, PartialEq)]
|
#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct BooleanType;
|
pub struct BooleanType;
|
||||||
|
|
||||||
impl BooleanType {
|
impl BooleanType {
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
use arrow::datatypes::{DataType as ArrowDataType, Field};
|
use arrow::datatypes::{DataType as ArrowDataType, Field};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
use crate::value::ListValue;
|
use crate::value::ListValue;
|
||||||
|
|
||||||
/// Used to represent the List datatype.
|
/// Used to represent the List datatype.
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct ListType {
|
pub struct ListType {
|
||||||
/// The type of List's inner data.
|
/// The type of List's inner data.
|
||||||
inner: Box<ConcreteDataType>,
|
inner: Box<ConcreteDataType>,
|
||||||
|
|||||||
@@ -1,12 +1,13 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use arrow::datatypes::DataType as ArrowDataType;
|
use arrow::datatypes::DataType as ArrowDataType;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::data_type::{DataType, DataTypeRef};
|
use crate::data_type::{DataType, DataTypeRef};
|
||||||
use crate::type_id::LogicalTypeId;
|
use crate::type_id::LogicalTypeId;
|
||||||
use crate::value::Value;
|
use crate::value::Value;
|
||||||
|
|
||||||
#[derive(Debug, Default, Clone, PartialEq)]
|
#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct NullType;
|
pub struct NullType;
|
||||||
|
|
||||||
impl NullType {
|
impl NullType {
|
||||||
|
|||||||
@@ -2,14 +2,16 @@ use std::marker::PhantomData;
|
|||||||
|
|
||||||
use arrow::datatypes::DataType as ArrowDataType;
|
use arrow::datatypes::DataType as ArrowDataType;
|
||||||
use paste::paste;
|
use paste::paste;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::data_type::{ConcreteDataType, DataType};
|
use crate::data_type::{ConcreteDataType, DataType};
|
||||||
use crate::type_id::LogicalTypeId;
|
use crate::type_id::LogicalTypeId;
|
||||||
use crate::types::primitive_traits::Primitive;
|
use crate::types::primitive_traits::Primitive;
|
||||||
use crate::value::Value;
|
use crate::value::Value;
|
||||||
|
|
||||||
#[derive(Clone, PartialEq)]
|
#[derive(Clone, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct PrimitiveType<T: Primitive> {
|
pub struct PrimitiveType<T: Primitive> {
|
||||||
|
#[serde(skip)]
|
||||||
_phantom: PhantomData<T>,
|
_phantom: PhantomData<T>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,11 +2,12 @@ use std::sync::Arc;
|
|||||||
|
|
||||||
use arrow::datatypes::DataType as ArrowDataType;
|
use arrow::datatypes::DataType as ArrowDataType;
|
||||||
use common_base::bytes::StringBytes;
|
use common_base::bytes::StringBytes;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::data_type::DataType;
|
use crate::data_type::DataType;
|
||||||
use crate::prelude::{DataTypeRef, LogicalTypeId, Value};
|
use crate::prelude::{DataTypeRef, LogicalTypeId, Value};
|
||||||
|
|
||||||
#[derive(Debug, Default, Clone, PartialEq)]
|
#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct StringType;
|
pub struct StringType;
|
||||||
|
|
||||||
impl StringType {
|
impl StringType {
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ use std::cmp::Ordering;
|
|||||||
use common_base::bytes::{Bytes, StringBytes};
|
use common_base::bytes::{Bytes, StringBytes};
|
||||||
use datafusion_common::ScalarValue;
|
use datafusion_common::ScalarValue;
|
||||||
pub use ordered_float::OrderedFloat;
|
pub use ordered_float::OrderedFloat;
|
||||||
use serde::{Serialize, Serializer};
|
use serde::{Deserialize, Serialize, Serializer};
|
||||||
|
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
|
|
||||||
@@ -15,7 +15,7 @@ pub type OrderedF64 = OrderedFloat<f64>;
|
|||||||
/// Although compare Value with different data type is allowed, it is recommended to only
|
/// Although compare Value with different data type is allowed, it is recommended to only
|
||||||
/// compare Value with same data type. Comparing Value with different data type may not
|
/// compare Value with same data type. Comparing Value with different data type may not
|
||||||
/// behaves as what you expect.
|
/// behaves as what you expect.
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deserialize)]
|
||||||
pub enum Value {
|
pub enum Value {
|
||||||
Null,
|
Null,
|
||||||
|
|
||||||
@@ -187,7 +187,7 @@ impl From<Value> for ScalarValue {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct ListValue {
|
pub struct ListValue {
|
||||||
/// List of nested Values (boxed to reduce size_of(Value))
|
/// List of nested Values (boxed to reduce size_of(Value))
|
||||||
#[allow(clippy::box_collection)]
|
#[allow(clippy::box_collection)]
|
||||||
|
|||||||
@@ -1,14 +1,15 @@
|
|||||||
use store_api::logstore::entry::{Id, Offset};
|
use store_api::logstore::entry::{Id, Offset};
|
||||||
use store_api::logstore::AppendResponse;
|
use store_api::logstore::AppendResponse;
|
||||||
|
|
||||||
mod config;
|
pub mod config;
|
||||||
mod crc;
|
mod crc;
|
||||||
mod entry;
|
mod entry;
|
||||||
mod file;
|
mod file;
|
||||||
mod file_name;
|
mod file_name;
|
||||||
mod index;
|
mod index;
|
||||||
mod log;
|
pub mod log;
|
||||||
mod namespace;
|
mod namespace;
|
||||||
|
pub mod noop;
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
pub struct AppendResponseImpl {
|
pub struct AppendResponseImpl {
|
||||||
|
|||||||
@@ -463,81 +463,82 @@ impl AppendRequest {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
// TODO(hl): uncomment this test once log file read visibility issue fixed.
|
||||||
mod tests {
|
// #[cfg(test)]
|
||||||
use std::io::Read;
|
// mod tests {
|
||||||
|
// use std::io::Read;
|
||||||
use common_telemetry::logging;
|
//
|
||||||
use futures_util::StreamExt;
|
// use common_telemetry::logging;
|
||||||
use tempdir::TempDir;
|
// use futures_util::StreamExt;
|
||||||
|
// use tempdir::TempDir;
|
||||||
use super::*;
|
//
|
||||||
use crate::fs::namespace::LocalNamespace;
|
// use super::*;
|
||||||
|
// use crate::fs::namespace::LocalNamespace;
|
||||||
#[tokio::test]
|
//
|
||||||
pub async fn test_create_entry_stream() {
|
// #[tokio::test]
|
||||||
logging::init_default_ut_logging();
|
// pub async fn test_create_entry_stream() {
|
||||||
let config = LogConfig::default();
|
// logging::init_default_ut_logging();
|
||||||
|
// let config = LogConfig::default();
|
||||||
let dir = TempDir::new("greptimedb-store-test").unwrap();
|
//
|
||||||
let path_buf = dir.path().join("0010.log");
|
// let dir = TempDir::new("greptimedb-store-test").unwrap();
|
||||||
let path = path_buf.to_str().unwrap().to_string();
|
// let path_buf = dir.path().join("0010.log");
|
||||||
File::create(path.as_str()).await.unwrap();
|
// let path = path_buf.to_str().unwrap().to_string();
|
||||||
|
// File::create(path.as_str()).await.unwrap();
|
||||||
let mut file = LogFile::open(path.clone(), &config)
|
//
|
||||||
.await
|
// let mut file = LogFile::open(path.clone(), &config)
|
||||||
.unwrap_or_else(|_| panic!("Failed to open file: {}", path));
|
// .await
|
||||||
file.start().await.expect("Failed to start log file");
|
// .unwrap_or_else(|_| panic!("Failed to open file: {}", path));
|
||||||
|
// file.start().await.expect("Failed to start log file");
|
||||||
assert_eq!(
|
//
|
||||||
10,
|
// assert_eq!(
|
||||||
file.append(&mut EntryImpl::new("test1".as_bytes()))
|
// 10,
|
||||||
.await
|
// file.append(&mut EntryImpl::new("test1".as_bytes()))
|
||||||
.expect("Failed to append entry 1")
|
// .await
|
||||||
.entry_id
|
// .expect("Failed to append entry 1")
|
||||||
);
|
// .entry_id
|
||||||
|
// );
|
||||||
assert_eq!(
|
//
|
||||||
11,
|
// assert_eq!(
|
||||||
file.append(&mut EntryImpl::new("test-2".as_bytes()))
|
// 11,
|
||||||
.await
|
// file.append(&mut EntryImpl::new("test-2".as_bytes()))
|
||||||
.expect("Failed to append entry 2")
|
// .await
|
||||||
.entry_id
|
// .expect("Failed to append entry 2")
|
||||||
);
|
// .entry_id
|
||||||
|
// );
|
||||||
let mut log_file = std::fs::File::open(path.clone()).expect("Test log file does not exist");
|
//
|
||||||
let metadata = log_file.metadata().expect("Failed to read file metadata");
|
// let mut log_file = std::fs::File::open(path.clone()).expect("Test log file does not exist");
|
||||||
info!("Log file metadata: {:?}", metadata);
|
// let metadata = log_file.metadata().expect("Failed to read file metadata");
|
||||||
|
// info!("Log file metadata: {:?}", metadata);
|
||||||
assert_eq!(59, metadata.len()); // 24+5+24+6
|
//
|
||||||
let mut content = vec![0; metadata.len() as usize];
|
// assert_eq!(59, metadata.len()); // 24+5+24+6
|
||||||
log_file
|
// let mut content = vec![0; metadata.len() as usize];
|
||||||
.read_exact(&mut content)
|
// log_file
|
||||||
.expect("Read log file failed");
|
// .read_exact(&mut content)
|
||||||
|
// .expect("Read log file failed");
|
||||||
info!(
|
//
|
||||||
"Log file {:?} content: {}, size:{}",
|
// info!(
|
||||||
dir,
|
// "Log file {:?} content: {}, size:{}",
|
||||||
hex::encode(content),
|
// dir,
|
||||||
metadata.len()
|
// hex::encode(content),
|
||||||
);
|
// metadata.len()
|
||||||
|
// );
|
||||||
let mut stream = file.create_stream(LocalNamespace::default(), 0);
|
//
|
||||||
|
// let mut stream = file.create_stream(LocalNamespace::default(), 0);
|
||||||
let mut data = vec![];
|
//
|
||||||
|
// let mut data = vec![];
|
||||||
while let Some(v) = stream.next().await {
|
//
|
||||||
let entries = v.unwrap();
|
// while let Some(v) = stream.next().await {
|
||||||
let content = entries[0].data();
|
// let entries = v.unwrap();
|
||||||
let vec = content.to_vec();
|
// let content = entries[0].data();
|
||||||
info!("Read entry: {}", String::from_utf8_lossy(&vec));
|
// let vec = content.to_vec();
|
||||||
data.push(String::from_utf8(vec).unwrap());
|
// info!("Read entry: {}", String::from_utf8_lossy(&vec));
|
||||||
}
|
// data.push(String::from_utf8(vec).unwrap());
|
||||||
|
// }
|
||||||
assert_eq!(vec!["test1".to_string(), "test-2".to_string()], data);
|
//
|
||||||
drop(stream);
|
// assert_eq!(vec!["test1".to_string(), "test-2".to_string()], data);
|
||||||
|
// drop(stream);
|
||||||
let result = file.stop().await;
|
//
|
||||||
info!("Stop file res: {:?}", result);
|
// let result = file.stop().await;
|
||||||
}
|
// info!("Stop file res: {:?}", result);
|
||||||
}
|
// }
|
||||||
|
// }
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ use std::sync::Arc;
|
|||||||
use arc_swap::ArcSwap;
|
use arc_swap::ArcSwap;
|
||||||
use common_telemetry::{error, info, warn};
|
use common_telemetry::{error, info, warn};
|
||||||
use snafu::{OptionExt, ResultExt};
|
use snafu::{OptionExt, ResultExt};
|
||||||
use store_api::logstore::entry::Id;
|
use store_api::logstore::entry::{Encode, Id};
|
||||||
use store_api::logstore::LogStore;
|
use store_api::logstore::LogStore;
|
||||||
use tokio::sync::RwLock;
|
use tokio::sync::RwLock;
|
||||||
|
|
||||||
@@ -167,17 +167,20 @@ impl LogStore for LocalFileLogStore {
|
|||||||
async fn append(
|
async fn append(
|
||||||
&self,
|
&self,
|
||||||
_ns: Self::Namespace,
|
_ns: Self::Namespace,
|
||||||
mut e: Self::Entry,
|
mut entry: Self::Entry,
|
||||||
) -> Result<Self::AppendResponse> {
|
) -> Result<Self::AppendResponse> {
|
||||||
// TODO(hl): configurable retry times
|
// TODO(hl): configurable retry times
|
||||||
for _ in 0..3 {
|
for _ in 0..3 {
|
||||||
let current_active_file = self.active_file();
|
let current_active_file = self.active_file();
|
||||||
match current_active_file.append(&mut e).await {
|
match current_active_file.append(&mut entry).await {
|
||||||
Ok(r) => return Ok(r),
|
Ok(r) => return Ok(r),
|
||||||
Err(e) => match e {
|
Err(e) => match e {
|
||||||
Error::Eof => {
|
Error::Eof => {
|
||||||
self.roll_next(current_active_file.clone()).await?;
|
self.roll_next(current_active_file.clone()).await?;
|
||||||
info!("Rolled to next file, retry append");
|
info!(
|
||||||
|
"Rolled to next file, retry append, entry size: {}",
|
||||||
|
entry.encoded_size()
|
||||||
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
Error::Internal { .. } => {
|
Error::Internal { .. } => {
|
||||||
|
|||||||
@@ -19,6 +19,14 @@ struct LocalNamespaceInner {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Namespace for LocalNamespace {
|
impl Namespace for LocalNamespace {
|
||||||
|
fn new(name: &str, id: u64) -> Self {
|
||||||
|
let inner = Arc::new(LocalNamespaceInner {
|
||||||
|
name: name.to_string(),
|
||||||
|
id,
|
||||||
|
});
|
||||||
|
Self { inner }
|
||||||
|
}
|
||||||
|
|
||||||
fn name(&self) -> &str {
|
fn name(&self) -> &str {
|
||||||
self.inner.name.as_str()
|
self.inner.name.as_str()
|
||||||
}
|
}
|
||||||
@@ -29,12 +37,4 @@ impl LocalNamespace {
|
|||||||
fn id(&self) -> u64 {
|
fn id(&self) -> u64 {
|
||||||
self.inner.id
|
self.inner.id
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new(name: &str, id: u64) -> Self {
|
|
||||||
let inner = Arc::new(LocalNamespaceInner {
|
|
||||||
name: name.to_string(),
|
|
||||||
id,
|
|
||||||
});
|
|
||||||
Self { inner }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
53
src/log-store/src/fs/noop.rs
Normal file
53
src/log-store/src/fs/noop.rs
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
use store_api::logstore::{entry::Id, LogStore};
|
||||||
|
|
||||||
|
use crate::error::{Error, Result};
|
||||||
|
use crate::fs::{entry::EntryImpl, namespace::LocalNamespace, AppendResponseImpl};
|
||||||
|
|
||||||
|
/// A noop log store which only for test
|
||||||
|
// TODO: Add a test feature
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct NoopLogStore {}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl LogStore for NoopLogStore {
|
||||||
|
type Error = Error;
|
||||||
|
type Namespace = LocalNamespace;
|
||||||
|
type Entry = EntryImpl;
|
||||||
|
type AppendResponse = AppendResponseImpl;
|
||||||
|
|
||||||
|
async fn append(
|
||||||
|
&self,
|
||||||
|
_ns: Self::Namespace,
|
||||||
|
mut _e: Self::Entry,
|
||||||
|
) -> Result<Self::AppendResponse> {
|
||||||
|
Ok(AppendResponseImpl {
|
||||||
|
entry_id: 0,
|
||||||
|
offset: 0,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn append_batch(&self, _ns: Self::Namespace, _e: Vec<Self::Entry>) -> Result<Id> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn read(
|
||||||
|
&self,
|
||||||
|
_ns: Self::Namespace,
|
||||||
|
_id: Id,
|
||||||
|
) -> Result<store_api::logstore::entry_stream::SendableEntryStream<'_, Self::Entry, Self::Error>>
|
||||||
|
{
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_namespace(&mut self, _ns: Self::Namespace) -> Result<()> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn delete_namespace(&mut self, _ns: Self::Namespace) -> Result<()> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_namespaces(&self) -> Result<Vec<Self::Namespace>> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,2 +1,4 @@
|
|||||||
mod error;
|
pub mod error;
|
||||||
pub mod fs;
|
pub mod fs;
|
||||||
|
|
||||||
|
pub mod test_util;
|
||||||
|
|||||||
1
src/log-store/src/test_util.rs
Normal file
1
src/log-store/src/test_util.rs
Normal file
@@ -0,0 +1 @@
|
|||||||
|
pub mod log_store_util;
|
||||||
16
src/log-store/src/test_util/log_store_util.rs
Normal file
16
src/log-store/src/test_util/log_store_util.rs
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
use tempdir::TempDir;
|
||||||
|
|
||||||
|
use crate::fs::{config::LogConfig, log::LocalFileLogStore};
|
||||||
|
|
||||||
|
/// Create a tmp directory for write log, used for test.
|
||||||
|
// TODO: Add a test feature
|
||||||
|
pub async fn create_tmp_local_file_log_store(dir: &str) -> (LocalFileLogStore, TempDir) {
|
||||||
|
let dir = TempDir::new(dir).unwrap();
|
||||||
|
let cfg = LogConfig {
|
||||||
|
append_buffer_size: 128,
|
||||||
|
max_log_file_size: 128,
|
||||||
|
log_file_dir: dir.path().to_str().unwrap().to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
|
(LocalFileLogStore::open(&cfg).await.unwrap(), dir)
|
||||||
|
}
|
||||||
@@ -7,7 +7,7 @@ edition = "2021"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
futures = { version = "0.3"}
|
futures = { version = "0.3"}
|
||||||
opendal = "0.6"
|
opendal = "0.9"
|
||||||
tokio = { version = "1.0", features = ["full"] }
|
tokio = { version = "1.0", features = ["full"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
pub use opendal::{
|
pub use opendal::{
|
||||||
Accessor, Layer, Metadata, Object, ObjectMode, ObjectStreamer, Operator as ObjectStore,
|
Accessor, DirEntry, DirStreamer, Layer, Metadata, Object, ObjectMetadata, ObjectMode,
|
||||||
|
Operator as ObjectStore,
|
||||||
};
|
};
|
||||||
pub mod backend;
|
pub mod backend;
|
||||||
pub mod util;
|
pub mod util;
|
||||||
|
|||||||
@@ -1,7 +1,29 @@
|
|||||||
use futures::TryStreamExt;
|
use futures::TryStreamExt;
|
||||||
|
|
||||||
use crate::{Object, ObjectStreamer};
|
use crate::{DirEntry, DirStreamer};
|
||||||
|
|
||||||
pub async fn collect(stream: ObjectStreamer) -> Result<Vec<Object>, std::io::Error> {
|
pub async fn collect(stream: DirStreamer) -> Result<Vec<DirEntry>, std::io::Error> {
|
||||||
stream.try_collect::<Vec<_>>().await
|
stream.try_collect::<Vec<_>>().await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Normalize a directory path, ensure it is ends with '/'
|
||||||
|
pub fn normalize_dir(dir: &str) -> String {
|
||||||
|
let mut dir = dir.to_string();
|
||||||
|
if !dir.ends_with('/') {
|
||||||
|
dir.push('/')
|
||||||
|
}
|
||||||
|
|
||||||
|
dir
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_normalize_dir() {
|
||||||
|
assert_eq!("/", normalize_dir("/"));
|
||||||
|
assert_eq!("/", normalize_dir(""));
|
||||||
|
assert_eq!("/test/", normalize_dir("/test"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use anyhow::Result;
|
|||||||
use common_telemetry::logging;
|
use common_telemetry::logging;
|
||||||
use object_store::{
|
use object_store::{
|
||||||
backend::{fs, s3},
|
backend::{fs, s3},
|
||||||
util, Object, ObjectMode, ObjectStore, ObjectStreamer,
|
util, DirStreamer, Object, ObjectMode, ObjectStore,
|
||||||
};
|
};
|
||||||
use tempdir::TempDir;
|
use tempdir::TempDir;
|
||||||
|
|
||||||
@@ -25,8 +25,7 @@ async fn test_object_crud(store: &ObjectStore) -> Result<()> {
|
|||||||
|
|
||||||
// Get object's Metadata
|
// Get object's Metadata
|
||||||
let meta = object.metadata().await?;
|
let meta = object.metadata().await?;
|
||||||
assert!(meta.complete());
|
assert_eq!("test_file", object.path());
|
||||||
assert_eq!("test_file", meta.path());
|
|
||||||
assert_eq!(ObjectMode::FILE, meta.mode());
|
assert_eq!(ObjectMode::FILE, meta.mode());
|
||||||
assert_eq!(13, meta.content_length());
|
assert_eq!(13, meta.content_length());
|
||||||
|
|
||||||
@@ -50,7 +49,7 @@ async fn test_object_list(store: &ObjectStore) -> Result<()> {
|
|||||||
|
|
||||||
// List objects
|
// List objects
|
||||||
let o: Object = store.object("/");
|
let o: Object = store.object("/");
|
||||||
let obs: ObjectStreamer = o.list().await?;
|
let obs: DirStreamer = o.list().await?;
|
||||||
let objects = util::collect(obs).await?;
|
let objects = util::collect(obs).await?;
|
||||||
assert_eq!(3, objects.len());
|
assert_eq!(3, objects.len());
|
||||||
|
|
||||||
@@ -63,7 +62,7 @@ async fn test_object_list(store: &ObjectStore) -> Result<()> {
|
|||||||
assert_eq!(1, objects.len());
|
assert_eq!(1, objects.len());
|
||||||
|
|
||||||
// Only o2 is exists
|
// Only o2 is exists
|
||||||
let o2 = &objects[0];
|
let o2 = &objects[0].clone().into_object();
|
||||||
let bs = o2.read().await?;
|
let bs = o2.read().await?;
|
||||||
assert_eq!("Hello, object2!", String::from_utf8(bs)?);
|
assert_eq!("Hello, object2!", String::from_utf8(bs)?);
|
||||||
// Delete o2
|
// Delete o2
|
||||||
|
|||||||
@@ -7,18 +7,39 @@ edition = "2021"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
arc-swap = "1.0"
|
arc-swap = "1.0"
|
||||||
|
arrow-format = { version = "0.4", features = ["ipc"] }
|
||||||
async-trait = "0.1"
|
async-trait = "0.1"
|
||||||
|
bit-vec = "0.6"
|
||||||
|
bytes = "1.1"
|
||||||
common-error = { path = "../common/error" }
|
common-error = { path = "../common/error" }
|
||||||
|
common-runtime = { path = "../common/runtime" }
|
||||||
common-telemetry = { path = "../common/telemetry" }
|
common-telemetry = { path = "../common/telemetry" }
|
||||||
|
common-time = { path = "../common/time" }
|
||||||
datatypes = { path = "../datatypes" }
|
datatypes = { path = "../datatypes" }
|
||||||
|
futures = "0.3"
|
||||||
|
futures-util = "0.3"
|
||||||
|
lazy_static = "1.4"
|
||||||
|
log-store = { path = "../log-store" }
|
||||||
|
object-store = { path = "../object-store" }
|
||||||
|
planus = "0.2"
|
||||||
|
prost = "0.10"
|
||||||
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
|
serde_json = "1.0"
|
||||||
snafu = { version = "0.7", features = ["backtraces"] }
|
snafu = { version = "0.7", features = ["backtraces"] }
|
||||||
store-api = { path = "../store-api" }
|
store-api = { path = "../store-api" }
|
||||||
|
regex = "1.5"
|
||||||
tokio = { version = "1.18", features = ["full"] }
|
tokio = { version = "1.18", features = ["full"] }
|
||||||
|
tonic = "0.7"
|
||||||
|
uuid = { version = "1.1" , features=["v4"]}
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
|
atomic_float="0.1"
|
||||||
criterion = "0.3"
|
criterion = "0.3"
|
||||||
rand = "0.8"
|
rand = "0.8"
|
||||||
atomic_float="0.1"
|
tempdir = "0.3"
|
||||||
|
|
||||||
|
[build-dependencies]
|
||||||
|
tonic-build = "0.7"
|
||||||
|
|
||||||
[[bench]]
|
[[bench]]
|
||||||
name = "bench_main"
|
name = "bench_main"
|
||||||
|
|||||||
@@ -27,9 +27,11 @@ impl BenchContext {
|
|||||||
let iter_ctx = IterContext {
|
let iter_ctx = IterContext {
|
||||||
batch_size,
|
batch_size,
|
||||||
visible_sequence: SequenceNumber::MAX,
|
visible_sequence: SequenceNumber::MAX,
|
||||||
|
for_flush: false,
|
||||||
};
|
};
|
||||||
let mut iter = self.memtable.iter(iter_ctx).unwrap();
|
let iter = self.memtable.iter(iter_ctx).unwrap();
|
||||||
while let Ok(Some(_)) = iter.next() {
|
for batch in iter {
|
||||||
|
batch.unwrap();
|
||||||
read_count += batch_size;
|
read_count += batch_size;
|
||||||
}
|
}
|
||||||
read_count
|
read_count
|
||||||
|
|||||||
@@ -22,5 +22,5 @@ pub fn schema_for_test() -> MemtableSchema {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn new_memtable() -> MemtableRef {
|
pub fn new_memtable() -> MemtableRef {
|
||||||
DefaultMemtableBuilder {}.build(schema_for_test())
|
DefaultMemtableBuilder {}.build(1, schema_for_test())
|
||||||
}
|
}
|
||||||
|
|||||||
5
src/storage/build.rs
Normal file
5
src/storage/build.rs
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
fn main() {
|
||||||
|
tonic_build::configure()
|
||||||
|
.compile(&["proto/wal.proto"], &["."])
|
||||||
|
.expect("compile wal proto");
|
||||||
|
}
|
||||||
25
src/storage/proto/wal.proto
Normal file
25
src/storage/proto/wal.proto
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
syntax = "proto3";
|
||||||
|
|
||||||
|
package greptime.storage.wal.v1;
|
||||||
|
|
||||||
|
message WalHeader {
|
||||||
|
PayloadType payload_type = 1;
|
||||||
|
uint64 last_manifest_version = 2;
|
||||||
|
repeated MutationExtra mutation_extras = 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
enum PayloadType {
|
||||||
|
NONE = 0;
|
||||||
|
WRITE_BATCH_ARROW = 1;
|
||||||
|
WRITE_BATCH_PROTO = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
message MutationExtra {
|
||||||
|
MutationType mutation_type = 1;
|
||||||
|
bytes column_null_mask = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
enum MutationType {
|
||||||
|
PUT = 0;
|
||||||
|
DELETE = 1;
|
||||||
|
}
|
||||||
225
src/storage/src/arrow_stream.rs
Normal file
225
src/storage/src/arrow_stream.rs
Normal file
@@ -0,0 +1,225 @@
|
|||||||
|
//! Forked from [arrow2](https://github.com/jorgecarleitao/arrow2/blob/v0.10.1/src/io/ipc/read/stream.rs),
|
||||||
|
//! and I made a slight change because arrow2 can only use the same schema to read all data chunks,
|
||||||
|
//! which doesn't solve the none column problem, so I added a `column_null_mask` parameter to the
|
||||||
|
//! `StreamReader#maybe_next` method to solve the none column problem.
|
||||||
|
use std::io::Read;
|
||||||
|
|
||||||
|
use arrow_format::{self, ipc::planus::ReadAsRoot};
|
||||||
|
use datatypes::arrow::{
|
||||||
|
datatypes::Schema,
|
||||||
|
error::{ArrowError, Result},
|
||||||
|
io::ipc::{
|
||||||
|
read::{read_dictionary, read_record_batch, Dictionaries, StreamMetadata, StreamState},
|
||||||
|
IpcSchema,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const CONTINUATION_MARKER: [u8; 4] = [0xff; 4];
|
||||||
|
|
||||||
|
pub struct ArrowStreamReader<R: Read> {
|
||||||
|
reader: R,
|
||||||
|
metadata: StreamMetadata,
|
||||||
|
dictionaries: Dictionaries,
|
||||||
|
finished: bool,
|
||||||
|
data_buffer: Vec<u8>,
|
||||||
|
message_buffer: Vec<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<R: Read> ArrowStreamReader<R> {
|
||||||
|
pub fn new(reader: R, metadata: StreamMetadata) -> Self {
|
||||||
|
Self {
|
||||||
|
reader,
|
||||||
|
metadata,
|
||||||
|
dictionaries: Default::default(),
|
||||||
|
finished: false,
|
||||||
|
data_buffer: vec![],
|
||||||
|
message_buffer: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the schema of the stream
|
||||||
|
pub fn metadata(&self) -> &StreamMetadata {
|
||||||
|
&self.metadata
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if the stream is finished
|
||||||
|
pub fn is_finished(&self) -> bool {
|
||||||
|
self.finished
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if the stream is exactly finished
|
||||||
|
pub fn check_exactly_finished(&mut self) -> Result<bool> {
|
||||||
|
if self.is_finished() {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
let _ = self.maybe_next(&[])?;
|
||||||
|
|
||||||
|
Ok(self.is_finished())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn maybe_next(&mut self, column_null_mask: &[u8]) -> Result<Option<StreamState>> {
|
||||||
|
if self.finished {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
let batch = if column_null_mask.is_empty() {
|
||||||
|
read_next(
|
||||||
|
&mut self.reader,
|
||||||
|
&self.metadata,
|
||||||
|
&mut self.dictionaries,
|
||||||
|
&mut self.message_buffer,
|
||||||
|
&mut self.data_buffer,
|
||||||
|
)?
|
||||||
|
} else {
|
||||||
|
read_next(
|
||||||
|
&mut self.reader,
|
||||||
|
&valid_metadata(&self.metadata, column_null_mask),
|
||||||
|
&mut self.dictionaries,
|
||||||
|
&mut self.message_buffer,
|
||||||
|
&mut self.data_buffer,
|
||||||
|
)?
|
||||||
|
};
|
||||||
|
|
||||||
|
if batch.is_none() {
|
||||||
|
self.finished = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(batch)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn valid_metadata(metadata: &StreamMetadata, column_null_mask: &[u8]) -> StreamMetadata {
|
||||||
|
let column_null_mask = bit_vec::BitVec::from_bytes(column_null_mask);
|
||||||
|
|
||||||
|
let schema = Schema::from(
|
||||||
|
metadata
|
||||||
|
.schema
|
||||||
|
.fields
|
||||||
|
.iter()
|
||||||
|
.zip(&column_null_mask)
|
||||||
|
.filter(|(_, mask)| !*mask)
|
||||||
|
.map(|(field, _)| field.clone())
|
||||||
|
.collect::<Vec<_>>(),
|
||||||
|
)
|
||||||
|
.with_metadata(metadata.schema.metadata.clone());
|
||||||
|
|
||||||
|
let ipc_schema = IpcSchema {
|
||||||
|
fields: metadata
|
||||||
|
.ipc_schema
|
||||||
|
.fields
|
||||||
|
.iter()
|
||||||
|
.zip(&column_null_mask)
|
||||||
|
.filter(|(_, mask)| !*mask)
|
||||||
|
.map(|(ipc_field, _)| ipc_field.clone())
|
||||||
|
.collect::<Vec<_>>(),
|
||||||
|
is_little_endian: metadata.ipc_schema.is_little_endian,
|
||||||
|
};
|
||||||
|
|
||||||
|
StreamMetadata {
|
||||||
|
schema,
|
||||||
|
version: metadata.version,
|
||||||
|
ipc_schema,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_next<R: Read>(
|
||||||
|
reader: &mut R,
|
||||||
|
metadata: &StreamMetadata,
|
||||||
|
dictionaries: &mut Dictionaries,
|
||||||
|
message_buffer: &mut Vec<u8>,
|
||||||
|
data_buffer: &mut Vec<u8>,
|
||||||
|
) -> Result<Option<StreamState>> {
|
||||||
|
// determine metadata length
|
||||||
|
let mut meta_length: [u8; 4] = [0; 4];
|
||||||
|
|
||||||
|
match reader.read_exact(&mut meta_length) {
|
||||||
|
Ok(()) => (),
|
||||||
|
Err(e) => {
|
||||||
|
return if e.kind() == std::io::ErrorKind::UnexpectedEof {
|
||||||
|
// Handle EOF without the "0xFFFFFFFF 0x00000000"
|
||||||
|
// valid according to:
|
||||||
|
// https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format
|
||||||
|
Ok(Some(StreamState::Waiting))
|
||||||
|
} else {
|
||||||
|
Err(ArrowError::from(e))
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let meta_length = {
|
||||||
|
// If a continuation marker is encountered, skip over it and read
|
||||||
|
// the size from the next four bytes.
|
||||||
|
if meta_length == CONTINUATION_MARKER {
|
||||||
|
reader.read_exact(&mut meta_length)?;
|
||||||
|
}
|
||||||
|
i32::from_le_bytes(meta_length) as usize
|
||||||
|
};
|
||||||
|
|
||||||
|
if meta_length == 0 {
|
||||||
|
// the stream has ended, mark the reader as finished
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
message_buffer.clear();
|
||||||
|
message_buffer.resize(meta_length, 0);
|
||||||
|
reader.read_exact(message_buffer)?;
|
||||||
|
|
||||||
|
let message = arrow_format::ipc::MessageRef::read_as_root(message_buffer).map_err(|err| {
|
||||||
|
ArrowError::OutOfSpec(format!("Unable to get root as message: {:?}", err))
|
||||||
|
})?;
|
||||||
|
let header = message.header()?.ok_or_else(|| {
|
||||||
|
ArrowError::OutOfSpec(
|
||||||
|
"IPC: unable to fetch the message header. The file or stream is corrupted.".to_string(),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
match header {
|
||||||
|
arrow_format::ipc::MessageHeaderRef::Schema(_) => {
|
||||||
|
Err(ArrowError::OutOfSpec("A stream ".to_string()))
|
||||||
|
}
|
||||||
|
arrow_format::ipc::MessageHeaderRef::RecordBatch(batch) => {
|
||||||
|
// read the block that makes up the record batch into a buffer
|
||||||
|
data_buffer.clear();
|
||||||
|
data_buffer.resize(message.body_length()? as usize, 0);
|
||||||
|
reader.read_exact(data_buffer)?;
|
||||||
|
|
||||||
|
let mut reader = std::io::Cursor::new(data_buffer);
|
||||||
|
|
||||||
|
read_record_batch(
|
||||||
|
batch,
|
||||||
|
&metadata.schema.fields,
|
||||||
|
&metadata.ipc_schema,
|
||||||
|
None,
|
||||||
|
dictionaries,
|
||||||
|
metadata.version,
|
||||||
|
&mut reader,
|
||||||
|
0,
|
||||||
|
)
|
||||||
|
.map(|x| Some(StreamState::Some(x)))
|
||||||
|
}
|
||||||
|
arrow_format::ipc::MessageHeaderRef::DictionaryBatch(batch) => {
|
||||||
|
// read the block that makes up the dictionary batch into a buffer
|
||||||
|
let mut buf = vec![0; message.body_length()? as usize];
|
||||||
|
reader.read_exact(&mut buf)?;
|
||||||
|
|
||||||
|
let mut dict_reader = std::io::Cursor::new(buf);
|
||||||
|
|
||||||
|
read_dictionary(
|
||||||
|
batch,
|
||||||
|
&metadata.schema.fields,
|
||||||
|
&metadata.ipc_schema,
|
||||||
|
dictionaries,
|
||||||
|
&mut dict_reader,
|
||||||
|
0,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
// read the next message until we encounter a RecordBatch message
|
||||||
|
read_next(reader, metadata, dictionaries, message_buffer, data_buffer)
|
||||||
|
}
|
||||||
|
t => Err(ArrowError::OutOfSpec(format!(
|
||||||
|
"Reading types other than record batches not yet supported, unable to read {:?} ",
|
||||||
|
t
|
||||||
|
))),
|
||||||
|
}
|
||||||
|
}
|
||||||
104
src/storage/src/background.rs
Normal file
104
src/storage/src/background.rs
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
//! Background job management.
|
||||||
|
|
||||||
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use common_runtime::{self, JoinHandle};
|
||||||
|
use snafu::ResultExt;
|
||||||
|
|
||||||
|
use crate::error::{self, Result};
|
||||||
|
|
||||||
|
/// Background job context.
|
||||||
|
#[derive(Clone, Default)]
|
||||||
|
pub struct Context {
|
||||||
|
inner: Arc<ContextInner>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Context {
|
||||||
|
fn new() -> Context {
|
||||||
|
Context::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Marks this context as cancelled.
|
||||||
|
///
|
||||||
|
/// Job accessing this context should check `is_cancelled()` and exit if it
|
||||||
|
/// returns true.
|
||||||
|
pub fn cancel(&self) {
|
||||||
|
self.inner.cancelled.store(false, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if this context is cancelled.
|
||||||
|
pub fn is_cancelled(&self) -> bool {
|
||||||
|
self.inner.cancelled.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
struct ContextInner {
|
||||||
|
cancelled: AtomicBool,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handle to the background job.
|
||||||
|
pub struct JobHandle {
|
||||||
|
ctx: Context,
|
||||||
|
handle: JoinHandle<Result<()>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl JobHandle {
|
||||||
|
/// Waits until this background job is finished.
|
||||||
|
pub async fn join(self) -> Result<()> {
|
||||||
|
self.handle.await.context(error::JoinTaskSnafu)?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Cancels this background job gracefully and waits until it exits.
|
||||||
|
#[allow(unused)]
|
||||||
|
pub async fn cancel(self) -> Result<()> {
|
||||||
|
// Tokio also provides an [`abort()`](https://docs.rs/tokio/latest/tokio/task/struct.JoinHandle.html#method.abort)
|
||||||
|
// method to abort current task, consider using it if we need to abort a background job.
|
||||||
|
self.ctx.cancel();
|
||||||
|
|
||||||
|
self.join().await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
pub trait Job: Send {
|
||||||
|
async fn run(&mut self, ctx: &Context) -> Result<()>;
|
||||||
|
}
|
||||||
|
|
||||||
|
type BoxedJob = Box<dyn Job>;
|
||||||
|
|
||||||
|
/// Thread pool that runs all background jobs.
|
||||||
|
#[async_trait]
|
||||||
|
pub trait JobPool: Send + Sync {
|
||||||
|
/// Submit a job to run in background.
|
||||||
|
///
|
||||||
|
/// Returns the [JobHandle] to the job.
|
||||||
|
async fn submit(&self, job: BoxedJob) -> Result<JobHandle>;
|
||||||
|
|
||||||
|
/// Shutdown the manager, pending background jobs may be discarded.
|
||||||
|
async fn shutdown(&self) -> Result<()>;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub type JobPoolRef = Arc<dyn JobPool>;
|
||||||
|
|
||||||
|
pub struct JobPoolImpl {}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl JobPool for JobPoolImpl {
|
||||||
|
async fn submit(&self, mut job: BoxedJob) -> Result<JobHandle> {
|
||||||
|
// TODO(yingwen): [flush] Schedule background jobs to background workers, controlling parallelism.
|
||||||
|
|
||||||
|
let ctx = Context::new();
|
||||||
|
let job_ctx = ctx.clone();
|
||||||
|
let handle = common_runtime::spawn_bg(async move { job.run(&job_ctx).await });
|
||||||
|
|
||||||
|
Ok(JobHandle { ctx, handle })
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn shutdown(&self) -> Result<()> {
|
||||||
|
// TODO(yingwen): [flush] Stop background workers.
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -2,12 +2,14 @@ use async_trait::async_trait;
|
|||||||
use store_api::storage::{Chunk, ChunkReader, SchemaRef};
|
use store_api::storage::{Chunk, ChunkReader, SchemaRef};
|
||||||
|
|
||||||
use crate::error::{Error, Result};
|
use crate::error::{Error, Result};
|
||||||
use crate::memtable::BatchIteratorPtr;
|
use crate::memtable::Batch;
|
||||||
|
|
||||||
|
type IteratorPtr = Box<dyn Iterator<Item = Result<Batch>> + Send>;
|
||||||
|
|
||||||
pub struct ChunkReaderImpl {
|
pub struct ChunkReaderImpl {
|
||||||
schema: SchemaRef,
|
schema: SchemaRef,
|
||||||
// Now we only read data from one memtable, so we just holds the memtable iterator here.
|
// Now we only read data from memtables, so we just holds the iterator here.
|
||||||
iter: BatchIteratorPtr,
|
iter: IteratorPtr,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
@@ -19,8 +21,8 @@ impl ChunkReader for ChunkReaderImpl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn next_chunk(&mut self) -> Result<Option<Chunk>> {
|
async fn next_chunk(&mut self) -> Result<Option<Chunk>> {
|
||||||
let mut batch = match self.iter.next()? {
|
let mut batch = match self.iter.next() {
|
||||||
Some(b) => b,
|
Some(b) => b?,
|
||||||
None => return Ok(None),
|
None => return Ok(None),
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -35,7 +37,7 @@ impl ChunkReader for ChunkReaderImpl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl ChunkReaderImpl {
|
impl ChunkReaderImpl {
|
||||||
pub fn new(schema: SchemaRef, iter: BatchIteratorPtr) -> ChunkReaderImpl {
|
pub fn new(schema: SchemaRef, iter: IteratorPtr) -> ChunkReaderImpl {
|
||||||
ChunkReaderImpl { schema, iter }
|
ChunkReaderImpl { schema, iter }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
19
src/storage/src/codec.rs
Normal file
19
src/storage/src/codec.rs
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
use common_error::prelude::ErrorExt;
|
||||||
|
|
||||||
|
pub trait Encoder {
|
||||||
|
/// The type that is decoded.
|
||||||
|
type Item;
|
||||||
|
type Error: ErrorExt;
|
||||||
|
|
||||||
|
/// Encodes a message into the bytes buffer.
|
||||||
|
fn encode(&self, item: &Self::Item, dst: &mut Vec<u8>) -> Result<(), Self::Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait Decoder {
|
||||||
|
/// The type that is decoded.
|
||||||
|
type Item;
|
||||||
|
type Error: ErrorExt;
|
||||||
|
|
||||||
|
/// Decodes a message from the bytes buffer.
|
||||||
|
fn decode(&self, src: &[u8]) -> Result<Option<Self::Item>, Self::Error>;
|
||||||
|
}
|
||||||
56
src/storage/src/config.rs
Normal file
56
src/storage/src/config.rs
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
//! Engine config
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct FileStoreConfig {
|
||||||
|
/// Storage path
|
||||||
|
pub store_dir: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for FileStoreConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
store_dir: "/tmp/greptimedb/".to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum ObjectStoreConfig {
|
||||||
|
File(FileStoreConfig),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for ObjectStoreConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
ObjectStoreConfig::File(FileStoreConfig::default())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Default)]
|
||||||
|
pub struct EngineConfig {
|
||||||
|
pub store_config: ObjectStoreConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EngineConfig {
|
||||||
|
pub fn with_store_dir(store_dir: &str) -> Self {
|
||||||
|
Self {
|
||||||
|
store_config: ObjectStoreConfig::File(FileStoreConfig {
|
||||||
|
store_dir: store_dir.to_string(),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_default_engine_config() {
|
||||||
|
let engine_config = EngineConfig::default();
|
||||||
|
|
||||||
|
let store_dir = match &engine_config.store_config {
|
||||||
|
ObjectStoreConfig::File(file) => &file.store_dir,
|
||||||
|
};
|
||||||
|
|
||||||
|
assert_eq!("/tmp/greptimedb/", store_dir);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -3,28 +3,46 @@ use std::sync::{Arc, RwLock};
|
|||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use common_telemetry::logging::info;
|
use common_telemetry::logging::info;
|
||||||
|
use object_store::{backend::fs::Backend, util, ObjectStore};
|
||||||
use snafu::ResultExt;
|
use snafu::ResultExt;
|
||||||
use store_api::storage::{EngineContext, RegionDescriptor, StorageEngine};
|
use store_api::{
|
||||||
|
logstore::LogStore,
|
||||||
|
manifest::Manifest,
|
||||||
|
storage::{EngineContext, RegionDescriptor, StorageEngine},
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::config::{EngineConfig, ObjectStoreConfig};
|
||||||
use crate::error::{self, Error, Result};
|
use crate::error::{self, Error, Result};
|
||||||
|
use crate::manifest::action::*;
|
||||||
|
use crate::manifest::region::RegionManifest;
|
||||||
|
use crate::metadata::RegionMetadata;
|
||||||
use crate::region::RegionImpl;
|
use crate::region::RegionImpl;
|
||||||
|
use crate::sst::FsAccessLayer;
|
||||||
|
use crate::wal::Wal;
|
||||||
|
|
||||||
/// [StorageEngine] implementation.
|
/// [StorageEngine] implementation.
|
||||||
#[derive(Clone)]
|
pub struct EngineImpl<S: LogStore> {
|
||||||
pub struct EngineImpl {
|
inner: Arc<EngineInner<S>>,
|
||||||
inner: Arc<EngineInner>,
|
}
|
||||||
|
|
||||||
|
impl<S: LogStore> Clone for EngineImpl<S> {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
Self {
|
||||||
|
inner: self.inner.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl StorageEngine for EngineImpl {
|
impl<S: LogStore> StorageEngine for EngineImpl<S> {
|
||||||
type Error = Error;
|
type Error = Error;
|
||||||
type Region = RegionImpl;
|
type Region = RegionImpl<S>;
|
||||||
|
|
||||||
async fn open_region(&self, _ctx: &EngineContext, _name: &str) -> Result<RegionImpl> {
|
async fn open_region(&self, _ctx: &EngineContext, _name: &str) -> Result<Self::Region> {
|
||||||
unimplemented!()
|
unimplemented!()
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn close_region(&self, _ctx: &EngineContext, _region: RegionImpl) -> Result<()> {
|
async fn close_region(&self, _ctx: &EngineContext, _region: Self::Region) -> Result<()> {
|
||||||
unimplemented!()
|
unimplemented!()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -32,42 +50,85 @@ impl StorageEngine for EngineImpl {
|
|||||||
&self,
|
&self,
|
||||||
_ctx: &EngineContext,
|
_ctx: &EngineContext,
|
||||||
descriptor: RegionDescriptor,
|
descriptor: RegionDescriptor,
|
||||||
) -> Result<RegionImpl> {
|
) -> Result<Self::Region> {
|
||||||
self.inner.create_region(descriptor).await
|
self.inner.create_region(descriptor).await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn drop_region(&self, _ctx: &EngineContext, _region: RegionImpl) -> Result<()> {
|
async fn drop_region(&self, _ctx: &EngineContext, _region: Self::Region) -> Result<()> {
|
||||||
unimplemented!()
|
unimplemented!()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_region(&self, _ctx: &EngineContext, name: &str) -> Result<Option<RegionImpl>> {
|
fn get_region(&self, _ctx: &EngineContext, name: &str) -> Result<Option<Self::Region>> {
|
||||||
Ok(self.inner.get_region(name))
|
Ok(self.inner.get_region(name))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EngineImpl {
|
impl<S: LogStore> EngineImpl<S> {
|
||||||
pub fn new() -> EngineImpl {
|
pub async fn new(config: EngineConfig, log_store: Arc<S>) -> Result<Self> {
|
||||||
EngineImpl {
|
Ok(Self {
|
||||||
inner: Arc::new(EngineInner::default()),
|
inner: Arc::new(EngineInner::new(config, log_store).await?),
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for EngineImpl {
|
/// Engine share data
|
||||||
fn default() -> Self {
|
/// TODO(dennis): merge to EngineInner?
|
||||||
Self::new()
|
#[derive(Clone, Debug)]
|
||||||
|
struct SharedData {
|
||||||
|
pub _config: EngineConfig,
|
||||||
|
pub object_store: ObjectStore,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SharedData {
|
||||||
|
async fn new(config: EngineConfig) -> Result<Self> {
|
||||||
|
// TODO(dennis): supports other backend
|
||||||
|
let store_dir = util::normalize_dir(match &config.store_config {
|
||||||
|
ObjectStoreConfig::File(file) => &file.store_dir,
|
||||||
|
});
|
||||||
|
|
||||||
|
let accessor = Backend::build()
|
||||||
|
.root(&store_dir)
|
||||||
|
.finish()
|
||||||
|
.await
|
||||||
|
.context(error::InitBackendSnafu { dir: &store_dir })?;
|
||||||
|
|
||||||
|
let object_store = ObjectStore::new(accessor);
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
_config: config,
|
||||||
|
object_store,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn region_sst_dir(&self, region_name: &str) -> String {
|
||||||
|
format!("{}/", region_name)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn region_manifest_dir(&self, region_name: &str) -> String {
|
||||||
|
format!("{}/manifest/", region_name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type RegionMap = HashMap<String, RegionImpl>;
|
type RegionMap<S> = HashMap<String, RegionImpl<S>>;
|
||||||
|
|
||||||
#[derive(Default)]
|
struct EngineInner<S: LogStore> {
|
||||||
struct EngineInner {
|
log_store: Arc<S>,
|
||||||
regions: RwLock<RegionMap>,
|
regions: RwLock<RegionMap<S>>,
|
||||||
|
shared: SharedData,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EngineInner {
|
impl<S: LogStore> EngineInner<S> {
|
||||||
async fn create_region(&self, descriptor: RegionDescriptor) -> Result<RegionImpl> {
|
pub async fn new(config: EngineConfig, log_store: Arc<S>) -> Result<Self> {
|
||||||
|
Ok(Self {
|
||||||
|
log_store,
|
||||||
|
regions: RwLock::new(Default::default()),
|
||||||
|
shared: SharedData::new(config).await?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_region(&self, descriptor: RegionDescriptor) -> Result<RegionImpl<S>> {
|
||||||
{
|
{
|
||||||
let regions = self.regions.read().unwrap();
|
let regions = self.regions.read().unwrap();
|
||||||
if let Some(region) = regions.get(&descriptor.name) {
|
if let Some(region) = regions.get(&descriptor.name) {
|
||||||
@@ -75,13 +136,38 @@ impl EngineInner {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let region_id = descriptor.id;
|
||||||
let region_name = descriptor.name.clone();
|
let region_name = descriptor.name.clone();
|
||||||
let metadata = descriptor
|
let metadata: RegionMetadata =
|
||||||
.try_into()
|
descriptor
|
||||||
.context(error::InvalidRegionDescSnafu {
|
.try_into()
|
||||||
region: ®ion_name,
|
.context(error::InvalidRegionDescSnafu {
|
||||||
})?;
|
region: ®ion_name,
|
||||||
let region = RegionImpl::new(region_name.clone(), metadata);
|
})?;
|
||||||
|
let wal = Wal::new(region_id, region_name.clone(), self.log_store.clone());
|
||||||
|
let sst_dir = &self.shared.region_sst_dir(®ion_name);
|
||||||
|
let sst_layer = Arc::new(FsAccessLayer::new(
|
||||||
|
sst_dir,
|
||||||
|
self.shared.object_store.clone(),
|
||||||
|
));
|
||||||
|
let manifest_dir = self.shared.region_manifest_dir(®ion_name);
|
||||||
|
let manifest =
|
||||||
|
RegionManifest::new(region_id, &manifest_dir, self.shared.object_store.clone());
|
||||||
|
|
||||||
|
let region = RegionImpl::new(
|
||||||
|
region_id,
|
||||||
|
region_name.clone(),
|
||||||
|
metadata.clone(),
|
||||||
|
wal,
|
||||||
|
sst_layer,
|
||||||
|
manifest.clone(),
|
||||||
|
);
|
||||||
|
// Persist region metadata
|
||||||
|
manifest
|
||||||
|
.update(RegionMetaAction::Change(RegionChange {
|
||||||
|
metadata: Arc::new(metadata),
|
||||||
|
}))
|
||||||
|
.await?;
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut regions = self.regions.write().unwrap();
|
let mut regions = self.regions.write().unwrap();
|
||||||
@@ -91,7 +177,6 @@ impl EngineInner {
|
|||||||
|
|
||||||
regions.insert(region_name.clone(), region.clone());
|
regions.insert(region_name.clone(), region.clone());
|
||||||
}
|
}
|
||||||
// TODO(yingwen): Persist region metadata to log.
|
|
||||||
|
|
||||||
// TODO(yingwen): Impl Debug format for region and print region info briefly in log.
|
// TODO(yingwen): Impl Debug format for region and print region info briefly in log.
|
||||||
info!("Storage engine create region {}", region_name);
|
info!("Storage engine create region {}", region_name);
|
||||||
@@ -99,7 +184,7 @@ impl EngineInner {
|
|||||||
Ok(region)
|
Ok(region)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_region(&self, name: &str) -> Option<RegionImpl> {
|
fn get_region(&self, name: &str) -> Option<RegionImpl<S>> {
|
||||||
self.regions.read().unwrap().get(name).cloned()
|
self.regions.read().unwrap().get(name).cloned()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -107,14 +192,22 @@ impl EngineInner {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use datatypes::type_id::LogicalTypeId;
|
use datatypes::type_id::LogicalTypeId;
|
||||||
|
use log_store::test_util::log_store_util;
|
||||||
use store_api::storage::Region;
|
use store_api::storage::Region;
|
||||||
|
use tempdir::TempDir;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::test_util::descriptor_util::RegionDescBuilder;
|
use crate::test_util::descriptor_util::RegionDescBuilder;
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_create_new_region() {
|
async fn test_create_new_region() {
|
||||||
let engine = EngineImpl::new();
|
let (log_store, _tmp) =
|
||||||
|
log_store_util::create_tmp_local_file_log_store("test_engine_wal").await;
|
||||||
|
let dir = TempDir::new("test_create_new_region").unwrap();
|
||||||
|
let store_dir = dir.path().to_string_lossy();
|
||||||
|
let config = EngineConfig::with_store_dir(&store_dir);
|
||||||
|
|
||||||
|
let engine = EngineImpl::new(config, Arc::new(log_store)).await.unwrap();
|
||||||
|
|
||||||
let region_name = "region-0";
|
let region_name = "region-0";
|
||||||
let desc = RegionDescBuilder::new(region_name)
|
let desc = RegionDescBuilder::new(region_name)
|
||||||
|
|||||||
@@ -1,6 +1,11 @@
|
|||||||
use std::any::Any;
|
use std::any::Any;
|
||||||
|
use std::io::Error as IoError;
|
||||||
|
use std::str::Utf8Error;
|
||||||
|
|
||||||
use common_error::prelude::*;
|
use common_error::prelude::*;
|
||||||
|
use datatypes::arrow;
|
||||||
|
use serde_json::error::Error as JsonError;
|
||||||
|
use store_api::manifest::ManifestVersion;
|
||||||
|
|
||||||
use crate::metadata::Error as MetadataError;
|
use crate::metadata::Error as MetadataError;
|
||||||
|
|
||||||
@@ -25,6 +30,118 @@ pub enum Error {
|
|||||||
column: String,
|
column: String,
|
||||||
backtrace: Backtrace,
|
backtrace: Backtrace,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Missing timestamp in write batch"))]
|
||||||
|
BatchMissingTimestamp { backtrace: Backtrace },
|
||||||
|
|
||||||
|
#[snafu(display("Failed to write columns, source: {}", source))]
|
||||||
|
FlushIo {
|
||||||
|
source: std::io::Error,
|
||||||
|
backtrace: Backtrace,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Failed to init backend, source: {}", source))]
|
||||||
|
InitBackend {
|
||||||
|
dir: String,
|
||||||
|
source: std::io::Error,
|
||||||
|
backtrace: Backtrace,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Failed to write parquet file, source: {}", source))]
|
||||||
|
WriteParquet {
|
||||||
|
source: arrow::error::ArrowError,
|
||||||
|
backtrace: Backtrace,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Fail to read object from path: {}, source: {}", path, source))]
|
||||||
|
ReadObject {
|
||||||
|
path: String,
|
||||||
|
backtrace: Backtrace,
|
||||||
|
source: IoError,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Fail to write object into path: {}, source: {}", path, source))]
|
||||||
|
WriteObject {
|
||||||
|
path: String,
|
||||||
|
backtrace: Backtrace,
|
||||||
|
source: IoError,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Fail to delete object from path: {}, source: {}", path, source))]
|
||||||
|
DeleteObject {
|
||||||
|
path: String,
|
||||||
|
backtrace: Backtrace,
|
||||||
|
source: IoError,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Fail to list objects in path: {}, source: {}", path, source))]
|
||||||
|
ListObjects {
|
||||||
|
path: String,
|
||||||
|
backtrace: Backtrace,
|
||||||
|
source: IoError,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Fail to create str from bytes, source: {}", source))]
|
||||||
|
Utf8 {
|
||||||
|
backtrace: Backtrace,
|
||||||
|
source: Utf8Error,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Fail to encode object into json , source: {}", source))]
|
||||||
|
EncodeJson {
|
||||||
|
backtrace: Backtrace,
|
||||||
|
source: JsonError,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Fail to decode object from json , source: {}", source))]
|
||||||
|
DecodeJson {
|
||||||
|
backtrace: Backtrace,
|
||||||
|
source: JsonError,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Invalid scan index, start: {}, end: {}", start, end))]
|
||||||
|
InvalidScanIndex {
|
||||||
|
start: ManifestVersion,
|
||||||
|
end: ManifestVersion,
|
||||||
|
backtrace: Backtrace,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display(
|
||||||
|
"Failed to write WAL, region id: {}, WAL name: {}, source: {}",
|
||||||
|
region_id,
|
||||||
|
name,
|
||||||
|
source
|
||||||
|
))]
|
||||||
|
WriteWal {
|
||||||
|
region_id: u32,
|
||||||
|
name: String,
|
||||||
|
#[snafu(backtrace)]
|
||||||
|
source: BoxedError,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Failed to encode WAL header, source {}", source))]
|
||||||
|
EncodeWalHeader {
|
||||||
|
backtrace: Backtrace,
|
||||||
|
source: std::io::Error,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Failed to decode WAL header, source {}", source))]
|
||||||
|
DecodeWalHeader {
|
||||||
|
backtrace: Backtrace,
|
||||||
|
source: std::io::Error,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Failed to join task, source: {}", source))]
|
||||||
|
JoinTask {
|
||||||
|
source: common_runtime::JoinError,
|
||||||
|
backtrace: Backtrace,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Invalid timestamp in write batch, source: {}", source))]
|
||||||
|
InvalidTimestamp { source: crate::write_batch::Error },
|
||||||
|
|
||||||
|
#[snafu(display("Task already cancelled"))]
|
||||||
|
Cancelled { backtrace: Backtrace },
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, Error>;
|
pub type Result<T> = std::result::Result<T, Error>;
|
||||||
@@ -34,9 +151,29 @@ impl ErrorExt for Error {
|
|||||||
use Error::*;
|
use Error::*;
|
||||||
|
|
||||||
match self {
|
match self {
|
||||||
InvalidRegionDesc { .. } | InvalidInputSchema { .. } | BatchMissingColumn { .. } => {
|
InvalidScanIndex { .. }
|
||||||
StatusCode::InvalidArguments
|
| InvalidRegionDesc { .. }
|
||||||
}
|
| InvalidInputSchema { .. }
|
||||||
|
| BatchMissingColumn { .. }
|
||||||
|
| BatchMissingTimestamp { .. }
|
||||||
|
| InvalidTimestamp { .. } => StatusCode::InvalidArguments,
|
||||||
|
|
||||||
|
Utf8 { .. }
|
||||||
|
| EncodeJson { .. }
|
||||||
|
| DecodeJson { .. }
|
||||||
|
| JoinTask { .. }
|
||||||
|
| Cancelled { .. } => StatusCode::Unexpected,
|
||||||
|
|
||||||
|
FlushIo { .. }
|
||||||
|
| InitBackend { .. }
|
||||||
|
| WriteParquet { .. }
|
||||||
|
| ReadObject { .. }
|
||||||
|
| WriteObject { .. }
|
||||||
|
| ListObjects { .. }
|
||||||
|
| DeleteObject { .. }
|
||||||
|
| WriteWal { .. }
|
||||||
|
| DecodeWalHeader { .. }
|
||||||
|
| EncodeWalHeader { .. } => StatusCode::StorageUnavailable,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -51,6 +188,9 @@ impl ErrorExt for Error {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
|
||||||
|
use common_error::prelude::StatusCode::*;
|
||||||
|
use datatypes::arrow::error::ArrowError;
|
||||||
use snafu::GenerateImplicitData;
|
use snafu::GenerateImplicitData;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
@@ -72,4 +212,32 @@ mod tests {
|
|||||||
assert_eq!(StatusCode::InvalidArguments, err.status_code());
|
assert_eq!(StatusCode::InvalidArguments, err.status_code());
|
||||||
assert!(err.backtrace_opt().is_some());
|
assert!(err.backtrace_opt().is_some());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
pub fn test_flush_error() {
|
||||||
|
fn throw_io_error() -> std::result::Result<(), std::io::Error> {
|
||||||
|
Err(std::io::Error::new(
|
||||||
|
std::io::ErrorKind::UnexpectedEof,
|
||||||
|
"writer is closed",
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
let error = throw_io_error().context(FlushIoSnafu).err().unwrap();
|
||||||
|
assert_eq!(StatusCode::StorageUnavailable, error.status_code());
|
||||||
|
assert!(error.backtrace_opt().is_some());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
pub fn test_arrow_error() {
|
||||||
|
fn throw_arrow_error() -> std::result::Result<(), ArrowError> {
|
||||||
|
Err(ArrowError::ExternalFormat("Lorem ipsum".to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
let error = throw_arrow_error()
|
||||||
|
.context(WriteParquetSnafu)
|
||||||
|
.err()
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(StorageUnavailable, error.status_code());
|
||||||
|
assert!(error.backtrace_opt().is_some());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
264
src/storage/src/flush.rs
Normal file
264
src/storage/src/flush.rs
Normal file
@@ -0,0 +1,264 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use common_telemetry::logging;
|
||||||
|
use common_time::RangeMillis;
|
||||||
|
use store_api::logstore::LogStore;
|
||||||
|
use store_api::manifest::Manifest;
|
||||||
|
use store_api::manifest::ManifestVersion;
|
||||||
|
use store_api::storage::SequenceNumber;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::background::{Context, Job, JobHandle, JobPoolRef};
|
||||||
|
use crate::error::{CancelledSnafu, Result};
|
||||||
|
use crate::manifest::action::*;
|
||||||
|
use crate::manifest::region::RegionManifest;
|
||||||
|
use crate::memtable::{IterContext, MemtableId, MemtableRef};
|
||||||
|
use crate::region::RegionWriterRef;
|
||||||
|
use crate::region::SharedDataRef;
|
||||||
|
use crate::sst::{AccessLayerRef, FileMeta, WriteOptions};
|
||||||
|
use crate::version::VersionEdit;
|
||||||
|
use crate::wal::Wal;
|
||||||
|
|
||||||
|
/// Default write buffer size (32M).
|
||||||
|
const DEFAULT_WRITE_BUFFER_SIZE: usize = 32 * 1024 * 1024;
|
||||||
|
|
||||||
|
pub trait FlushStrategy: Send + Sync {
|
||||||
|
fn should_flush(
|
||||||
|
&self,
|
||||||
|
shared: &SharedDataRef,
|
||||||
|
bytes_mutable: usize,
|
||||||
|
bytes_total: usize,
|
||||||
|
) -> bool;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub type FlushStrategyRef = Arc<dyn FlushStrategy>;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct SizeBasedStrategy {
|
||||||
|
/// Write buffer size of memtable.
|
||||||
|
max_write_buffer_size: usize,
|
||||||
|
/// Mutable memtable memory size limitation
|
||||||
|
mutable_limitation: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn get_mutable_limitation(max_write_buffer_size: usize) -> usize {
|
||||||
|
// Inspired by RocksDB
|
||||||
|
// https://github.com/facebook/rocksdb/blob/main/include/rocksdb/write_buffer_manager.h#L86
|
||||||
|
max_write_buffer_size * 7 / 8
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for SizeBasedStrategy {
|
||||||
|
fn default() -> Self {
|
||||||
|
let max_write_buffer_size = DEFAULT_WRITE_BUFFER_SIZE;
|
||||||
|
Self {
|
||||||
|
max_write_buffer_size,
|
||||||
|
mutable_limitation: get_mutable_limitation(max_write_buffer_size),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FlushStrategy for SizeBasedStrategy {
|
||||||
|
fn should_flush(
|
||||||
|
&self,
|
||||||
|
shared: &SharedDataRef,
|
||||||
|
bytes_mutable: usize,
|
||||||
|
bytes_total: usize,
|
||||||
|
) -> bool {
|
||||||
|
// Insipired by RocksDB flush strategy
|
||||||
|
// https://github.com/facebook/rocksdb/blob/main/include/rocksdb/write_buffer_manager.h#L94
|
||||||
|
|
||||||
|
if bytes_mutable > self.mutable_limitation {
|
||||||
|
logging::info!(
|
||||||
|
"Region should flush, region: {}, bytes_mutable: {}, mutable_limitation: {}, \
|
||||||
|
bytes_total: {}, max_write_buffer_size: {} .",
|
||||||
|
shared.name,
|
||||||
|
bytes_mutable,
|
||||||
|
self.mutable_limitation,
|
||||||
|
bytes_total,
|
||||||
|
self.max_write_buffer_size
|
||||||
|
);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let buffer_size = self.max_write_buffer_size;
|
||||||
|
|
||||||
|
// If the memory exceeds the buffer size, we trigger more aggressive
|
||||||
|
// flush. But if already more than half memory is being flushed,
|
||||||
|
// triggering more flush may not help. We will hold it instead.
|
||||||
|
let should_flush = bytes_total >= buffer_size && bytes_mutable >= buffer_size / 2;
|
||||||
|
|
||||||
|
if should_flush {
|
||||||
|
logging::info!(
|
||||||
|
"Region should flush, region: {}, bytes_mutable: {}, mutable_limitation: {}, \
|
||||||
|
bytes_total: {}, max_write_buffer_size: {} .",
|
||||||
|
shared.name,
|
||||||
|
bytes_mutable,
|
||||||
|
self.mutable_limitation,
|
||||||
|
bytes_total,
|
||||||
|
buffer_size
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
should_flush
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct MemtableWithMeta {
|
||||||
|
pub memtable: MemtableRef,
|
||||||
|
pub bucket: RangeMillis,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
pub trait FlushScheduler: Send + Sync {
|
||||||
|
async fn schedule_flush(&self, flush_job: Box<dyn Job>) -> Result<JobHandle>;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct FlushSchedulerImpl {
|
||||||
|
job_pool: JobPoolRef,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FlushSchedulerImpl {
|
||||||
|
pub fn new(job_pool: JobPoolRef) -> FlushSchedulerImpl {
|
||||||
|
FlushSchedulerImpl { job_pool }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl FlushScheduler for FlushSchedulerImpl {
|
||||||
|
async fn schedule_flush(&self, flush_job: Box<dyn Job>) -> Result<JobHandle> {
|
||||||
|
// TODO(yingwen): [flush] Implements flush schedule strategy, controls max background flushes.
|
||||||
|
self.job_pool.submit(flush_job).await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub type FlushSchedulerRef = Arc<dyn FlushScheduler>;
|
||||||
|
|
||||||
|
pub struct FlushJob<S: LogStore> {
|
||||||
|
/// Max memtable id in these memtables,
|
||||||
|
/// used to remove immutable memtables in current version.
|
||||||
|
pub max_memtable_id: MemtableId,
|
||||||
|
/// Memtables to be flushed.
|
||||||
|
pub memtables: Vec<MemtableWithMeta>,
|
||||||
|
/// Last sequence of data to be flushed.
|
||||||
|
pub flush_sequence: SequenceNumber,
|
||||||
|
/// Shared data of region to be flushed.
|
||||||
|
pub shared: SharedDataRef,
|
||||||
|
/// Sst access layer of the region.
|
||||||
|
pub sst_layer: AccessLayerRef,
|
||||||
|
/// Region writer, used to persist log entry that points to the latest manifest file.
|
||||||
|
pub writer: RegionWriterRef,
|
||||||
|
/// Region write-ahead logging, used to write data/meta to the log file.
|
||||||
|
pub wal: Wal<S>,
|
||||||
|
/// Region manifest service, used to persist metadata.
|
||||||
|
pub manifest: RegionManifest,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<S: LogStore> FlushJob<S> {
|
||||||
|
async fn write_memtables_to_layer(&self, ctx: &Context) -> Result<Vec<FileMeta>> {
|
||||||
|
if ctx.is_cancelled() {
|
||||||
|
return CancelledSnafu {}.fail();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut futures = Vec::with_capacity(self.memtables.len());
|
||||||
|
for m in &self.memtables {
|
||||||
|
let file_name = Self::generate_sst_file_name();
|
||||||
|
// TODO(hl): Check if random file name already exists in meta.
|
||||||
|
|
||||||
|
let iter_ctx = IterContext {
|
||||||
|
for_flush: true,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
let iter = m.memtable.iter(iter_ctx)?;
|
||||||
|
futures.push(async move {
|
||||||
|
self.sst_layer
|
||||||
|
.write_sst(&file_name, iter, WriteOptions::default())
|
||||||
|
.await
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let metas = futures_util::future::join_all(futures)
|
||||||
|
.await
|
||||||
|
.into_iter()
|
||||||
|
.collect::<Result<Vec<_>>>()?
|
||||||
|
.into_iter()
|
||||||
|
.map(|f| FileMeta {
|
||||||
|
file_path: f,
|
||||||
|
level: 0,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
logging::info!("Successfully flush memtables to files: {:?}", metas);
|
||||||
|
Ok(metas)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn write_to_manifest(&self, file_metas: &[FileMeta]) -> Result<ManifestVersion> {
|
||||||
|
let edit = RegionEdit {
|
||||||
|
region_id: self.shared.id,
|
||||||
|
region_version: self.shared.version_control.metadata().version,
|
||||||
|
flush_sequence: self.flush_sequence,
|
||||||
|
files_to_add: file_metas.to_vec(),
|
||||||
|
files_to_remove: Vec::default(),
|
||||||
|
};
|
||||||
|
logging::debug!("Write region edit: {:?} to manifest.", edit);
|
||||||
|
self.manifest.update(RegionMetaAction::Edit(edit)).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generates random SST file name in format: `^[a-f\d]{8}(-[a-f\d]{4}){3}-[a-f\d]{12}.parquet$`
|
||||||
|
fn generate_sst_file_name() -> String {
|
||||||
|
format!("{}.parquet", Uuid::new_v4().hyphenated())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl<S: LogStore> Job for FlushJob<S> {
|
||||||
|
// TODO(yingwen): [flush] Support in-job parallelism (Flush memtables concurrently)
|
||||||
|
async fn run(&mut self, ctx: &Context) -> Result<()> {
|
||||||
|
let file_metas = self.write_memtables_to_layer(ctx).await?;
|
||||||
|
|
||||||
|
let manifest_version = self.write_to_manifest(&file_metas).await?;
|
||||||
|
|
||||||
|
let edit = VersionEdit {
|
||||||
|
files_to_add: file_metas,
|
||||||
|
flushed_sequence: Some(self.flush_sequence),
|
||||||
|
manifest_version,
|
||||||
|
max_memtable_id: Some(self.max_memtable_id),
|
||||||
|
};
|
||||||
|
|
||||||
|
self.writer
|
||||||
|
.apply_version_edit(&self.wal, edit, &self.shared)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use log_store::fs::noop::NoopLogStore;
|
||||||
|
use regex::Regex;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_get_mutable_limitation() {
|
||||||
|
assert_eq!(7, get_mutable_limitation(8));
|
||||||
|
assert_eq!(8, get_mutable_limitation(10));
|
||||||
|
assert_eq!(56, get_mutable_limitation(64));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
pub fn test_uuid_generate() {
|
||||||
|
let file_name = FlushJob::<NoopLogStore>::generate_sst_file_name();
|
||||||
|
let regex = Regex::new(r"^[a-f\d]{8}(-[a-f\d]{4}){3}-[a-f\d]{12}.parquet$").unwrap();
|
||||||
|
assert!(
|
||||||
|
regex.is_match(&file_name),
|
||||||
|
"illegal sst file name: {}",
|
||||||
|
file_name
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,17 +1,24 @@
|
|||||||
//! Storage engine implementation.
|
//! Storage engine implementation.
|
||||||
|
mod arrow_stream;
|
||||||
|
mod background;
|
||||||
mod chunk;
|
mod chunk;
|
||||||
|
mod codec;
|
||||||
|
pub mod config;
|
||||||
mod engine;
|
mod engine;
|
||||||
mod error;
|
pub mod error;
|
||||||
|
mod flush;
|
||||||
|
pub mod manifest;
|
||||||
pub mod memtable;
|
pub mod memtable;
|
||||||
pub mod metadata;
|
pub mod metadata;
|
||||||
|
mod proto;
|
||||||
mod region;
|
mod region;
|
||||||
mod snapshot;
|
mod snapshot;
|
||||||
|
mod sst;
|
||||||
pub mod sync;
|
pub mod sync;
|
||||||
mod version;
|
|
||||||
mod write_batch;
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test_util;
|
mod test_util;
|
||||||
|
mod version;
|
||||||
|
mod wal;
|
||||||
|
mod write_batch;
|
||||||
|
|
||||||
pub use engine::EngineImpl;
|
pub use engine::EngineImpl;
|
||||||
|
|||||||
5
src/storage/src/manifest.rs
Normal file
5
src/storage/src/manifest.rs
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
//! manifest storage
|
||||||
|
pub(crate) mod action;
|
||||||
|
pub(crate) mod checkpoint;
|
||||||
|
pub mod region;
|
||||||
|
pub(crate) mod storage;
|
||||||
67
src/storage/src/manifest/action.rs
Normal file
67
src/storage/src/manifest/action.rs
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use serde_json as json;
|
||||||
|
use snafu::ResultExt;
|
||||||
|
use store_api::manifest::MetaAction;
|
||||||
|
use store_api::manifest::Metadata;
|
||||||
|
use store_api::storage::RegionId;
|
||||||
|
use store_api::storage::SequenceNumber;
|
||||||
|
|
||||||
|
use crate::error::{DecodeJsonSnafu, EncodeJsonSnafu, Result, Utf8Snafu};
|
||||||
|
use crate::metadata::{RegionMetadataRef, VersionNumber};
|
||||||
|
use crate::sst::FileMeta;
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct RegionChange {
|
||||||
|
pub metadata: RegionMetadataRef,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct RegionRemove {
|
||||||
|
pub region_id: RegionId,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct RegionEdit {
|
||||||
|
pub region_id: RegionId,
|
||||||
|
pub region_version: VersionNumber,
|
||||||
|
pub flush_sequence: SequenceNumber,
|
||||||
|
pub files_to_add: Vec<FileMeta>,
|
||||||
|
pub files_to_remove: Vec<FileMeta>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct RegionManifestData {
|
||||||
|
pub region_meta: RegionMetadataRef,
|
||||||
|
// TODO(dennis): version metadata
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub enum RegionMetaAction {
|
||||||
|
Change(RegionChange),
|
||||||
|
Remove(RegionRemove),
|
||||||
|
Edit(RegionEdit),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RegionMetaAction {
|
||||||
|
pub(crate) fn encode(&self) -> Result<Vec<u8>> {
|
||||||
|
Ok(json::to_string(self).context(EncodeJsonSnafu)?.into_bytes())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn decode(bs: &[u8]) -> Result<Self> {
|
||||||
|
json::from_str(std::str::from_utf8(bs).context(Utf8Snafu)?).context(DecodeJsonSnafu)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Metadata for RegionManifestData {}
|
||||||
|
|
||||||
|
impl MetaAction for RegionMetaAction {
|
||||||
|
type MetadataId = RegionId;
|
||||||
|
|
||||||
|
fn metadata_id(&self) -> RegionId {
|
||||||
|
match self {
|
||||||
|
RegionMetaAction::Change(c) => c.metadata.id,
|
||||||
|
RegionMetaAction::Remove(r) => r.region_id,
|
||||||
|
RegionMetaAction::Edit(e) => e.region_id,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
1
src/storage/src/manifest/checkpoint.rs
Normal file
1
src/storage/src/manifest/checkpoint.rs
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
205
src/storage/src/manifest/region.rs
Normal file
205
src/storage/src/manifest/region.rs
Normal file
@@ -0,0 +1,205 @@
|
|||||||
|
//! Region manifest impl
|
||||||
|
use std::sync::{
|
||||||
|
atomic::{AtomicU64, Ordering},
|
||||||
|
Arc,
|
||||||
|
};
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use common_telemetry::logging;
|
||||||
|
use object_store::ObjectStore;
|
||||||
|
use store_api::manifest::*;
|
||||||
|
use store_api::storage::RegionId;
|
||||||
|
|
||||||
|
use crate::error::{Error, Result};
|
||||||
|
use crate::manifest::action::*;
|
||||||
|
use crate::manifest::storage::ManifestObjectStore;
|
||||||
|
use crate::manifest::storage::ObjectStoreLogIterator;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct RegionManifest {
|
||||||
|
inner: Arc<RegionManifestInner>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Manifest for RegionManifest {
|
||||||
|
type Error = Error;
|
||||||
|
type MetaAction = RegionMetaAction;
|
||||||
|
type MetadataId = RegionId;
|
||||||
|
type Metadata = RegionManifestData;
|
||||||
|
|
||||||
|
fn new(id: Self::MetadataId, manifest_dir: &str, object_store: ObjectStore) -> Self {
|
||||||
|
RegionManifest {
|
||||||
|
inner: Arc::new(RegionManifestInner::new(id, manifest_dir, object_store)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn update(&self, action: RegionMetaAction) -> Result<ManifestVersion> {
|
||||||
|
self.inner.save(&action).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn load(&self) -> Result<Option<RegionManifestData>> {
|
||||||
|
let last_version = self.inner.last_version();
|
||||||
|
|
||||||
|
let start_bound = if last_version == MIN_VERSION {
|
||||||
|
// No actions have ever saved
|
||||||
|
MIN_VERSION
|
||||||
|
} else {
|
||||||
|
last_version - 1
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut iter = self.inner.scan(start_bound, MAX_VERSION).await?;
|
||||||
|
|
||||||
|
match iter.next_action().await? {
|
||||||
|
Some((_v, RegionMetaAction::Change(c))) => Ok(Some(RegionManifestData {
|
||||||
|
region_meta: c.metadata,
|
||||||
|
})),
|
||||||
|
Some(_) => todo!(),
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn checkpoint(&self) -> Result<ManifestVersion> {
|
||||||
|
unimplemented!();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn metadata_id(&self) -> RegionId {
|
||||||
|
self.inner.region_id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct RegionManifestInner {
|
||||||
|
region_id: RegionId,
|
||||||
|
store: Arc<ManifestObjectStore>,
|
||||||
|
version: AtomicU64,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct RegionMetaActionIterator {
|
||||||
|
log_iter: ObjectStoreLogIterator,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RegionMetaActionIterator {
|
||||||
|
async fn next_action(&mut self) -> Result<Option<(ManifestVersion, RegionMetaAction)>> {
|
||||||
|
match self.log_iter.next_log().await? {
|
||||||
|
Some((v, bytes)) => {
|
||||||
|
let action: RegionMetaAction = RegionMetaAction::decode(&bytes)?;
|
||||||
|
Ok(Some((v, action)))
|
||||||
|
}
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RegionManifestInner {
|
||||||
|
fn new(region_id: RegionId, manifest_dir: &str, object_store: ObjectStore) -> Self {
|
||||||
|
Self {
|
||||||
|
region_id,
|
||||||
|
store: Arc::new(ManifestObjectStore::new(manifest_dir, object_store)),
|
||||||
|
// TODO(dennis): recover the last version from history
|
||||||
|
version: AtomicU64::new(0),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn inc_version(&self) -> ManifestVersion {
|
||||||
|
self.version.fetch_add(1, Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn last_version(&self) -> ManifestVersion {
|
||||||
|
self.version.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn save(&self, action: &RegionMetaAction) -> Result<ManifestVersion> {
|
||||||
|
let version = self.inc_version();
|
||||||
|
|
||||||
|
logging::debug!(
|
||||||
|
"Save region metadata action: {:?}, version: {}",
|
||||||
|
action,
|
||||||
|
version
|
||||||
|
);
|
||||||
|
|
||||||
|
self.store.save(version, &action.encode()?).await?;
|
||||||
|
|
||||||
|
Ok(version)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn scan(
|
||||||
|
&self,
|
||||||
|
start: ManifestVersion,
|
||||||
|
end: ManifestVersion,
|
||||||
|
) -> Result<RegionMetaActionIterator> {
|
||||||
|
Ok(RegionMetaActionIterator {
|
||||||
|
log_iter: self.store.scan(start, end).await?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use datatypes::type_id::LogicalTypeId;
|
||||||
|
use object_store::{backend::fs, ObjectStore};
|
||||||
|
use tempdir::TempDir;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
use crate::metadata::RegionMetadata;
|
||||||
|
use crate::test_util::descriptor_util::RegionDescBuilder;
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_region_manifest() {
|
||||||
|
common_telemetry::init_default_ut_logging();
|
||||||
|
let tmp_dir = TempDir::new("test_region_manifest").unwrap();
|
||||||
|
let object_store = ObjectStore::new(
|
||||||
|
fs::Backend::build()
|
||||||
|
.root(&tmp_dir.path().to_string_lossy())
|
||||||
|
.finish()
|
||||||
|
.await
|
||||||
|
.unwrap(),
|
||||||
|
);
|
||||||
|
let region_id = 0;
|
||||||
|
|
||||||
|
let manifest = RegionManifest::new(region_id, "/manifest/", object_store);
|
||||||
|
assert_eq!(region_id, manifest.metadata_id());
|
||||||
|
|
||||||
|
let region_name = "region-0";
|
||||||
|
let desc = RegionDescBuilder::new(region_name)
|
||||||
|
.id(region_id)
|
||||||
|
.push_key_column(("k1", LogicalTypeId::Int32, false))
|
||||||
|
.push_value_column(("v1", LogicalTypeId::Float32, true))
|
||||||
|
.build();
|
||||||
|
let metadata: RegionMetadata = desc.try_into().unwrap();
|
||||||
|
let region_meta = Arc::new(metadata);
|
||||||
|
|
||||||
|
assert!(manifest.load().await.unwrap().is_none());
|
||||||
|
|
||||||
|
manifest
|
||||||
|
.update(RegionMetaAction::Change(RegionChange {
|
||||||
|
metadata: region_meta.clone(),
|
||||||
|
}))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let manifest_data = manifest.load().await.unwrap().unwrap();
|
||||||
|
assert_eq!(manifest_data.region_meta, region_meta);
|
||||||
|
|
||||||
|
// save another metadata
|
||||||
|
let region_name = "region-0";
|
||||||
|
let desc = RegionDescBuilder::new(region_name)
|
||||||
|
.id(region_id)
|
||||||
|
.push_key_column(("k1", LogicalTypeId::Int32, false))
|
||||||
|
.push_key_column(("k2", LogicalTypeId::Int64, false))
|
||||||
|
.push_value_column(("v1", LogicalTypeId::Float32, true))
|
||||||
|
.push_value_column(("bool", LogicalTypeId::Boolean, true))
|
||||||
|
.build();
|
||||||
|
let metadata: RegionMetadata = desc.try_into().unwrap();
|
||||||
|
let region_meta = Arc::new(metadata);
|
||||||
|
manifest
|
||||||
|
.update(RegionMetaAction::Change(RegionChange {
|
||||||
|
metadata: region_meta.clone(),
|
||||||
|
}))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let manifest_data = manifest.load().await.unwrap().unwrap();
|
||||||
|
assert_eq!(manifest_data.region_meta, region_meta);
|
||||||
|
}
|
||||||
|
}
|
||||||
330
src/storage/src/manifest/storage.rs
Normal file
330
src/storage/src/manifest/storage.rs
Normal file
@@ -0,0 +1,330 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
use std::iter::Iterator;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use common_telemetry::logging;
|
||||||
|
use futures::TryStreamExt;
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
use object_store::{util, DirEntry, ObjectStore};
|
||||||
|
use regex::Regex;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use snafu::{ensure, ResultExt};
|
||||||
|
use store_api::manifest::{LogIterator, ManifestLogStorage, ManifestVersion};
|
||||||
|
|
||||||
|
use crate::error::{
|
||||||
|
DecodeJsonSnafu, DeleteObjectSnafu, EncodeJsonSnafu, Error, InvalidScanIndexSnafu,
|
||||||
|
ListObjectsSnafu, ReadObjectSnafu, Result, Utf8Snafu, WriteObjectSnafu,
|
||||||
|
};
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
static ref RE: Regex = Regex::new("^\\d+\\.json$").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
const LAST_CHECKPOINT_FILE: &str = "_last_checkpoint";
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn delta_file(version: ManifestVersion) -> String {
|
||||||
|
format!("{:020}.json", version)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn checkpoint_file(version: ManifestVersion) -> String {
|
||||||
|
format!("{:020}.checkpoint", version)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return's the delta file version from path
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
/// Panics if the file path is not a valid delta file.
|
||||||
|
#[inline]
|
||||||
|
pub fn delta_version(path: &str) -> ManifestVersion {
|
||||||
|
let s = path.split('.').next().unwrap();
|
||||||
|
s.parse()
|
||||||
|
.unwrap_or_else(|_| panic!("Invalid delta file: {}", path))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn is_delta_file(file_name: &str) -> bool {
|
||||||
|
RE.is_match(file_name)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct ObjectStoreLogIterator {
|
||||||
|
iter: Box<dyn Iterator<Item = (ManifestVersion, DirEntry)> + Send + Sync>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl LogIterator for ObjectStoreLogIterator {
|
||||||
|
type Error = Error;
|
||||||
|
|
||||||
|
async fn next_log(&mut self) -> Result<Option<(ManifestVersion, Vec<u8>)>> {
|
||||||
|
match self.iter.next() {
|
||||||
|
Some((v, e)) => {
|
||||||
|
let object = e.into_object();
|
||||||
|
let bytes = object.read().await.context(ReadObjectSnafu {
|
||||||
|
path: object.path(),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(Some((v, bytes)))
|
||||||
|
}
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct ManifestObjectStore {
|
||||||
|
object_store: ObjectStore,
|
||||||
|
path: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ManifestObjectStore {
|
||||||
|
pub fn new(path: &str, object_store: ObjectStore) -> Self {
|
||||||
|
Self {
|
||||||
|
object_store,
|
||||||
|
path: util::normalize_dir(path),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn delta_file_path(&self, version: ManifestVersion) -> String {
|
||||||
|
format!("{}{}", self.path, delta_file(version))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn checkpoint_file_path(&self, version: ManifestVersion) -> String {
|
||||||
|
format!("{}{}", self.path, checkpoint_file(version))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
|
struct CheckpointMetadata {
|
||||||
|
pub size: usize,
|
||||||
|
pub version: ManifestVersion,
|
||||||
|
pub checksum: Option<String>,
|
||||||
|
pub extend_metadata: Option<HashMap<String, String>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CheckpointMetadata {
|
||||||
|
fn encode(&self) -> Result<impl AsRef<[u8]>> {
|
||||||
|
serde_json::to_string(self).context(EncodeJsonSnafu)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decode(bs: &[u8]) -> Result<Self> {
|
||||||
|
let data = std::str::from_utf8(bs).context(Utf8Snafu)?;
|
||||||
|
|
||||||
|
serde_json::from_str(data).context(DecodeJsonSnafu)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl ManifestLogStorage for ManifestObjectStore {
|
||||||
|
type Error = Error;
|
||||||
|
type Iter = ObjectStoreLogIterator;
|
||||||
|
|
||||||
|
async fn scan(
|
||||||
|
&self,
|
||||||
|
start: ManifestVersion,
|
||||||
|
end: ManifestVersion,
|
||||||
|
) -> Result<ObjectStoreLogIterator> {
|
||||||
|
ensure!(start <= end, InvalidScanIndexSnafu { start, end });
|
||||||
|
|
||||||
|
let dir = self.object_store.object(&self.path);
|
||||||
|
let dir_exists = dir
|
||||||
|
.is_exist()
|
||||||
|
.await
|
||||||
|
.context(ReadObjectSnafu { path: &self.path })?;
|
||||||
|
|
||||||
|
if !dir_exists {
|
||||||
|
return Ok(ObjectStoreLogIterator {
|
||||||
|
iter: Box::new(Vec::default().into_iter()),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let streamer = dir
|
||||||
|
.list()
|
||||||
|
.await
|
||||||
|
.context(ListObjectsSnafu { path: &self.path })?;
|
||||||
|
|
||||||
|
let mut entries: Vec<(ManifestVersion, DirEntry)> = streamer
|
||||||
|
.try_filter_map(|e| async move {
|
||||||
|
let file_name = e.name();
|
||||||
|
if is_delta_file(file_name) {
|
||||||
|
let version = delta_version(file_name);
|
||||||
|
if version >= start && version < end {
|
||||||
|
Ok(Some((version, e)))
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.try_collect::<Vec<_>>()
|
||||||
|
.await
|
||||||
|
.context(ListObjectsSnafu { path: &self.path })?;
|
||||||
|
|
||||||
|
entries.sort_unstable_by(|(v1, _), (v2, _)| v1.cmp(v2));
|
||||||
|
|
||||||
|
Ok(ObjectStoreLogIterator {
|
||||||
|
iter: Box::new(entries.into_iter()),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn save(&self, version: ManifestVersion, bytes: &[u8]) -> Result<()> {
|
||||||
|
let object = self.object_store.object(&self.delta_file_path(version));
|
||||||
|
object.write(bytes).await.context(WriteObjectSnafu {
|
||||||
|
path: object.path(),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn delete(&self, start: ManifestVersion, end: ManifestVersion) -> Result<()> {
|
||||||
|
//TODO(dennis): delete in batch or concurrently?
|
||||||
|
for v in start..end {
|
||||||
|
let object = self.object_store.object(&self.delta_file_path(v));
|
||||||
|
object.delete().await.context(DeleteObjectSnafu {
|
||||||
|
path: object.path(),
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn save_checkpoint(&self, version: ManifestVersion, bytes: &[u8]) -> Result<()> {
|
||||||
|
let object = self
|
||||||
|
.object_store
|
||||||
|
.object(&self.checkpoint_file_path(version));
|
||||||
|
object.write(bytes).await.context(WriteObjectSnafu {
|
||||||
|
path: object.path(),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let last_checkpoint = self
|
||||||
|
.object_store
|
||||||
|
.object(&format!("{}{}", self.path, LAST_CHECKPOINT_FILE));
|
||||||
|
|
||||||
|
let checkpoint_metadata = CheckpointMetadata {
|
||||||
|
size: bytes.len(),
|
||||||
|
version,
|
||||||
|
checksum: None,
|
||||||
|
extend_metadata: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
logging::debug!(
|
||||||
|
"Save checkpoint in path: {}, metadata: {:?}",
|
||||||
|
last_checkpoint.path(),
|
||||||
|
checkpoint_metadata
|
||||||
|
);
|
||||||
|
|
||||||
|
let bs = checkpoint_metadata.encode()?;
|
||||||
|
last_checkpoint.write(bs).await.context(WriteObjectSnafu {
|
||||||
|
path: last_checkpoint.path(),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn load_checkpoint(&self) -> Result<Option<(ManifestVersion, Vec<u8>)>> {
|
||||||
|
let last_checkpoint = self
|
||||||
|
.object_store
|
||||||
|
.object(&format!("{}{}", self.path, LAST_CHECKPOINT_FILE));
|
||||||
|
|
||||||
|
let checkpoint_exists = last_checkpoint.is_exist().await.context(ReadObjectSnafu {
|
||||||
|
path: last_checkpoint.path(),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
if checkpoint_exists {
|
||||||
|
let bytes = last_checkpoint.read().await.context(ReadObjectSnafu {
|
||||||
|
path: last_checkpoint.path(),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let checkpoint_metadata = CheckpointMetadata::decode(&bytes)?;
|
||||||
|
|
||||||
|
logging::debug!(
|
||||||
|
"Load checkpoint in path: {}, metadata: {:?}",
|
||||||
|
last_checkpoint.path(),
|
||||||
|
checkpoint_metadata
|
||||||
|
);
|
||||||
|
|
||||||
|
let checkpoint = self
|
||||||
|
.object_store
|
||||||
|
.object(&self.checkpoint_file_path(checkpoint_metadata.version));
|
||||||
|
|
||||||
|
Ok(Some((
|
||||||
|
checkpoint_metadata.version,
|
||||||
|
checkpoint.read().await.context(ReadObjectSnafu {
|
||||||
|
path: checkpoint.path(),
|
||||||
|
})?,
|
||||||
|
)))
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use object_store::{backend::fs, ObjectStore};
|
||||||
|
use tempdir::TempDir;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_manifest_log_store() {
|
||||||
|
common_telemetry::init_default_ut_logging();
|
||||||
|
let tmp_dir = TempDir::new("test_manifest_log_store").unwrap();
|
||||||
|
let object_store = ObjectStore::new(
|
||||||
|
fs::Backend::build()
|
||||||
|
.root(&tmp_dir.path().to_string_lossy())
|
||||||
|
.finish()
|
||||||
|
.await
|
||||||
|
.unwrap(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let log_store = ManifestObjectStore::new("/", object_store);
|
||||||
|
|
||||||
|
for v in 0..5 {
|
||||||
|
log_store
|
||||||
|
.save(v, format!("hello, {}", v).as_bytes())
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut it = log_store.scan(1, 4).await.unwrap();
|
||||||
|
for v in 1..4 {
|
||||||
|
let (version, bytes) = it.next_log().await.unwrap().unwrap();
|
||||||
|
assert_eq!(v, version);
|
||||||
|
assert_eq!(format!("hello, {}", v).as_bytes(), bytes);
|
||||||
|
}
|
||||||
|
assert!(it.next_log().await.unwrap().is_none());
|
||||||
|
|
||||||
|
let mut it = log_store.scan(0, 11).await.unwrap();
|
||||||
|
for v in 0..5 {
|
||||||
|
let (version, bytes) = it.next_log().await.unwrap().unwrap();
|
||||||
|
assert_eq!(v, version);
|
||||||
|
assert_eq!(format!("hello, {}", v).as_bytes(), bytes);
|
||||||
|
}
|
||||||
|
assert!(it.next_log().await.unwrap().is_none());
|
||||||
|
|
||||||
|
// Delete [0, 3)
|
||||||
|
log_store.delete(0, 3).await.unwrap();
|
||||||
|
|
||||||
|
// [3, 5) remains
|
||||||
|
let mut it = log_store.scan(0, 11).await.unwrap();
|
||||||
|
for v in 3..5 {
|
||||||
|
let (version, bytes) = it.next_log().await.unwrap().unwrap();
|
||||||
|
assert_eq!(v, version);
|
||||||
|
assert_eq!(format!("hello, {}", v).as_bytes(), bytes);
|
||||||
|
}
|
||||||
|
assert!(it.next_log().await.unwrap().is_none());
|
||||||
|
|
||||||
|
// test checkpoint
|
||||||
|
assert!(log_store.load_checkpoint().await.unwrap().is_none());
|
||||||
|
log_store
|
||||||
|
.save_checkpoint(3, "checkpoint".as_bytes())
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let (v, checkpoint) = log_store.load_checkpoint().await.unwrap().unwrap();
|
||||||
|
assert_eq!(checkpoint, "checkpoint".as_bytes());
|
||||||
|
assert_eq!(3, v);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -2,22 +2,27 @@ mod btree;
|
|||||||
mod inserter;
|
mod inserter;
|
||||||
mod schema;
|
mod schema;
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests;
|
pub mod tests;
|
||||||
|
mod version;
|
||||||
|
|
||||||
use std::mem;
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use datatypes::vectors::{UInt64Vector, UInt8Vector, VectorRef};
|
use datatypes::vectors::{UInt64Vector, UInt8Vector, VectorRef};
|
||||||
use snafu::Snafu;
|
|
||||||
use store_api::storage::{consts, SequenceNumber, ValueType};
|
use store_api::storage::{consts, SequenceNumber, ValueType};
|
||||||
|
|
||||||
use crate::error::Result;
|
use crate::error::Result;
|
||||||
use crate::memtable::btree::BTreeMemtable;
|
use crate::memtable::btree::BTreeMemtable;
|
||||||
pub use crate::memtable::inserter::Inserter;
|
pub use crate::memtable::inserter::Inserter;
|
||||||
pub use crate::memtable::schema::MemtableSchema;
|
pub use crate::memtable::schema::MemtableSchema;
|
||||||
|
pub use crate::memtable::version::{MemtableSet, MemtableVersion};
|
||||||
|
|
||||||
|
/// Unique id for memtables under same region.
|
||||||
|
pub type MemtableId = u32;
|
||||||
|
|
||||||
/// In memory storage.
|
/// In memory storage.
|
||||||
pub trait Memtable: Send + Sync {
|
pub trait Memtable: Send + Sync + std::fmt::Debug {
|
||||||
|
fn id(&self) -> MemtableId;
|
||||||
|
|
||||||
fn schema(&self) -> &MemtableSchema;
|
fn schema(&self) -> &MemtableSchema;
|
||||||
|
|
||||||
/// Write key/values to the memtable.
|
/// Write key/values to the memtable.
|
||||||
@@ -27,7 +32,7 @@ pub trait Memtable: Send + Sync {
|
|||||||
fn write(&self, kvs: &KeyValues) -> Result<()>;
|
fn write(&self, kvs: &KeyValues) -> Result<()>;
|
||||||
|
|
||||||
/// Iterates the memtable.
|
/// Iterates the memtable.
|
||||||
// TODO(yingwen): Consider passing a projector (does column projection).
|
// TODO(yingwen): 1. Use reference of IterContext? 2. Consider passing a projector (does column projection).
|
||||||
fn iter(&self, ctx: IterContext) -> Result<BatchIteratorPtr>;
|
fn iter(&self, ctx: IterContext) -> Result<BatchIteratorPtr>;
|
||||||
|
|
||||||
/// Returns the estimated bytes allocated by this memtable from heap.
|
/// Returns the estimated bytes allocated by this memtable from heap.
|
||||||
@@ -43,6 +48,11 @@ pub struct IterContext {
|
|||||||
pub batch_size: usize,
|
pub batch_size: usize,
|
||||||
/// Max visible sequence (inclusive).
|
/// Max visible sequence (inclusive).
|
||||||
pub visible_sequence: SequenceNumber,
|
pub visible_sequence: SequenceNumber,
|
||||||
|
|
||||||
|
// TODO(yingwen): [flush] Maybe delay deduping and visiblility handling, just returns all rows
|
||||||
|
// in memtable.
|
||||||
|
/// Returns all rows, ignores sequence visibility and key duplication.
|
||||||
|
pub for_flush: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for IterContext {
|
impl Default for IterContext {
|
||||||
@@ -51,6 +61,7 @@ impl Default for IterContext {
|
|||||||
batch_size: consts::READ_BATCH_SIZE,
|
batch_size: consts::READ_BATCH_SIZE,
|
||||||
// All data in memory is visible by default.
|
// All data in memory is visible by default.
|
||||||
visible_sequence: SequenceNumber::MAX,
|
visible_sequence: SequenceNumber::MAX,
|
||||||
|
for_flush: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -65,6 +76,7 @@ pub enum RowOrdering {
|
|||||||
Key,
|
Key,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO(yingwen): Maybe pack value_type with sequence (reserve 8bits in u64 for value type) like RocksDB.
|
||||||
pub struct Batch {
|
pub struct Batch {
|
||||||
pub keys: Vec<VectorRef>,
|
pub keys: Vec<VectorRef>,
|
||||||
pub sequences: UInt64Vector,
|
pub sequences: UInt64Vector,
|
||||||
@@ -73,24 +85,18 @@ pub struct Batch {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Iterator of memtable.
|
/// Iterator of memtable.
|
||||||
pub trait BatchIterator: Send {
|
pub trait BatchIterator: Iterator<Item = Result<Batch>> + Send + Sync {
|
||||||
/// Returns the schema of this iterator.
|
/// Returns the schema of this iterator.
|
||||||
fn schema(&self) -> &MemtableSchema;
|
fn schema(&self) -> &MemtableSchema;
|
||||||
|
|
||||||
/// Returns the ordering of the output rows from this iterator.
|
/// Returns the ordering of the output rows from this iterator.
|
||||||
fn ordering(&self) -> RowOrdering;
|
fn ordering(&self) -> RowOrdering;
|
||||||
|
|
||||||
/// Fetch next batch from the memtable.
|
|
||||||
///
|
|
||||||
/// # Panics
|
|
||||||
/// Panics if the iterator has already been exhausted.
|
|
||||||
fn next(&mut self) -> Result<Option<Batch>>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type BatchIteratorPtr = Box<dyn BatchIterator>;
|
pub type BatchIteratorPtr = Box<dyn BatchIterator>;
|
||||||
|
|
||||||
pub trait MemtableBuilder: Send + Sync {
|
pub trait MemtableBuilder: Send + Sync {
|
||||||
fn build(&self, schema: MemtableSchema) -> MemtableRef;
|
fn build(&self, id: MemtableId, schema: MemtableSchema) -> MemtableRef;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type MemtableBuilderRef = Arc<dyn MemtableBuilder>;
|
pub type MemtableBuilderRef = Arc<dyn MemtableBuilder>;
|
||||||
@@ -100,7 +106,8 @@ pub type MemtableBuilderRef = Arc<dyn MemtableBuilder>;
|
|||||||
pub struct KeyValues {
|
pub struct KeyValues {
|
||||||
pub sequence: SequenceNumber,
|
pub sequence: SequenceNumber,
|
||||||
pub value_type: ValueType,
|
pub value_type: ValueType,
|
||||||
/// Start index of these key-value paris in batch.
|
/// Start index of these key-value paris in batch. Each row in the same batch has
|
||||||
|
/// a unique index to identify it.
|
||||||
pub start_index_in_batch: usize,
|
pub start_index_in_batch: usize,
|
||||||
pub keys: Vec<VectorRef>,
|
pub keys: Vec<VectorRef>,
|
||||||
pub values: Vec<VectorRef>,
|
pub values: Vec<VectorRef>,
|
||||||
@@ -132,42 +139,7 @@ impl KeyValues {
|
|||||||
pub struct DefaultMemtableBuilder {}
|
pub struct DefaultMemtableBuilder {}
|
||||||
|
|
||||||
impl MemtableBuilder for DefaultMemtableBuilder {
|
impl MemtableBuilder for DefaultMemtableBuilder {
|
||||||
fn build(&self, schema: MemtableSchema) -> MemtableRef {
|
fn build(&self, id: MemtableId, schema: MemtableSchema) -> MemtableRef {
|
||||||
Arc::new(BTreeMemtable::new(schema))
|
Arc::new(BTreeMemtable::new(id, schema))
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Snafu)]
|
|
||||||
#[snafu(display("Fail to switch memtable"))]
|
|
||||||
pub struct SwitchError;
|
|
||||||
|
|
||||||
pub struct MemtableSet {
|
|
||||||
mem: MemtableRef,
|
|
||||||
// TODO(yingwen): Support multiple immutable memtables.
|
|
||||||
_immem: Option<MemtableRef>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl MemtableSet {
|
|
||||||
pub fn new(mem: MemtableRef) -> MemtableSet {
|
|
||||||
MemtableSet { mem, _immem: None }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn mutable_memtable(&self) -> &MemtableRef {
|
|
||||||
&self.mem
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Switch mutable memtable to immutable memtable, returns the old mutable memtable if success.
|
|
||||||
pub fn _switch_memtable(
|
|
||||||
&mut self,
|
|
||||||
mem: &MemtableRef,
|
|
||||||
) -> std::result::Result<MemtableRef, SwitchError> {
|
|
||||||
match &self._immem {
|
|
||||||
Some(_) => SwitchSnafu {}.fail(),
|
|
||||||
None => {
|
|
||||||
let old_mem = mem::replace(&mut self.mem, mem.clone());
|
|
||||||
self._immem = Some(old_mem.clone());
|
|
||||||
Ok(old_mem)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,13 +8,15 @@ use std::sync::{
|
|||||||
|
|
||||||
use datatypes::prelude::*;
|
use datatypes::prelude::*;
|
||||||
use datatypes::value::Value;
|
use datatypes::value::Value;
|
||||||
use datatypes::vectors::{UInt64VectorBuilder, UInt8VectorBuilder, VectorBuilder};
|
use datatypes::vectors::{
|
||||||
|
UInt64Vector, UInt64VectorBuilder, UInt8Vector, UInt8VectorBuilder, VectorBuilder,
|
||||||
|
};
|
||||||
use store_api::storage::{SequenceNumber, ValueType};
|
use store_api::storage::{SequenceNumber, ValueType};
|
||||||
|
|
||||||
use crate::error::Result;
|
use crate::error::Result;
|
||||||
use crate::memtable::{
|
use crate::memtable::{
|
||||||
Batch, BatchIterator, BatchIteratorPtr, IterContext, KeyValues, Memtable, MemtableSchema,
|
Batch, BatchIterator, BatchIteratorPtr, IterContext, KeyValues, Memtable, MemtableId,
|
||||||
RowOrdering,
|
MemtableSchema, RowOrdering,
|
||||||
};
|
};
|
||||||
|
|
||||||
type RwLockMap = RwLock<BTreeMap<InnerKey, RowValue>>;
|
type RwLockMap = RwLock<BTreeMap<InnerKey, RowValue>>;
|
||||||
@@ -22,15 +24,18 @@ type RwLockMap = RwLock<BTreeMap<InnerKey, RowValue>>;
|
|||||||
/// A simple memtable implementation based on std's [`BTreeMap`].
|
/// A simple memtable implementation based on std's [`BTreeMap`].
|
||||||
///
|
///
|
||||||
/// Mainly for test purpose, don't use in production.
|
/// Mainly for test purpose, don't use in production.
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct BTreeMemtable {
|
pub struct BTreeMemtable {
|
||||||
|
id: MemtableId,
|
||||||
schema: MemtableSchema,
|
schema: MemtableSchema,
|
||||||
map: Arc<RwLockMap>,
|
map: Arc<RwLockMap>,
|
||||||
estimated_bytes: AtomicUsize,
|
estimated_bytes: AtomicUsize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BTreeMemtable {
|
impl BTreeMemtable {
|
||||||
pub fn new(schema: MemtableSchema) -> BTreeMemtable {
|
pub fn new(id: MemtableId, schema: MemtableSchema) -> BTreeMemtable {
|
||||||
BTreeMemtable {
|
BTreeMemtable {
|
||||||
|
id,
|
||||||
schema,
|
schema,
|
||||||
map: Arc::new(RwLock::new(BTreeMap::new())),
|
map: Arc::new(RwLock::new(BTreeMap::new())),
|
||||||
estimated_bytes: AtomicUsize::new(0),
|
estimated_bytes: AtomicUsize::new(0),
|
||||||
@@ -39,6 +44,10 @@ impl BTreeMemtable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Memtable for BTreeMemtable {
|
impl Memtable for BTreeMemtable {
|
||||||
|
fn id(&self) -> MemtableId {
|
||||||
|
self.id
|
||||||
|
}
|
||||||
|
|
||||||
fn schema(&self) -> &MemtableSchema {
|
fn schema(&self) -> &MemtableSchema {
|
||||||
&self.schema
|
&self.schema
|
||||||
}
|
}
|
||||||
@@ -84,9 +93,13 @@ impl BatchIterator for BTreeIterator {
|
|||||||
fn ordering(&self) -> RowOrdering {
|
fn ordering(&self) -> RowOrdering {
|
||||||
RowOrdering::Key
|
RowOrdering::Key
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn next(&mut self) -> Result<Option<Batch>> {
|
impl Iterator for BTreeIterator {
|
||||||
Ok(self.next_batch())
|
type Item = Result<Batch>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Result<Batch>> {
|
||||||
|
self.next_batch().map(Ok)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -107,18 +120,13 @@ impl BTreeIterator {
|
|||||||
} else {
|
} else {
|
||||||
map.range(..)
|
map.range(..)
|
||||||
};
|
};
|
||||||
let iter = MapIterWrapper::new(iter, self.ctx.visible_sequence);
|
|
||||||
|
|
||||||
let mut keys = Vec::with_capacity(self.ctx.batch_size);
|
let (keys, sequences, value_types, values) = if self.ctx.for_flush {
|
||||||
let mut sequences = UInt64VectorBuilder::with_capacity(self.ctx.batch_size);
|
collect_iter(iter, self.ctx.batch_size)
|
||||||
let mut value_types = UInt8VectorBuilder::with_capacity(self.ctx.batch_size);
|
} else {
|
||||||
let mut values = Vec::with_capacity(self.ctx.batch_size);
|
let iter = MapIterWrapper::new(iter, self.ctx.visible_sequence);
|
||||||
for (inner_key, row_value) in iter.take(self.ctx.batch_size) {
|
collect_iter(iter, self.ctx.batch_size)
|
||||||
keys.push(inner_key);
|
};
|
||||||
sequences.push(Some(inner_key.sequence));
|
|
||||||
value_types.push(Some(inner_key.value_type.as_u8()));
|
|
||||||
values.push(row_value);
|
|
||||||
}
|
|
||||||
|
|
||||||
if keys.is_empty() {
|
if keys.is_empty() {
|
||||||
return None;
|
return None;
|
||||||
@@ -140,14 +148,37 @@ impl BTreeIterator {
|
|||||||
|
|
||||||
Some(Batch {
|
Some(Batch {
|
||||||
keys: rows_to_vectors(key_data_types, keys.as_slice()),
|
keys: rows_to_vectors(key_data_types, keys.as_slice()),
|
||||||
sequences: sequences.finish(),
|
sequences,
|
||||||
value_types: value_types.finish(),
|
value_types,
|
||||||
values: rows_to_vectors(value_data_types, values.as_slice()),
|
values: rows_to_vectors(value_data_types, values.as_slice()),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// `MapIterWrapper` removes same user key with elder sequence.
|
fn collect_iter<'a, I: Iterator<Item = (&'a InnerKey, &'a RowValue)>>(
|
||||||
|
iter: I,
|
||||||
|
batch_size: usize,
|
||||||
|
) -> (
|
||||||
|
Vec<&'a InnerKey>,
|
||||||
|
UInt64Vector,
|
||||||
|
UInt8Vector,
|
||||||
|
Vec<&'a RowValue>,
|
||||||
|
) {
|
||||||
|
let mut keys = Vec::with_capacity(batch_size);
|
||||||
|
let mut sequences = UInt64VectorBuilder::with_capacity(batch_size);
|
||||||
|
let mut value_types = UInt8VectorBuilder::with_capacity(batch_size);
|
||||||
|
let mut values = Vec::with_capacity(batch_size);
|
||||||
|
for (inner_key, row_value) in iter.take(batch_size) {
|
||||||
|
keys.push(inner_key);
|
||||||
|
sequences.push(Some(inner_key.sequence));
|
||||||
|
value_types.push(Some(inner_key.value_type.as_u8()));
|
||||||
|
values.push(row_value);
|
||||||
|
}
|
||||||
|
|
||||||
|
(keys, sequences.finish(), value_types.finish(), values)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// `MapIterWrapper` removes same user key with invisible sequence.
|
||||||
struct MapIterWrapper<'a, InnerKey, RowValue> {
|
struct MapIterWrapper<'a, InnerKey, RowValue> {
|
||||||
iter: btree_map::Range<'a, InnerKey, RowValue>,
|
iter: btree_map::Range<'a, InnerKey, RowValue>,
|
||||||
prev_key: Option<InnerKey>,
|
prev_key: Option<InnerKey>,
|
||||||
|
|||||||
@@ -1,51 +1,80 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
use datatypes::vectors::{NullVector, VectorRef};
|
use common_time::{RangeMillis, TimestampMillis};
|
||||||
use snafu::ensure;
|
use datatypes::prelude::ScalarVector;
|
||||||
|
use datatypes::schema::SchemaRef;
|
||||||
|
use datatypes::vectors::{Int64Vector, NullVector, VectorRef};
|
||||||
|
use snafu::{ensure, OptionExt};
|
||||||
use store_api::storage::{ColumnDescriptor, SequenceNumber, ValueType};
|
use store_api::storage::{ColumnDescriptor, SequenceNumber, ValueType};
|
||||||
|
|
||||||
use crate::error::{self, Result};
|
use crate::error::{self, Result};
|
||||||
use crate::memtable::{KeyValues, Memtable};
|
use crate::memtable::{KeyValues, Memtable, MemtableSet};
|
||||||
use crate::write_batch::{Mutation, PutData, WriteBatch};
|
use crate::write_batch::{Mutation, PutData, WriteBatch};
|
||||||
|
|
||||||
|
type RangeIndexMap = HashMap<TimestampMillis, usize>;
|
||||||
|
|
||||||
/// Wraps logic of inserting key/values in [WriteBatch] to [Memtable].
|
/// Wraps logic of inserting key/values in [WriteBatch] to [Memtable].
|
||||||
pub struct Inserter {
|
pub struct Inserter {
|
||||||
/// Sequence of the batch to be inserted.
|
/// Sequence of the batch to be inserted.
|
||||||
sequence: SequenceNumber,
|
sequence: SequenceNumber,
|
||||||
|
/// Time ranges of all input data.
|
||||||
|
time_ranges: Vec<RangeMillis>,
|
||||||
|
/// Map time range's start time to its index in time ranges.
|
||||||
|
time_range_indexes: RangeIndexMap,
|
||||||
|
/// Bucket duration of memtables.
|
||||||
|
bucket_duration: Duration,
|
||||||
|
/// Used to calculate the start index in batch for `KeyValues`.
|
||||||
index_in_batch: usize,
|
index_in_batch: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Inserter {
|
impl Inserter {
|
||||||
pub fn new(sequence: SequenceNumber) -> Inserter {
|
pub fn new(
|
||||||
|
sequence: SequenceNumber,
|
||||||
|
time_ranges: Vec<RangeMillis>,
|
||||||
|
bucket_duration: Duration,
|
||||||
|
) -> Inserter {
|
||||||
|
let time_range_indexes = new_range_index_map(&time_ranges);
|
||||||
|
|
||||||
Inserter {
|
Inserter {
|
||||||
sequence,
|
sequence,
|
||||||
|
time_ranges,
|
||||||
|
time_range_indexes,
|
||||||
|
bucket_duration,
|
||||||
index_in_batch: 0,
|
index_in_batch: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(yingwen): Can we take the WriteBatch?
|
// TODO(yingwen): Can we take the WriteBatch?
|
||||||
/// Insert write batch into memtable.
|
/// Insert write batch into memtables if both `batch` and `memtables` are not empty.
|
||||||
///
|
///
|
||||||
/// Won't do schema validation.
|
/// Won't do schema validation, caller (mostly the [`RegionWriter`]) should ensure the
|
||||||
pub fn insert_memtable(&mut self, batch: &WriteBatch, memtable: &dyn Memtable) -> Result<()> {
|
/// schemas of `memtables` are consistent with `batch`'s, and the time ranges of `memtables`
|
||||||
if batch.is_empty() {
|
/// are consistent with `self`'s time ranges.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
/// Panics if there is time range in `self.time_ranges` but not in `memtables`.
|
||||||
|
pub fn insert_memtables(&mut self, batch: &WriteBatch, memtables: &MemtableSet) -> Result<()> {
|
||||||
|
if batch.is_empty() || memtables.is_empty() {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let schema = memtable.schema();
|
// Enough to hold all key or value columns.
|
||||||
|
let total_column_num = batch.schema().num_columns();
|
||||||
// Reusable KeyValues buffer.
|
// Reusable KeyValues buffer.
|
||||||
let mut kvs = KeyValues {
|
let mut kvs = KeyValues {
|
||||||
sequence: self.sequence,
|
sequence: self.sequence,
|
||||||
value_type: ValueType::Put,
|
value_type: ValueType::Put,
|
||||||
start_index_in_batch: self.index_in_batch,
|
start_index_in_batch: self.index_in_batch,
|
||||||
keys: Vec::with_capacity(schema.num_row_key_columns()),
|
keys: Vec::with_capacity(total_column_num),
|
||||||
values: Vec::with_capacity(schema.num_value_columns()),
|
values: Vec::with_capacity(total_column_num),
|
||||||
};
|
};
|
||||||
|
|
||||||
for mutation in batch {
|
for mutation in batch {
|
||||||
match mutation {
|
match mutation {
|
||||||
Mutation::Put(put_data) => {
|
Mutation::Put(put_data) => {
|
||||||
self.put_impl(put_data, memtable, &mut kvs)?;
|
self.put_memtables(batch.schema(), put_data, memtables, &mut kvs)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -53,7 +82,24 @@ impl Inserter {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn put_impl(
|
fn put_memtables(
|
||||||
|
&mut self,
|
||||||
|
schema: &SchemaRef,
|
||||||
|
put_data: &PutData,
|
||||||
|
memtables: &MemtableSet,
|
||||||
|
kvs: &mut KeyValues,
|
||||||
|
) -> Result<()> {
|
||||||
|
if memtables.len() == 1 {
|
||||||
|
// Fast path, only one memtable to put.
|
||||||
|
let (_range, memtable) = memtables.iter().next().unwrap();
|
||||||
|
return self.put_one_memtable(put_data, &**memtable, kvs);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Split data by time range and put them into memtables.
|
||||||
|
self.put_multiple_memtables(schema, put_data, memtables, kvs)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn put_one_memtable(
|
||||||
&mut self,
|
&mut self,
|
||||||
put_data: &PutData,
|
put_data: &PutData,
|
||||||
memtable: &dyn Memtable,
|
memtable: &dyn Memtable,
|
||||||
@@ -78,6 +124,52 @@ impl Inserter {
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Put data to multiple memtables.
|
||||||
|
fn put_multiple_memtables(
|
||||||
|
&mut self,
|
||||||
|
schema: &SchemaRef,
|
||||||
|
put_data: &PutData,
|
||||||
|
memtables: &MemtableSet,
|
||||||
|
kvs: &mut KeyValues,
|
||||||
|
) -> Result<()> {
|
||||||
|
let timestamp_schema = schema
|
||||||
|
.timestamp_column()
|
||||||
|
.context(error::BatchMissingTimestampSnafu)?;
|
||||||
|
|
||||||
|
let timestamps = put_data.column_by_name(×tamp_schema.name).context(
|
||||||
|
error::BatchMissingColumnSnafu {
|
||||||
|
column: ×tamp_schema.name,
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
let timestamps = timestamps
|
||||||
|
.as_any()
|
||||||
|
.downcast_ref()
|
||||||
|
.context(error::BatchMissingTimestampSnafu)?;
|
||||||
|
let slice_indexes =
|
||||||
|
compute_slice_indexes(timestamps, self.bucket_duration, &self.time_range_indexes);
|
||||||
|
|
||||||
|
for slice_index in slice_indexes {
|
||||||
|
let sliced_data = put_data.slice(slice_index.start, slice_index.end);
|
||||||
|
let range = &self.time_ranges[slice_index.range_index];
|
||||||
|
// The caller should ensure memtable for given time range is exists.
|
||||||
|
let memtable = memtables
|
||||||
|
.get_by_range(range)
|
||||||
|
.expect("Memtable not found for range");
|
||||||
|
|
||||||
|
self.put_one_memtable(&sliced_data, &**memtable, kvs)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new_range_index_map(time_ranges: &[RangeMillis]) -> RangeIndexMap {
|
||||||
|
time_ranges
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(i, range)| (*range.start(), i))
|
||||||
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn clone_put_data_column_to(
|
fn clone_put_data_column_to(
|
||||||
@@ -100,3 +192,519 @@ fn clone_put_data_column_to(
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Holds `start` and `end` indexes to get a slice `[start, end)` from the vector whose
|
||||||
|
/// timestamps belong to same time range at `range_index`.
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
struct SliceIndex {
|
||||||
|
start: usize,
|
||||||
|
end: usize,
|
||||||
|
/// Index in time ranges.
|
||||||
|
range_index: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Computes the indexes used to split timestamps into time ranges aligned by `duration`, stores
|
||||||
|
/// the indexes in [`SliceIndex`].
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
/// Panics if the duration is too large to be represented by i64, or `timestamps` are not all
|
||||||
|
/// included by `time_range_indexes`.
|
||||||
|
fn compute_slice_indexes(
|
||||||
|
timestamps: &Int64Vector,
|
||||||
|
duration: Duration,
|
||||||
|
time_range_indexes: &RangeIndexMap,
|
||||||
|
) -> Vec<SliceIndex> {
|
||||||
|
let duration_ms = duration
|
||||||
|
.as_millis()
|
||||||
|
.try_into()
|
||||||
|
.unwrap_or_else(|e| panic!("Duration {:?} too large, {}", duration, e));
|
||||||
|
let mut slice_indexes = Vec::with_capacity(time_range_indexes.len());
|
||||||
|
// Current start and end of a valid `SliceIndex`.
|
||||||
|
let (mut start, mut end) = (0, 0);
|
||||||
|
// Time range index of the valid but unpushed `SliceIndex`.
|
||||||
|
let mut last_range_index = None;
|
||||||
|
|
||||||
|
// Iterate all timestamps, split timestamps by its time range.
|
||||||
|
for (i, ts) in timestamps.iter_data().enumerate() {
|
||||||
|
// Find index for time range of the timestamp.
|
||||||
|
let current_range_index = ts
|
||||||
|
.and_then(|v| TimestampMillis::new(v).align_by_bucket(duration_ms))
|
||||||
|
.and_then(|aligned| time_range_indexes.get(&aligned).copied());
|
||||||
|
|
||||||
|
match current_range_index {
|
||||||
|
Some(current_range_index) => {
|
||||||
|
end = i;
|
||||||
|
|
||||||
|
match last_range_index {
|
||||||
|
Some(last_index) => {
|
||||||
|
if last_index != current_range_index {
|
||||||
|
// Found a new range, we need to push a SliceIndex for last range.
|
||||||
|
slice_indexes.push(SliceIndex {
|
||||||
|
start,
|
||||||
|
end,
|
||||||
|
range_index: last_index,
|
||||||
|
});
|
||||||
|
// Update last range index.
|
||||||
|
last_range_index = Some(current_range_index);
|
||||||
|
// Advance start.
|
||||||
|
start = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// No previous range index.
|
||||||
|
None => last_range_index = Some(current_range_index),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
// Row without timestamp or out of time range will be skipped. This usually should not happen.
|
||||||
|
if let Some(last_index) = last_range_index {
|
||||||
|
// Need to store SliceIndex for last range.
|
||||||
|
slice_indexes.push(SliceIndex {
|
||||||
|
start,
|
||||||
|
end: i,
|
||||||
|
range_index: last_index,
|
||||||
|
});
|
||||||
|
// Clear last range index.
|
||||||
|
last_range_index = None;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Advances start and end, skips current row.
|
||||||
|
start = i + 1;
|
||||||
|
end = start;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process last slice index.
|
||||||
|
if let Some(last_index) = last_range_index {
|
||||||
|
slice_indexes.push(SliceIndex {
|
||||||
|
start,
|
||||||
|
// We need to use `end + 1` to include the last element.
|
||||||
|
end: end + 1,
|
||||||
|
range_index: last_index,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
slice_indexes
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use datatypes::{type_id::LogicalTypeId, value::Value};
|
||||||
|
use store_api::storage::{PutOperation, WriteRequest};
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
use crate::memtable::{
|
||||||
|
DefaultMemtableBuilder, IterContext, MemtableBuilder, MemtableId, MemtableSchema,
|
||||||
|
};
|
||||||
|
use crate::metadata::RegionMetadata;
|
||||||
|
use crate::test_util::descriptor_util::RegionDescBuilder;
|
||||||
|
use crate::test_util::write_batch_util;
|
||||||
|
|
||||||
|
fn new_time_ranges(starts: &[i64], duration: i64) -> Vec<RangeMillis> {
|
||||||
|
let mut ranges = Vec::with_capacity(starts.len());
|
||||||
|
for start in starts {
|
||||||
|
assert_eq!(*start, start / duration * duration);
|
||||||
|
|
||||||
|
ranges.push(RangeMillis::new(*start, start + duration).unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
ranges
|
||||||
|
}
|
||||||
|
|
||||||
|
fn check_compute_slice_indexes(
|
||||||
|
timestamps: &[Option<i64>],
|
||||||
|
range_starts: &[i64],
|
||||||
|
duration: i64,
|
||||||
|
expect: &[SliceIndex],
|
||||||
|
) {
|
||||||
|
assert!(duration > 0);
|
||||||
|
|
||||||
|
let timestamps = Int64Vector::from_iter(timestamps.iter());
|
||||||
|
let time_ranges = new_time_ranges(range_starts, duration);
|
||||||
|
let time_range_indexes = new_range_index_map(&time_ranges);
|
||||||
|
|
||||||
|
let slice_indexes = compute_slice_indexes(
|
||||||
|
×tamps,
|
||||||
|
Duration::from_millis(duration as u64),
|
||||||
|
&time_range_indexes,
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(expect, slice_indexes);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_compute_slice_indexes_valid() {
|
||||||
|
// Test empty input.
|
||||||
|
check_compute_slice_indexes(&[], &[], 100, &[]);
|
||||||
|
|
||||||
|
// One valid input.
|
||||||
|
check_compute_slice_indexes(
|
||||||
|
&[Some(99)],
|
||||||
|
&[0],
|
||||||
|
100,
|
||||||
|
&[SliceIndex {
|
||||||
|
start: 0,
|
||||||
|
end: 1,
|
||||||
|
range_index: 0,
|
||||||
|
}],
|
||||||
|
);
|
||||||
|
|
||||||
|
// 2 ranges.
|
||||||
|
check_compute_slice_indexes(
|
||||||
|
&[Some(99), Some(234)],
|
||||||
|
&[0, 200],
|
||||||
|
100,
|
||||||
|
&[
|
||||||
|
SliceIndex {
|
||||||
|
start: 0,
|
||||||
|
end: 1,
|
||||||
|
range_index: 0,
|
||||||
|
},
|
||||||
|
SliceIndex {
|
||||||
|
start: 1,
|
||||||
|
end: 2,
|
||||||
|
range_index: 1,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
// Multiple elements in first range.
|
||||||
|
check_compute_slice_indexes(
|
||||||
|
&[Some(99), Some(13), Some(18), Some(234)],
|
||||||
|
&[0, 200],
|
||||||
|
100,
|
||||||
|
&[
|
||||||
|
SliceIndex {
|
||||||
|
start: 0,
|
||||||
|
end: 3,
|
||||||
|
range_index: 0,
|
||||||
|
},
|
||||||
|
SliceIndex {
|
||||||
|
start: 3,
|
||||||
|
end: 4,
|
||||||
|
range_index: 1,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
// Multiple elements in last range.
|
||||||
|
check_compute_slice_indexes(
|
||||||
|
&[Some(99), Some(234), Some(271)],
|
||||||
|
&[0, 200],
|
||||||
|
100,
|
||||||
|
&[
|
||||||
|
SliceIndex {
|
||||||
|
start: 0,
|
||||||
|
end: 1,
|
||||||
|
range_index: 0,
|
||||||
|
},
|
||||||
|
SliceIndex {
|
||||||
|
start: 1,
|
||||||
|
end: 3,
|
||||||
|
range_index: 1,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
// Mulitple ranges.
|
||||||
|
check_compute_slice_indexes(
|
||||||
|
&[Some(99), Some(13), Some(234), Some(456)],
|
||||||
|
&[0, 200, 400],
|
||||||
|
100,
|
||||||
|
&[
|
||||||
|
SliceIndex {
|
||||||
|
start: 0,
|
||||||
|
end: 2,
|
||||||
|
range_index: 0,
|
||||||
|
},
|
||||||
|
SliceIndex {
|
||||||
|
start: 2,
|
||||||
|
end: 3,
|
||||||
|
range_index: 1,
|
||||||
|
},
|
||||||
|
SliceIndex {
|
||||||
|
start: 3,
|
||||||
|
end: 4,
|
||||||
|
range_index: 2,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
// Different slices with same range.
|
||||||
|
check_compute_slice_indexes(
|
||||||
|
&[Some(99), Some(234), Some(15)],
|
||||||
|
&[0, 200],
|
||||||
|
100,
|
||||||
|
&[
|
||||||
|
SliceIndex {
|
||||||
|
start: 0,
|
||||||
|
end: 1,
|
||||||
|
range_index: 0,
|
||||||
|
},
|
||||||
|
SliceIndex {
|
||||||
|
start: 1,
|
||||||
|
end: 2,
|
||||||
|
range_index: 1,
|
||||||
|
},
|
||||||
|
SliceIndex {
|
||||||
|
start: 2,
|
||||||
|
end: 3,
|
||||||
|
range_index: 0,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_compute_slice_indexes_null_timestamp() {
|
||||||
|
check_compute_slice_indexes(&[None], &[0], 100, &[]);
|
||||||
|
|
||||||
|
check_compute_slice_indexes(
|
||||||
|
&[None, None, Some(53)],
|
||||||
|
&[0],
|
||||||
|
100,
|
||||||
|
&[SliceIndex {
|
||||||
|
start: 2,
|
||||||
|
end: 3,
|
||||||
|
range_index: 0,
|
||||||
|
}],
|
||||||
|
);
|
||||||
|
|
||||||
|
check_compute_slice_indexes(
|
||||||
|
&[Some(53), None, None],
|
||||||
|
&[0],
|
||||||
|
100,
|
||||||
|
&[SliceIndex {
|
||||||
|
start: 0,
|
||||||
|
end: 1,
|
||||||
|
range_index: 0,
|
||||||
|
}],
|
||||||
|
);
|
||||||
|
|
||||||
|
check_compute_slice_indexes(
|
||||||
|
&[None, Some(53), None, Some(240), Some(13), None],
|
||||||
|
&[0, 200],
|
||||||
|
100,
|
||||||
|
&[
|
||||||
|
SliceIndex {
|
||||||
|
start: 1,
|
||||||
|
end: 2,
|
||||||
|
range_index: 0,
|
||||||
|
},
|
||||||
|
SliceIndex {
|
||||||
|
start: 3,
|
||||||
|
end: 4,
|
||||||
|
range_index: 1,
|
||||||
|
},
|
||||||
|
SliceIndex {
|
||||||
|
start: 4,
|
||||||
|
end: 5,
|
||||||
|
range_index: 0,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_compute_slice_indexes_no_range() {
|
||||||
|
check_compute_slice_indexes(
|
||||||
|
&[Some(99), Some(234), Some(15)],
|
||||||
|
&[0],
|
||||||
|
100,
|
||||||
|
&[
|
||||||
|
SliceIndex {
|
||||||
|
start: 0,
|
||||||
|
end: 1,
|
||||||
|
range_index: 0,
|
||||||
|
},
|
||||||
|
SliceIndex {
|
||||||
|
start: 2,
|
||||||
|
end: 3,
|
||||||
|
range_index: 0,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
check_compute_slice_indexes(
|
||||||
|
&[Some(99), Some(15), Some(234)],
|
||||||
|
&[0],
|
||||||
|
100,
|
||||||
|
&[SliceIndex {
|
||||||
|
start: 0,
|
||||||
|
end: 2,
|
||||||
|
range_index: 0,
|
||||||
|
}],
|
||||||
|
);
|
||||||
|
|
||||||
|
check_compute_slice_indexes(
|
||||||
|
&[Some(i64::MIN), Some(99), Some(15)],
|
||||||
|
&[0],
|
||||||
|
100,
|
||||||
|
&[SliceIndex {
|
||||||
|
start: 1,
|
||||||
|
end: 3,
|
||||||
|
range_index: 0,
|
||||||
|
}],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new_test_write_batch() -> WriteBatch {
|
||||||
|
write_batch_util::new_write_batch(
|
||||||
|
&[
|
||||||
|
("ts", LogicalTypeId::Int64, false),
|
||||||
|
("value", LogicalTypeId::Int64, true),
|
||||||
|
],
|
||||||
|
Some(0),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new_memtable_schema() -> MemtableSchema {
|
||||||
|
let desc = RegionDescBuilder::new("test")
|
||||||
|
.timestamp(("ts", LogicalTypeId::Int64, false))
|
||||||
|
.push_value_column(("value", LogicalTypeId::Int64, true))
|
||||||
|
.enable_version_column(false)
|
||||||
|
.build();
|
||||||
|
let metadata: RegionMetadata = desc.try_into().unwrap();
|
||||||
|
|
||||||
|
MemtableSchema::new(metadata.columns_row_key)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn put_batch(batch: &mut WriteBatch, data: &[(i64, Option<i64>)]) {
|
||||||
|
let mut put_data = PutData::with_num_columns(2);
|
||||||
|
let ts = Int64Vector::from_values(data.iter().map(|v| v.0));
|
||||||
|
put_data.add_key_column("ts", Arc::new(ts)).unwrap();
|
||||||
|
let value = Int64Vector::from_iter(data.iter().map(|v| v.1));
|
||||||
|
put_data.add_value_column("value", Arc::new(value)).unwrap();
|
||||||
|
|
||||||
|
batch.put(put_data).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new_memtable_set(time_ranges: &[RangeMillis], schema: &MemtableSchema) -> MemtableSet {
|
||||||
|
let mut set = MemtableSet::new();
|
||||||
|
for (id, range) in time_ranges.iter().enumerate() {
|
||||||
|
let mem = DefaultMemtableBuilder {}.build(id as MemtableId, schema.clone());
|
||||||
|
set.insert(*range, mem)
|
||||||
|
}
|
||||||
|
|
||||||
|
set
|
||||||
|
}
|
||||||
|
|
||||||
|
fn check_memtable_content(
|
||||||
|
mem: &dyn Memtable,
|
||||||
|
sequence: SequenceNumber,
|
||||||
|
data: &[(i64, Option<i64>)],
|
||||||
|
) {
|
||||||
|
let iter = mem.iter(IterContext::default()).unwrap();
|
||||||
|
|
||||||
|
let mut index = 0;
|
||||||
|
for batch in iter {
|
||||||
|
let batch = batch.unwrap();
|
||||||
|
let row_num = batch.keys[0].len();
|
||||||
|
for i in 0..row_num {
|
||||||
|
let ts = batch.keys[0].get(i);
|
||||||
|
let v = batch.values[0].get(i);
|
||||||
|
assert_eq!(Value::from(data[index].0), ts);
|
||||||
|
assert_eq!(Value::from(data[index].1), v);
|
||||||
|
assert_eq!(sequence, batch.sequences.get_data(i).unwrap());
|
||||||
|
|
||||||
|
index += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert_eq!(data.len(), index);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_inserter_put_one_memtable() {
|
||||||
|
let sequence = 11111;
|
||||||
|
let bucket_duration = 100;
|
||||||
|
let time_ranges = new_time_ranges(&[0], bucket_duration);
|
||||||
|
let memtable_schema = new_memtable_schema();
|
||||||
|
let memtables = new_memtable_set(&time_ranges, &memtable_schema);
|
||||||
|
let mut inserter = Inserter::new(
|
||||||
|
sequence,
|
||||||
|
time_ranges,
|
||||||
|
Duration::from_millis(bucket_duration as u64),
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut batch = new_test_write_batch();
|
||||||
|
put_batch(&mut batch, &[(1, Some(1)), (2, None)]);
|
||||||
|
// Also test multiple put data in one batch.
|
||||||
|
put_batch(
|
||||||
|
&mut batch,
|
||||||
|
&[
|
||||||
|
(3, None),
|
||||||
|
// Duplicate entries in same put data.
|
||||||
|
(2, None),
|
||||||
|
(2, Some(2)),
|
||||||
|
(4, Some(4)),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
inserter.insert_memtables(&batch, &memtables).unwrap();
|
||||||
|
let mem = memtables
|
||||||
|
.get_by_range(&RangeMillis::new(0, 100).unwrap())
|
||||||
|
.unwrap();
|
||||||
|
check_memtable_content(
|
||||||
|
&**mem,
|
||||||
|
sequence,
|
||||||
|
&[(1, Some(1)), (2, Some(2)), (3, None), (4, Some(4))],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_inserter_put_multiple() {
|
||||||
|
let sequence = 11111;
|
||||||
|
let bucket_duration = 100;
|
||||||
|
let time_ranges = new_time_ranges(&[0, 100, 200], bucket_duration);
|
||||||
|
let memtable_schema = new_memtable_schema();
|
||||||
|
let memtables = new_memtable_set(&time_ranges, &memtable_schema);
|
||||||
|
let mut inserter = Inserter::new(
|
||||||
|
sequence,
|
||||||
|
time_ranges,
|
||||||
|
Duration::from_millis(bucket_duration as u64),
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut batch = new_test_write_batch();
|
||||||
|
put_batch(
|
||||||
|
&mut batch,
|
||||||
|
&[
|
||||||
|
(1, Some(1)),
|
||||||
|
(2, None),
|
||||||
|
(201, Some(201)),
|
||||||
|
(102, None),
|
||||||
|
(101, Some(101)),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
put_batch(
|
||||||
|
&mut batch,
|
||||||
|
&[
|
||||||
|
(180, Some(1)),
|
||||||
|
(3, Some(3)),
|
||||||
|
(1, None),
|
||||||
|
(211, Some(211)),
|
||||||
|
(180, Some(180)),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
inserter.insert_memtables(&batch, &memtables).unwrap();
|
||||||
|
let mem = memtables
|
||||||
|
.get_by_range(&RangeMillis::new(0, 100).unwrap())
|
||||||
|
.unwrap();
|
||||||
|
check_memtable_content(&**mem, sequence, &[(1, None), (2, None), (3, Some(3))]);
|
||||||
|
|
||||||
|
let mem = memtables
|
||||||
|
.get_by_range(&RangeMillis::new(100, 200).unwrap())
|
||||||
|
.unwrap();
|
||||||
|
check_memtable_content(
|
||||||
|
&**mem,
|
||||||
|
sequence,
|
||||||
|
&[(101, Some(101)), (102, None), (180, Some(180))],
|
||||||
|
);
|
||||||
|
|
||||||
|
let mem = memtables
|
||||||
|
.get_by_range(&RangeMillis::new(200, 300).unwrap())
|
||||||
|
.unwrap();
|
||||||
|
check_memtable_content(&**mem, sequence, &[(201, Some(201)), (211, Some(211))]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -6,12 +6,16 @@ use super::*;
|
|||||||
use crate::metadata::RegionMetadata;
|
use crate::metadata::RegionMetadata;
|
||||||
use crate::test_util::descriptor_util::RegionDescBuilder;
|
use crate::test_util::descriptor_util::RegionDescBuilder;
|
||||||
|
|
||||||
|
// For simplicity, all memtables in test share same memtable id.
|
||||||
|
const MEMTABLE_ID: MemtableId = 1;
|
||||||
|
|
||||||
// Schema for testing memtable:
|
// Schema for testing memtable:
|
||||||
// - key: Int64(timestamp), UInt64(version),
|
// - key: Int64(timestamp), UInt64(version),
|
||||||
// - value: UInt64
|
// - value: UInt64
|
||||||
fn schema_for_test() -> MemtableSchema {
|
pub fn schema_for_test() -> MemtableSchema {
|
||||||
// Just build a region desc and use its columns_row_key metadata.
|
// Just build a region desc and use its columns_row_key metadata.
|
||||||
let desc = RegionDescBuilder::new("test")
|
let desc = RegionDescBuilder::new("test")
|
||||||
|
.enable_version_column(true)
|
||||||
.push_value_column(("v1", LogicalTypeId::UInt64, true))
|
.push_value_column(("v1", LogicalTypeId::UInt64, true))
|
||||||
.build();
|
.build();
|
||||||
let metadata: RegionMetadata = desc.try_into().unwrap();
|
let metadata: RegionMetadata = desc.try_into().unwrap();
|
||||||
@@ -70,7 +74,7 @@ fn kvs_for_test(
|
|||||||
kvs_for_test_with_index(sequence, value_type, 0, keys, values)
|
kvs_for_test_with_index(sequence, value_type, 0, keys, values)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_kvs(
|
pub fn write_kvs(
|
||||||
memtable: &dyn Memtable,
|
memtable: &dyn Memtable,
|
||||||
sequence: SequenceNumber,
|
sequence: SequenceNumber,
|
||||||
value_type: ValueType,
|
value_type: ValueType,
|
||||||
@@ -100,7 +104,8 @@ fn check_iter_content(
|
|||||||
values: &[Option<u64>],
|
values: &[Option<u64>],
|
||||||
) {
|
) {
|
||||||
let mut index = 0;
|
let mut index = 0;
|
||||||
while let Some(batch) = iter.next().unwrap() {
|
for batch in iter {
|
||||||
|
let batch = batch.unwrap();
|
||||||
check_batch_valid(&batch);
|
check_batch_valid(&batch);
|
||||||
|
|
||||||
let row_num = batch.keys[0].len();
|
let row_num = batch.keys[0].len();
|
||||||
@@ -147,7 +152,7 @@ impl MemtableTester {
|
|||||||
fn new_memtables(&self) -> Vec<MemtableRef> {
|
fn new_memtables(&self) -> Vec<MemtableRef> {
|
||||||
self.builders
|
self.builders
|
||||||
.iter()
|
.iter()
|
||||||
.map(|b| b.build(self.schema.clone()))
|
.map(|b| b.build(MEMTABLE_ID, self.schema.clone()))
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -174,7 +179,9 @@ struct TestContext {
|
|||||||
fn write_iter_memtable_case(ctx: &TestContext) {
|
fn write_iter_memtable_case(ctx: &TestContext) {
|
||||||
// Test iterating an empty memtable.
|
// Test iterating an empty memtable.
|
||||||
let mut iter = ctx.memtable.iter(IterContext::default()).unwrap();
|
let mut iter = ctx.memtable.iter(IterContext::default()).unwrap();
|
||||||
assert!(iter.next().unwrap().is_none());
|
assert!(iter.next().is_none());
|
||||||
|
// Poll the empty iterator again.
|
||||||
|
assert!(iter.next().is_none());
|
||||||
assert_eq!(0, ctx.memtable.bytes_allocated());
|
assert_eq!(0, ctx.memtable.bytes_allocated());
|
||||||
|
|
||||||
// Init test data.
|
// Init test data.
|
||||||
@@ -262,7 +269,8 @@ fn test_write_iter_memtable() {
|
|||||||
|
|
||||||
fn check_iter_batch_size(iter: &mut dyn BatchIterator, total: usize, batch_size: usize) {
|
fn check_iter_batch_size(iter: &mut dyn BatchIterator, total: usize, batch_size: usize) {
|
||||||
let mut remains = total;
|
let mut remains = total;
|
||||||
while let Some(batch) = iter.next().unwrap() {
|
for batch in iter {
|
||||||
|
let batch = batch.unwrap();
|
||||||
check_batch_valid(&batch);
|
check_batch_valid(&batch);
|
||||||
|
|
||||||
let row_num = batch.keys[0].len();
|
let row_num = batch.keys[0].len();
|
||||||
@@ -419,6 +427,7 @@ fn test_sequence_visibility() {
|
|||||||
let iter_ctx = IterContext {
|
let iter_ctx = IterContext {
|
||||||
batch_size: 1,
|
batch_size: 1,
|
||||||
visible_sequence: 9,
|
visible_sequence: 9,
|
||||||
|
for_flush: false,
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut iter = ctx.memtable.iter(iter_ctx).unwrap();
|
let mut iter = ctx.memtable.iter(iter_ctx).unwrap();
|
||||||
@@ -435,6 +444,7 @@ fn test_sequence_visibility() {
|
|||||||
let iter_ctx = IterContext {
|
let iter_ctx = IterContext {
|
||||||
batch_size: 1,
|
batch_size: 1,
|
||||||
visible_sequence: 10,
|
visible_sequence: 10,
|
||||||
|
for_flush: false,
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut iter = ctx.memtable.iter(iter_ctx).unwrap();
|
let mut iter = ctx.memtable.iter(iter_ctx).unwrap();
|
||||||
@@ -451,6 +461,7 @@ fn test_sequence_visibility() {
|
|||||||
let iter_ctx = IterContext {
|
let iter_ctx = IterContext {
|
||||||
batch_size: 1,
|
batch_size: 1,
|
||||||
visible_sequence: 11,
|
visible_sequence: 11,
|
||||||
|
for_flush: false,
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut iter = ctx.memtable.iter(iter_ctx).unwrap();
|
let mut iter = ctx.memtable.iter(iter_ctx).unwrap();
|
||||||
@@ -465,4 +476,26 @@ fn test_sequence_visibility() {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(yingwen): Test key overwrite in same batch.
|
#[test]
|
||||||
|
fn test_iter_after_none() {
|
||||||
|
let tester = MemtableTester::default();
|
||||||
|
tester.run_testcase(|ctx| {
|
||||||
|
write_kvs(
|
||||||
|
&*ctx.memtable,
|
||||||
|
10, // sequence
|
||||||
|
ValueType::Put,
|
||||||
|
&[(1000, 0), (1001, 1), (1002, 2)], // keys
|
||||||
|
&[Some(0), Some(1), Some(2)], // values
|
||||||
|
);
|
||||||
|
|
||||||
|
let iter_ctx = IterContext {
|
||||||
|
batch_size: 4,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut iter = ctx.memtable.iter(iter_ctx).unwrap();
|
||||||
|
assert!(iter.next().is_some());
|
||||||
|
assert!(iter.next().is_none());
|
||||||
|
assert!(iter.next().is_none());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|||||||
415
src/storage/src/memtable/version.rs
Normal file
415
src/storage/src/memtable/version.rs
Normal file
@@ -0,0 +1,415 @@
|
|||||||
|
use std::cmp::Ordering;
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use common_time::RangeMillis;
|
||||||
|
|
||||||
|
use crate::flush::MemtableWithMeta;
|
||||||
|
use crate::memtable::{MemtableId, MemtableRef};
|
||||||
|
|
||||||
|
/// A version of all memtables.
|
||||||
|
///
|
||||||
|
/// This structure is immutable now.
|
||||||
|
#[derive(Default, Debug, PartialEq, Eq)]
|
||||||
|
pub struct MemtableVersion {
|
||||||
|
mutable: MemtableSet,
|
||||||
|
/// Immutable memtables.
|
||||||
|
immutables: Vec<MemtableSetRef>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MemtableVersion {
|
||||||
|
pub fn new() -> MemtableVersion {
|
||||||
|
MemtableVersion::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn mutable_memtables(&self) -> &MemtableSet {
|
||||||
|
&self.mutable
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn immutable_memtables(&self) -> &[MemtableSetRef] {
|
||||||
|
&self.immutables
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn num_memtables(&self) -> usize {
|
||||||
|
self.mutable.len() + self.immutables.iter().map(|set| set.len()).sum::<usize>()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Clone current memtable version and freeze its mutable memtables, which moves
|
||||||
|
/// all mutable memtables to immutable memtable list.
|
||||||
|
pub fn freeze_mutable(&self) -> MemtableVersion {
|
||||||
|
let mut immutables = self.immutables.clone();
|
||||||
|
immutables.push(Arc::new(self.mutable.clone()));
|
||||||
|
|
||||||
|
MemtableVersion {
|
||||||
|
mutable: MemtableSet::new(),
|
||||||
|
immutables,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn mutable_bytes_allocated(&self) -> usize {
|
||||||
|
self.mutable.bytes_allocated()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn total_bytes_allocated(&self) -> usize {
|
||||||
|
self.immutables
|
||||||
|
.iter()
|
||||||
|
.map(|m| m.bytes_allocated())
|
||||||
|
.sum::<usize>()
|
||||||
|
+ self.mutable.bytes_allocated()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a new `MemtableVersion` that contains memtables both in this and `other`.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
/// Panics if there are memtables with same time ranges.
|
||||||
|
pub fn add_mutable(&self, other: MemtableSet) -> MemtableVersion {
|
||||||
|
let mutable = self.mutable.add(other);
|
||||||
|
|
||||||
|
Self {
|
||||||
|
mutable,
|
||||||
|
immutables: self.immutables.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a new `MemtableVersion` that removes immutable memtables
|
||||||
|
/// less than or equal to max_memtable_id.
|
||||||
|
pub fn remove_immutables(&self, max_memtable_id: MemtableId) -> MemtableVersion {
|
||||||
|
let immutables = self
|
||||||
|
.immutables
|
||||||
|
.iter()
|
||||||
|
.filter(|immem| immem.max_memtable_id() > max_memtable_id)
|
||||||
|
.cloned()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
MemtableVersion {
|
||||||
|
mutable: self.mutable.clone(),
|
||||||
|
immutables,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn memtables_to_flush(&self) -> (Option<MemtableId>, Vec<MemtableWithMeta>) {
|
||||||
|
let max_memtable_id = self
|
||||||
|
.immutables
|
||||||
|
.iter()
|
||||||
|
.map(|immem| immem.max_memtable_id())
|
||||||
|
.max();
|
||||||
|
let memtables = self
|
||||||
|
.immutables
|
||||||
|
.iter()
|
||||||
|
.flat_map(|immem| immem.to_memtable_with_metas())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
(max_memtable_id, memtables)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We use a new type to order time ranges by (end, start).
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
struct RangeKey(RangeMillis);
|
||||||
|
|
||||||
|
impl Ord for RangeKey {
|
||||||
|
fn cmp(&self, other: &RangeKey) -> Ordering {
|
||||||
|
self.0
|
||||||
|
.end()
|
||||||
|
.cmp(other.0.end())
|
||||||
|
.then_with(|| self.0.start().cmp(other.0.start()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialOrd for RangeKey {
|
||||||
|
fn partial_cmp(&self, other: &RangeKey) -> Option<Ordering> {
|
||||||
|
Some(self.cmp(other))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Collection of mutable memtables.
|
||||||
|
///
|
||||||
|
/// Memtables are partitioned by their time range. Caller should ensure
|
||||||
|
/// there are no overlapped ranges and all ranges are aligned by same
|
||||||
|
/// bucket duration.
|
||||||
|
#[derive(Default, Clone, Debug)]
|
||||||
|
pub struct MemtableSet {
|
||||||
|
memtables: BTreeMap<RangeKey, MemtableRef>,
|
||||||
|
max_memtable_id: MemtableId,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub type MemtableSetRef = Arc<MemtableSet>;
|
||||||
|
|
||||||
|
impl PartialEq for MemtableSet {
|
||||||
|
fn eq(&self, other: &MemtableSet) -> bool {
|
||||||
|
self.max_memtable_id == other.max_memtable_id
|
||||||
|
&& self.memtables.len() == other.memtables.len()
|
||||||
|
&& self
|
||||||
|
.memtables
|
||||||
|
.iter()
|
||||||
|
.zip(&other.memtables)
|
||||||
|
.all(|(a, b)| a.0 == b.0 && a.1.id() == b.1.id() && a.1.schema() == b.1.schema())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Eq for MemtableSet {}
|
||||||
|
|
||||||
|
impl MemtableSet {
|
||||||
|
pub fn new() -> MemtableSet {
|
||||||
|
MemtableSet::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get memtable by time range.
|
||||||
|
///
|
||||||
|
/// The range must exactly equal to the range of the memtable, otherwise `None`
|
||||||
|
/// is returned.
|
||||||
|
pub fn get_by_range(&self, range: &RangeMillis) -> Option<&MemtableRef> {
|
||||||
|
let range_key = RangeKey(*range);
|
||||||
|
self.memtables.get(&range_key)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Insert a new memtable.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
/// Panics if memtable with same range already exists.
|
||||||
|
pub fn insert(&mut self, range: RangeMillis, mem: MemtableRef) {
|
||||||
|
self.max_memtable_id = MemtableId::max(self.max_memtable_id, mem.id());
|
||||||
|
let old = self.memtables.insert(RangeKey(range), mem);
|
||||||
|
assert!(old.is_none());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns number of memtables in the set.
|
||||||
|
#[inline]
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
self.memtables.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if there is no memtable in the set.
|
||||||
|
#[inline]
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.memtables.is_empty()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn bytes_allocated(&self) -> usize {
|
||||||
|
self.memtables.values().map(|m| m.bytes_allocated()).sum()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn max_memtable_id(&self) -> MemtableId {
|
||||||
|
self.max_memtable_id
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a new `MemtableSet` that contains memtables both in `self` and
|
||||||
|
/// `other`, let `self` unchanged.
|
||||||
|
pub fn add(&self, mut other: MemtableSet) -> MemtableSet {
|
||||||
|
// We use `other.memtables` to extend `self.memtables` since memtables
|
||||||
|
// in other should be empty in usual, so overwriting it is okay.
|
||||||
|
other
|
||||||
|
.memtables
|
||||||
|
.extend(self.memtables.iter().map(|(k, v)| (*k, v.clone())));
|
||||||
|
|
||||||
|
MemtableSet {
|
||||||
|
memtables: other.memtables,
|
||||||
|
max_memtable_id: MemtableId::max(self.max_memtable_id, other.max_memtable_id),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn to_memtable_with_metas(&self) -> Vec<MemtableWithMeta> {
|
||||||
|
self.memtables
|
||||||
|
.iter()
|
||||||
|
.map(|(range_key, memtable)| MemtableWithMeta {
|
||||||
|
memtable: memtable.clone(),
|
||||||
|
bucket: range_key.0,
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn iter(&self) -> impl Iterator<Item = (&RangeMillis, &MemtableRef)> {
|
||||||
|
self.memtables.iter().map(|(k, v)| (&k.0, v))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use store_api::storage::ValueType;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
use crate::memtable::tests;
|
||||||
|
use crate::memtable::BTreeMemtable;
|
||||||
|
use crate::memtable::Memtable;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_memtableset_misc() {
|
||||||
|
let mut set = MemtableSet::new();
|
||||||
|
|
||||||
|
assert!(set.is_empty());
|
||||||
|
assert_eq!(0, set.max_memtable_id());
|
||||||
|
assert_eq!(0, set.bytes_allocated());
|
||||||
|
assert!(set
|
||||||
|
.get_by_range(&RangeMillis::new(0, 10).unwrap())
|
||||||
|
.is_none());
|
||||||
|
|
||||||
|
set.insert(
|
||||||
|
RangeMillis::new(0, 10).unwrap(),
|
||||||
|
Arc::new(BTreeMemtable::new(0, tests::schema_for_test())),
|
||||||
|
);
|
||||||
|
set.insert(
|
||||||
|
RangeMillis::new(10, 20).unwrap(),
|
||||||
|
Arc::new(BTreeMemtable::new(1, tests::schema_for_test())),
|
||||||
|
);
|
||||||
|
let memtable = Arc::new(BTreeMemtable::new(2, tests::schema_for_test()));
|
||||||
|
// Write some test data
|
||||||
|
tests::write_kvs(
|
||||||
|
&*memtable,
|
||||||
|
10, // sequence
|
||||||
|
ValueType::Put,
|
||||||
|
&[
|
||||||
|
(1000, 1),
|
||||||
|
(1000, 2),
|
||||||
|
(2002, 1),
|
||||||
|
(2003, 1),
|
||||||
|
(2003, 5),
|
||||||
|
(1001, 1),
|
||||||
|
], // keys
|
||||||
|
&[Some(1), Some(2), Some(7), Some(8), Some(9), Some(3)], // values
|
||||||
|
);
|
||||||
|
|
||||||
|
set.insert(RangeMillis::new(20, 30).unwrap(), memtable.clone());
|
||||||
|
|
||||||
|
for (i, (range, _)) in set.iter().enumerate() {
|
||||||
|
assert_eq!(
|
||||||
|
*range,
|
||||||
|
RangeMillis::new(i as i64 * 10, i as i64 * 10 + 10).unwrap()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert!(!set.is_empty());
|
||||||
|
assert_eq!(2, set.max_memtable_id());
|
||||||
|
assert_eq!(memtable.bytes_allocated(), set.bytes_allocated());
|
||||||
|
assert!(set
|
||||||
|
.get_by_range(&RangeMillis::new(0, 10).unwrap())
|
||||||
|
.is_some());
|
||||||
|
assert!(set
|
||||||
|
.get_by_range(&RangeMillis::new(10, 20).unwrap())
|
||||||
|
.is_some());
|
||||||
|
assert!(set
|
||||||
|
.get_by_range(&RangeMillis::new(20, 30).unwrap())
|
||||||
|
.is_some());
|
||||||
|
assert!(set
|
||||||
|
.get_by_range(&RangeMillis::new(0, 100).unwrap())
|
||||||
|
.is_none());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn create_test_memtableset(ids: &[MemtableId]) -> MemtableSet {
|
||||||
|
let mut set = MemtableSet::new();
|
||||||
|
|
||||||
|
for id in ids {
|
||||||
|
let i = *id as i64;
|
||||||
|
set.insert(
|
||||||
|
RangeMillis::new(i * 10, (i + 1) * 10).unwrap(),
|
||||||
|
Arc::new(BTreeMemtable::new(*id, tests::schema_for_test())),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
set
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_add_memtableset() {
|
||||||
|
let s1 = create_test_memtableset(&[0, 1, 2]);
|
||||||
|
let s2 = create_test_memtableset(&[3, 4, 5, 6]);
|
||||||
|
|
||||||
|
let mut s1_memtables = s1.to_memtable_with_metas();
|
||||||
|
let s2_memtables = s2.to_memtable_with_metas();
|
||||||
|
s1_memtables.extend(s2_memtables);
|
||||||
|
|
||||||
|
let empty = create_test_memtableset(&[]);
|
||||||
|
assert_eq!(s1, s1.add(empty));
|
||||||
|
|
||||||
|
let s3 = s1.add(s2);
|
||||||
|
assert_ne!(s1, s3);
|
||||||
|
|
||||||
|
assert_eq!(7, s3.memtables.len());
|
||||||
|
let s3_memtables = s3.to_memtable_with_metas();
|
||||||
|
assert_eq!(7, s3_memtables.len());
|
||||||
|
|
||||||
|
for i in 0..7 {
|
||||||
|
assert_eq!(s1_memtables[i].bucket, s3_memtables[i].bucket);
|
||||||
|
assert_eq!(s1_memtables[i].memtable.id(), s3_memtables[i].memtable.id());
|
||||||
|
}
|
||||||
|
assert_eq!(6, s3.max_memtable_id());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_memtableversion() {
|
||||||
|
let s1 = create_test_memtableset(&[0, 1, 2]);
|
||||||
|
let s2 = create_test_memtableset(&[3, 4, 5, 6]);
|
||||||
|
let s3 = s1.add(s2.clone());
|
||||||
|
|
||||||
|
let v1 = MemtableVersion::new();
|
||||||
|
assert!(v1.mutable_memtables().is_empty());
|
||||||
|
assert_eq!(0, v1.num_memtables());
|
||||||
|
|
||||||
|
// Add one mutable
|
||||||
|
let v2 = v1.add_mutable(s1.clone());
|
||||||
|
assert_ne!(v1, v2);
|
||||||
|
let mutables = v2.mutable_memtables();
|
||||||
|
assert_eq!(s1, *mutables);
|
||||||
|
assert_eq!(3, v2.num_memtables());
|
||||||
|
|
||||||
|
// Add another mutable
|
||||||
|
let v3 = v2.add_mutable(s2);
|
||||||
|
assert_ne!(v1, v3);
|
||||||
|
assert_ne!(v2, v3);
|
||||||
|
let mutables = v3.mutable_memtables();
|
||||||
|
assert_eq!(s3, *mutables);
|
||||||
|
assert!(v3.memtables_to_flush().1.is_empty());
|
||||||
|
assert_eq!(7, v3.num_memtables());
|
||||||
|
|
||||||
|
// Try to freeze s1, s2
|
||||||
|
let v4 = v3.freeze_mutable();
|
||||||
|
assert_ne!(v1, v4);
|
||||||
|
assert_ne!(v2, v4);
|
||||||
|
assert_ne!(v3, v4);
|
||||||
|
assert!(v4.mutable_memtables().is_empty());
|
||||||
|
assert_eq!(v4.immutables.len(), 1);
|
||||||
|
assert_eq!(v4.immutables[0], Arc::new(s3.clone()));
|
||||||
|
|
||||||
|
let (max_id, tables) = v4.memtables_to_flush();
|
||||||
|
assert_eq!(6, max_id.unwrap());
|
||||||
|
assert_eq!(7, tables.len());
|
||||||
|
assert_eq!(7, v4.num_memtables());
|
||||||
|
|
||||||
|
// Add another mutable
|
||||||
|
let s4 = create_test_memtableset(&[7, 8]);
|
||||||
|
let v5 = v4.add_mutable(s4.clone());
|
||||||
|
let mutables = v5.mutable_memtables();
|
||||||
|
assert_eq!(s4, *mutables);
|
||||||
|
assert_eq!(v4.immutables, v5.immutables);
|
||||||
|
|
||||||
|
// Try to freeze s4
|
||||||
|
let v6 = v5.freeze_mutable();
|
||||||
|
assert_eq!(v6.immutables.len(), 2);
|
||||||
|
assert_eq!(v6.immutables[0], Arc::new(s3));
|
||||||
|
assert_eq!(v6.immutables[1], Arc::new(s4.clone()));
|
||||||
|
|
||||||
|
let (max_id, tables) = v6.memtables_to_flush();
|
||||||
|
assert_eq!(8, max_id.unwrap());
|
||||||
|
assert_eq!(9, tables.len());
|
||||||
|
assert_eq!(9, v6.num_memtables());
|
||||||
|
// verify tables
|
||||||
|
for (i, table) in tables.iter().enumerate() {
|
||||||
|
assert_eq!(i as u32, table.memtable.id());
|
||||||
|
let i = i as i64;
|
||||||
|
assert_eq!(
|
||||||
|
table.bucket,
|
||||||
|
RangeMillis::new(i * 10, (i + 1) * 10).unwrap()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove tables
|
||||||
|
let v7 = v6.remove_immutables(6);
|
||||||
|
assert_eq!(v7.immutables.len(), 1);
|
||||||
|
assert_eq!(v7.immutables[0], Arc::new(s4));
|
||||||
|
|
||||||
|
let v8 = v7.remove_immutables(8);
|
||||||
|
assert_eq!(v8.immutables.len(), 0);
|
||||||
|
assert_eq!(0, v8.num_memtables());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -3,10 +3,12 @@ use std::sync::Arc;
|
|||||||
|
|
||||||
use common_error::prelude::*;
|
use common_error::prelude::*;
|
||||||
use datatypes::data_type::ConcreteDataType;
|
use datatypes::data_type::ConcreteDataType;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
use snafu::ensure;
|
use snafu::ensure;
|
||||||
use store_api::storage::{
|
use store_api::storage::{
|
||||||
consts, ColumnDescriptor, ColumnDescriptorBuilder, ColumnFamilyDescriptor, ColumnFamilyId,
|
consts, ColumnDescriptor, ColumnDescriptorBuilder, ColumnFamilyDescriptor, ColumnFamilyId,
|
||||||
ColumnId, ColumnSchema, RegionDescriptor, RegionMeta, RowKeyDescriptor, Schema, SchemaRef,
|
ColumnId, ColumnSchema, RegionDescriptor, RegionId, RegionMeta, RowKeyDescriptor, Schema,
|
||||||
|
SchemaRef,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Error for handling metadata.
|
/// Error for handling metadata.
|
||||||
@@ -20,6 +22,12 @@ pub enum Error {
|
|||||||
|
|
||||||
#[snafu(display("Column family id already exists, id: {}", id))]
|
#[snafu(display("Column family id already exists, id: {}", id))]
|
||||||
CfIdExists { id: ColumnId, backtrace: Backtrace },
|
CfIdExists { id: ColumnId, backtrace: Backtrace },
|
||||||
|
|
||||||
|
#[snafu(display("Failed to build schema, source: {}", source))]
|
||||||
|
InvalidSchema {
|
||||||
|
source: datatypes::error::Error,
|
||||||
|
backtrace: Backtrace,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, Error>;
|
pub type Result<T> = std::result::Result<T, Error>;
|
||||||
@@ -27,6 +35,7 @@ pub type Result<T> = std::result::Result<T, Error>;
|
|||||||
/// Implementation of [RegionMeta].
|
/// Implementation of [RegionMeta].
|
||||||
///
|
///
|
||||||
/// Holds a snapshot of region metadata.
|
/// Holds a snapshot of region metadata.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
pub struct RegionMetaImpl {
|
pub struct RegionMetaImpl {
|
||||||
metadata: RegionMetadataRef,
|
metadata: RegionMetadataRef,
|
||||||
}
|
}
|
||||||
@@ -48,8 +57,9 @@ pub type VersionNumber = u32;
|
|||||||
// TODO(yingwen): Make some fields of metadata private.
|
// TODO(yingwen): Make some fields of metadata private.
|
||||||
|
|
||||||
/// In memory metadata of region.
|
/// In memory metadata of region.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
||||||
pub struct RegionMetadata {
|
pub struct RegionMetadata {
|
||||||
|
pub id: RegionId,
|
||||||
/// Schema of the region.
|
/// Schema of the region.
|
||||||
///
|
///
|
||||||
/// Holding a [SchemaRef] to allow converting into `SchemaRef`/`arrow::SchemaRef`
|
/// Holding a [SchemaRef] to allow converting into `SchemaRef`/`arrow::SchemaRef`
|
||||||
@@ -66,13 +76,13 @@ pub struct RegionMetadata {
|
|||||||
|
|
||||||
pub type RegionMetadataRef = Arc<RegionMetadata>;
|
pub type RegionMetadataRef = Arc<RegionMetadata>;
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct ColumnMetadata {
|
pub struct ColumnMetadata {
|
||||||
pub cf_id: ColumnFamilyId,
|
pub cf_id: ColumnFamilyId,
|
||||||
pub desc: ColumnDescriptor,
|
pub desc: ColumnDescriptor,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct ColumnsMetadata {
|
pub struct ColumnsMetadata {
|
||||||
/// All columns, in `(key columns, timestamp, [version,] value columns)` order.
|
/// All columns, in `(key columns, timestamp, [version,] value columns)` order.
|
||||||
///
|
///
|
||||||
@@ -82,7 +92,7 @@ pub struct ColumnsMetadata {
|
|||||||
pub name_to_col_index: HashMap<String, usize>,
|
pub name_to_col_index: HashMap<String, usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Default, PartialEq)]
|
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct RowKeyMetadata {
|
pub struct RowKeyMetadata {
|
||||||
/// Exclusive end index of row key columns.
|
/// Exclusive end index of row key columns.
|
||||||
row_key_end: usize,
|
row_key_end: usize,
|
||||||
@@ -93,7 +103,7 @@ pub struct RowKeyMetadata {
|
|||||||
pub enable_version_column: bool,
|
pub enable_version_column: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct ColumnsRowKeyMetadata {
|
pub struct ColumnsRowKeyMetadata {
|
||||||
columns: ColumnsMetadata,
|
columns: ColumnsMetadata,
|
||||||
row_key: RowKeyMetadata,
|
row_key: RowKeyMetadata,
|
||||||
@@ -121,7 +131,7 @@ impl ColumnsRowKeyMetadata {
|
|||||||
|
|
||||||
pub type ColumnsRowKeyMetadataRef = Arc<ColumnsRowKeyMetadata>;
|
pub type ColumnsRowKeyMetadataRef = Arc<ColumnsRowKeyMetadata>;
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
||||||
pub struct ColumnFamiliesMetadata {
|
pub struct ColumnFamiliesMetadata {
|
||||||
/// Map column family id to column family metadata.
|
/// Map column family id to column family metadata.
|
||||||
id_to_cfs: HashMap<ColumnFamilyId, ColumnFamilyMetadata>,
|
id_to_cfs: HashMap<ColumnFamilyId, ColumnFamilyMetadata>,
|
||||||
@@ -133,7 +143,7 @@ impl ColumnFamiliesMetadata {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
||||||
pub struct ColumnFamilyMetadata {
|
pub struct ColumnFamilyMetadata {
|
||||||
/// Column family name.
|
/// Column family name.
|
||||||
pub name: String,
|
pub name: String,
|
||||||
@@ -151,18 +161,20 @@ impl TryFrom<RegionDescriptor> for RegionMetadata {
|
|||||||
// Doesn't set version explicitly here, because this is a new region meta
|
// Doesn't set version explicitly here, because this is a new region meta
|
||||||
// created from descriptor, using initial version is reasonable.
|
// created from descriptor, using initial version is reasonable.
|
||||||
let mut builder = RegionMetadataBuilder::new()
|
let mut builder = RegionMetadataBuilder::new()
|
||||||
|
.id(desc.id)
|
||||||
.row_key(desc.row_key)?
|
.row_key(desc.row_key)?
|
||||||
.add_column_family(desc.default_cf)?;
|
.add_column_family(desc.default_cf)?;
|
||||||
for cf in desc.extra_cfs {
|
for cf in desc.extra_cfs {
|
||||||
builder = builder.add_column_family(cf)?;
|
builder = builder.add_column_family(cf)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(builder.build())
|
builder.build()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
struct RegionMetadataBuilder {
|
struct RegionMetadataBuilder {
|
||||||
|
id: RegionId,
|
||||||
columns: Vec<ColumnMetadata>,
|
columns: Vec<ColumnMetadata>,
|
||||||
column_schemas: Vec<ColumnSchema>,
|
column_schemas: Vec<ColumnSchema>,
|
||||||
name_to_col_index: HashMap<String, usize>,
|
name_to_col_index: HashMap<String, usize>,
|
||||||
@@ -178,6 +190,11 @@ impl RegionMetadataBuilder {
|
|||||||
RegionMetadataBuilder::default()
|
RegionMetadataBuilder::default()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn id(mut self, id: RegionId) -> Self {
|
||||||
|
self.id = id;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
fn row_key(mut self, key: RowKeyDescriptor) -> Result<Self> {
|
fn row_key(mut self, key: RowKeyDescriptor) -> Result<Self> {
|
||||||
for col in key.columns {
|
for col in key.columns {
|
||||||
self.push_row_key_column(col)?;
|
self.push_row_key_column(col)?;
|
||||||
@@ -234,8 +251,15 @@ impl RegionMetadataBuilder {
|
|||||||
Ok(self)
|
Ok(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build(self) -> RegionMetadata {
|
fn build(self) -> Result<RegionMetadata> {
|
||||||
let schema = Arc::new(Schema::new(self.column_schemas));
|
let schema = if self.column_schemas.is_empty() {
|
||||||
|
Arc::new(Schema::new(self.column_schemas))
|
||||||
|
} else {
|
||||||
|
Arc::new(
|
||||||
|
Schema::with_timestamp_index(self.column_schemas, self.row_key.timestamp_key_index)
|
||||||
|
.context(InvalidSchemaSnafu)?,
|
||||||
|
)
|
||||||
|
};
|
||||||
let columns = ColumnsMetadata {
|
let columns = ColumnsMetadata {
|
||||||
columns: self.columns,
|
columns: self.columns,
|
||||||
name_to_col_index: self.name_to_col_index,
|
name_to_col_index: self.name_to_col_index,
|
||||||
@@ -245,14 +269,15 @@ impl RegionMetadataBuilder {
|
|||||||
row_key: self.row_key,
|
row_key: self.row_key,
|
||||||
});
|
});
|
||||||
|
|
||||||
RegionMetadata {
|
Ok(RegionMetadata {
|
||||||
|
id: self.id,
|
||||||
schema,
|
schema,
|
||||||
columns_row_key,
|
columns_row_key,
|
||||||
column_families: ColumnFamiliesMetadata {
|
column_families: ColumnFamiliesMetadata {
|
||||||
id_to_cfs: self.id_to_cfs,
|
id_to_cfs: self.id_to_cfs,
|
||||||
},
|
},
|
||||||
version: 0,
|
version: 0,
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper methods:
|
// Helper methods:
|
||||||
@@ -308,17 +333,20 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_descriptor_to_region_metadata() {
|
fn test_descriptor_to_region_metadata() {
|
||||||
let desc = RegionDescBuilder::new("region-0")
|
let desc = RegionDescBuilder::new("region-0")
|
||||||
.timestamp(("ts", LogicalTypeId::UInt64, false))
|
.timestamp(("ts", LogicalTypeId::Int64, false))
|
||||||
.enable_version_column(false)
|
.enable_version_column(false)
|
||||||
.push_key_column(("k1", LogicalTypeId::Int32, false))
|
.push_key_column(("k1", LogicalTypeId::Int32, false))
|
||||||
.push_value_column(("v1", LogicalTypeId::Float32, true))
|
.push_value_column(("v1", LogicalTypeId::Float32, true))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
let expect_schema = schema_util::new_schema_ref(&[
|
let expect_schema = schema_util::new_schema_ref(
|
||||||
("k1", LogicalTypeId::Int32, false),
|
&[
|
||||||
("ts", LogicalTypeId::UInt64, false),
|
("k1", LogicalTypeId::Int32, false),
|
||||||
("v1", LogicalTypeId::Float32, true),
|
("ts", LogicalTypeId::Int64, false),
|
||||||
]);
|
("v1", LogicalTypeId::Float32, true),
|
||||||
|
],
|
||||||
|
Some(1),
|
||||||
|
);
|
||||||
|
|
||||||
let metadata = RegionMetadata::try_from(desc).unwrap();
|
let metadata = RegionMetadata::try_from(desc).unwrap();
|
||||||
assert_eq!(expect_schema, metadata.schema);
|
assert_eq!(expect_schema, metadata.schema);
|
||||||
@@ -328,7 +356,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_build_empty_region_metadata() {
|
fn test_build_empty_region_metadata() {
|
||||||
let metadata = RegionMetadataBuilder::default().build();
|
let metadata = RegionMetadataBuilder::default().build().unwrap();
|
||||||
assert!(metadata.schema.column_schemas().is_empty());
|
assert!(metadata.schema.column_schemas().is_empty());
|
||||||
|
|
||||||
assert!(metadata.columns_row_key.columns.columns.is_empty());
|
assert!(metadata.columns_row_key.columns.columns.is_empty());
|
||||||
@@ -373,17 +401,21 @@ mod tests {
|
|||||||
.add_column_family(cf)
|
.add_column_family(cf)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.build()
|
.build()
|
||||||
|
.unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_build_metedata_disable_version() {
|
fn test_build_metedata_disable_version() {
|
||||||
let metadata = new_metadata(false);
|
let metadata = new_metadata(false);
|
||||||
|
|
||||||
let expect_schema = schema_util::new_schema_ref(&[
|
let expect_schema = schema_util::new_schema_ref(
|
||||||
("k1", LogicalTypeId::Int64, false),
|
&[
|
||||||
("ts", LogicalTypeId::Int64, false),
|
("k1", LogicalTypeId::Int64, false),
|
||||||
("v1", LogicalTypeId::Int64, true),
|
("ts", LogicalTypeId::Int64, false),
|
||||||
]);
|
("v1", LogicalTypeId::Int64, true),
|
||||||
|
],
|
||||||
|
Some(1),
|
||||||
|
);
|
||||||
|
|
||||||
assert_eq!(expect_schema, metadata.schema);
|
assert_eq!(expect_schema, metadata.schema);
|
||||||
|
|
||||||
@@ -422,12 +454,15 @@ mod tests {
|
|||||||
fn test_build_metedata_enable_version() {
|
fn test_build_metedata_enable_version() {
|
||||||
let metadata = new_metadata(true);
|
let metadata = new_metadata(true);
|
||||||
|
|
||||||
let expect_schema = schema_util::new_schema_ref(&[
|
let expect_schema = schema_util::new_schema_ref(
|
||||||
("k1", LogicalTypeId::Int64, false),
|
&[
|
||||||
("ts", LogicalTypeId::Int64, false),
|
("k1", LogicalTypeId::Int64, false),
|
||||||
(consts::VERSION_COLUMN_NAME, LogicalTypeId::UInt64, false),
|
("ts", LogicalTypeId::Int64, false),
|
||||||
("v1", LogicalTypeId::Int64, true),
|
(consts::VERSION_COLUMN_NAME, LogicalTypeId::UInt64, false),
|
||||||
]);
|
("v1", LogicalTypeId::Int64, true),
|
||||||
|
],
|
||||||
|
Some(1),
|
||||||
|
);
|
||||||
|
|
||||||
assert_eq!(expect_schema, metadata.schema);
|
assert_eq!(expect_schema, metadata.schema);
|
||||||
|
|
||||||
|
|||||||
43
src/storage/src/proto.rs
Normal file
43
src/storage/src/proto.rs
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
#![allow(clippy::all)]
|
||||||
|
|
||||||
|
tonic::include_proto!("greptime.storage.wal.v1");
|
||||||
|
|
||||||
|
use crate::write_batch::{Mutation, WriteBatch};
|
||||||
|
|
||||||
|
pub fn gen_mutation_extras(write_batch: &WriteBatch) -> Vec<MutationExtra> {
|
||||||
|
let column_schemas = write_batch.schema().column_schemas();
|
||||||
|
write_batch
|
||||||
|
.iter()
|
||||||
|
.map(|m| match m {
|
||||||
|
Mutation::Put(put) => {
|
||||||
|
if put.num_columns() == column_schemas.len() {
|
||||||
|
MutationExtra {
|
||||||
|
mutation_type: MutationType::Put.into(),
|
||||||
|
column_null_mask: Default::default(),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let mut column_null_mask =
|
||||||
|
bit_vec::BitVec::from_elem(column_schemas.len(), false);
|
||||||
|
for (i, cs) in column_schemas.iter().enumerate() {
|
||||||
|
if put.column_by_name(&cs.name).is_none() {
|
||||||
|
column_null_mask.set(i, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MutationExtra {
|
||||||
|
mutation_type: MutationType::Put.into(),
|
||||||
|
column_null_mask: column_null_mask.to_bytes(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WalHeader {
|
||||||
|
pub fn with_last_manifest_version(last_manifest_version: u64) -> Self {
|
||||||
|
Self {
|
||||||
|
last_manifest_version,
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -6,32 +6,44 @@ use std::sync::Arc;
|
|||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use snafu::ensure;
|
use snafu::ensure;
|
||||||
use store_api::storage::{ReadContext, Region, RegionMeta, WriteContext, WriteResponse};
|
use store_api::logstore::LogStore;
|
||||||
use tokio::sync::Mutex;
|
use store_api::storage::{ReadContext, Region, RegionId, RegionMeta, WriteContext, WriteResponse};
|
||||||
|
|
||||||
|
use crate::background::JobPoolImpl;
|
||||||
use crate::error::{self, Error, Result};
|
use crate::error::{self, Error, Result};
|
||||||
use crate::memtable::{DefaultMemtableBuilder, MemtableBuilder, MemtableSchema, MemtableSet};
|
use crate::flush::{FlushSchedulerImpl, FlushSchedulerRef, FlushStrategyRef, SizeBasedStrategy};
|
||||||
|
use crate::manifest::region::RegionManifest;
|
||||||
|
use crate::memtable::{DefaultMemtableBuilder, MemtableVersion};
|
||||||
use crate::metadata::{RegionMetaImpl, RegionMetadata};
|
use crate::metadata::{RegionMetaImpl, RegionMetadata};
|
||||||
use crate::region::writer::RegionWriter;
|
pub use crate::region::writer::{RegionWriter, RegionWriterRef, WriterContext};
|
||||||
use crate::snapshot::SnapshotImpl;
|
use crate::snapshot::SnapshotImpl;
|
||||||
|
use crate::sst::AccessLayerRef;
|
||||||
use crate::version::{VersionControl, VersionControlRef};
|
use crate::version::{VersionControl, VersionControlRef};
|
||||||
|
use crate::wal::Wal;
|
||||||
use crate::write_batch::WriteBatch;
|
use crate::write_batch::WriteBatch;
|
||||||
|
|
||||||
/// [Region] implementation.
|
/// [Region] implementation.
|
||||||
#[derive(Clone)]
|
pub struct RegionImpl<S: LogStore> {
|
||||||
pub struct RegionImpl {
|
inner: Arc<RegionInner<S>>,
|
||||||
inner: Arc<RegionInner>,
|
}
|
||||||
|
|
||||||
|
impl<S: LogStore> Clone for RegionImpl<S> {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
Self {
|
||||||
|
inner: self.inner.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl Region for RegionImpl {
|
impl<S: LogStore> Region for RegionImpl<S> {
|
||||||
type Error = Error;
|
type Error = Error;
|
||||||
type Meta = RegionMetaImpl;
|
type Meta = RegionMetaImpl;
|
||||||
type WriteRequest = WriteBatch;
|
type WriteRequest = WriteBatch;
|
||||||
type Snapshot = SnapshotImpl;
|
type Snapshot = SnapshotImpl;
|
||||||
|
|
||||||
fn name(&self) -> &str {
|
fn name(&self) -> &str {
|
||||||
&self.inner.name
|
&self.inner.shared.name
|
||||||
}
|
}
|
||||||
|
|
||||||
fn in_memory_metadata(&self) -> RegionMetaImpl {
|
fn in_memory_metadata(&self) -> RegionMetaImpl {
|
||||||
@@ -47,61 +59,109 @@ impl Region for RegionImpl {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RegionImpl {
|
impl<S: LogStore> RegionImpl<S> {
|
||||||
pub fn new(name: String, metadata: RegionMetadata) -> RegionImpl {
|
pub fn new(
|
||||||
|
id: RegionId,
|
||||||
|
name: String,
|
||||||
|
metadata: RegionMetadata,
|
||||||
|
wal: Wal<S>,
|
||||||
|
sst_layer: AccessLayerRef,
|
||||||
|
manifest: RegionManifest,
|
||||||
|
) -> RegionImpl<S> {
|
||||||
let memtable_builder = Arc::new(DefaultMemtableBuilder {});
|
let memtable_builder = Arc::new(DefaultMemtableBuilder {});
|
||||||
let memtable_schema = MemtableSchema::new(metadata.columns_row_key.clone());
|
let memtable_version = MemtableVersion::new();
|
||||||
let mem = memtable_builder.build(memtable_schema);
|
// TODO(yingwen): Pass flush scheduler to `RegionImpl::new`.
|
||||||
let memtables = MemtableSet::new(mem);
|
let job_pool = Arc::new(JobPoolImpl {});
|
||||||
|
let flush_scheduler = Arc::new(FlushSchedulerImpl::new(job_pool));
|
||||||
|
|
||||||
let version = VersionControl::new(metadata, memtables);
|
let version_control = VersionControl::new(metadata, memtable_version);
|
||||||
let inner = Arc::new(RegionInner {
|
let inner = Arc::new(RegionInner {
|
||||||
name,
|
shared: Arc::new(SharedData {
|
||||||
version: Arc::new(version),
|
id,
|
||||||
writer: Mutex::new(RegionWriter::new(memtable_builder)),
|
name,
|
||||||
|
version_control: Arc::new(version_control),
|
||||||
|
}),
|
||||||
|
writer: Arc::new(RegionWriter::new(memtable_builder)),
|
||||||
|
wal,
|
||||||
|
flush_strategy: Arc::new(SizeBasedStrategy::default()),
|
||||||
|
flush_scheduler,
|
||||||
|
sst_layer,
|
||||||
|
manifest,
|
||||||
});
|
});
|
||||||
|
|
||||||
RegionImpl { inner }
|
RegionImpl { inner }
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
// Private methods for tests.
|
||||||
|
#[cfg(test)]
|
||||||
|
impl<S: LogStore> RegionImpl<S> {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn committed_sequence(&self) -> store_api::storage::SequenceNumber {
|
fn committed_sequence(&self) -> store_api::storage::SequenceNumber {
|
||||||
self.inner.version.committed_sequence()
|
self.inner.version_control().committed_sequence()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct RegionInner {
|
/// Shared data of region.
|
||||||
name: String,
|
pub struct SharedData {
|
||||||
version: VersionControlRef,
|
pub id: RegionId,
|
||||||
writer: Mutex<RegionWriter>,
|
pub name: String,
|
||||||
|
// TODO(yingwen): Maybe no need to use Arc for version control.
|
||||||
|
pub version_control: VersionControlRef,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RegionInner {
|
pub type SharedDataRef = Arc<SharedData>;
|
||||||
|
|
||||||
|
struct RegionInner<S: LogStore> {
|
||||||
|
shared: SharedDataRef,
|
||||||
|
writer: RegionWriterRef,
|
||||||
|
wal: Wal<S>,
|
||||||
|
flush_strategy: FlushStrategyRef,
|
||||||
|
flush_scheduler: FlushSchedulerRef,
|
||||||
|
sst_layer: AccessLayerRef,
|
||||||
|
manifest: RegionManifest,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<S: LogStore> RegionInner<S> {
|
||||||
|
#[inline]
|
||||||
|
fn version_control(&self) -> &VersionControl {
|
||||||
|
&*self.shared.version_control
|
||||||
|
}
|
||||||
|
|
||||||
fn in_memory_metadata(&self) -> RegionMetaImpl {
|
fn in_memory_metadata(&self) -> RegionMetaImpl {
|
||||||
let metadata = self.version.metadata();
|
let metadata = self.version_control().metadata();
|
||||||
|
|
||||||
RegionMetaImpl::new(metadata)
|
RegionMetaImpl::new(metadata)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn create_snapshot(&self) -> SnapshotImpl {
|
||||||
|
let version = self.version_control().current();
|
||||||
|
let sequence = self.version_control().committed_sequence();
|
||||||
|
|
||||||
|
SnapshotImpl::new(version, sequence)
|
||||||
|
}
|
||||||
|
|
||||||
async fn write(&self, ctx: &WriteContext, request: WriteBatch) -> Result<WriteResponse> {
|
async fn write(&self, ctx: &WriteContext, request: WriteBatch) -> Result<WriteResponse> {
|
||||||
let metadata = self.in_memory_metadata();
|
let metadata = self.in_memory_metadata();
|
||||||
let schema = metadata.schema();
|
let schema = metadata.schema();
|
||||||
// Only compare column schemas.
|
// Only compare column schemas.
|
||||||
ensure!(
|
ensure!(
|
||||||
schema.column_schemas() == request.schema().column_schemas(),
|
schema.column_schemas() == request.schema().column_schemas(),
|
||||||
error::InvalidInputSchemaSnafu { region: &self.name }
|
error::InvalidInputSchemaSnafu {
|
||||||
|
region: &self.shared.name
|
||||||
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let writer_ctx = WriterContext {
|
||||||
|
shared: &self.shared,
|
||||||
|
flush_strategy: &self.flush_strategy,
|
||||||
|
flush_scheduler: &self.flush_scheduler,
|
||||||
|
sst_layer: &self.sst_layer,
|
||||||
|
wal: &self.wal,
|
||||||
|
writer: &self.writer,
|
||||||
|
manifest: &self.manifest,
|
||||||
|
};
|
||||||
// Now altering schema is not allowed, so it is safe to validate schema outside of the lock.
|
// Now altering schema is not allowed, so it is safe to validate schema outside of the lock.
|
||||||
let mut writer = self.writer.lock().await;
|
self.writer.write(ctx, request, writer_ctx).await
|
||||||
writer.write(ctx, &self.version, request).await
|
|
||||||
}
|
|
||||||
|
|
||||||
fn create_snapshot(&self) -> SnapshotImpl {
|
|
||||||
let version = self.version.current();
|
|
||||||
let sequence = self.version.committed_sequence();
|
|
||||||
|
|
||||||
SnapshotImpl::new(version, sequence)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,28 +3,58 @@
|
|||||||
mod read_write;
|
mod read_write;
|
||||||
|
|
||||||
use datatypes::type_id::LogicalTypeId;
|
use datatypes::type_id::LogicalTypeId;
|
||||||
|
use log_store::fs::noop::NoopLogStore;
|
||||||
|
use object_store::{backend::fs::Backend, ObjectStore};
|
||||||
|
use store_api::manifest::Manifest;
|
||||||
use store_api::storage::consts;
|
use store_api::storage::consts;
|
||||||
|
use tempdir::TempDir;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::manifest::region::RegionManifest;
|
||||||
|
use crate::sst::FsAccessLayer;
|
||||||
use crate::test_util::{self, descriptor_util::RegionDescBuilder, schema_util};
|
use crate::test_util::{self, descriptor_util::RegionDescBuilder, schema_util};
|
||||||
|
|
||||||
#[test]
|
#[tokio::test]
|
||||||
fn test_new_region() {
|
async fn test_new_region() {
|
||||||
|
let region_id = 0;
|
||||||
let region_name = "region-0";
|
let region_name = "region-0";
|
||||||
let desc = RegionDescBuilder::new(region_name)
|
let desc = RegionDescBuilder::new(region_name)
|
||||||
|
.enable_version_column(true)
|
||||||
.push_key_column(("k1", LogicalTypeId::Int32, false))
|
.push_key_column(("k1", LogicalTypeId::Int32, false))
|
||||||
.push_value_column(("v1", LogicalTypeId::Float32, true))
|
.push_value_column(("v1", LogicalTypeId::Float32, true))
|
||||||
.build();
|
.build();
|
||||||
let metadata = desc.try_into().unwrap();
|
let metadata = desc.try_into().unwrap();
|
||||||
|
|
||||||
let region = RegionImpl::new(region_name.to_string(), metadata);
|
let wal = Wal::new(region_id, region_name, Arc::new(NoopLogStore::default()));
|
||||||
|
let store_dir = TempDir::new("test_new_region")
|
||||||
|
.unwrap()
|
||||||
|
.path()
|
||||||
|
.to_string_lossy()
|
||||||
|
.to_string();
|
||||||
|
|
||||||
let expect_schema = schema_util::new_schema_ref(&[
|
let accessor = Backend::build().root(&store_dir).finish().await.unwrap();
|
||||||
("k1", LogicalTypeId::Int32, false),
|
let object_store = ObjectStore::new(accessor);
|
||||||
(test_util::TIMESTAMP_NAME, LogicalTypeId::Int64, false),
|
let sst_layer = Arc::new(FsAccessLayer::new("/", object_store.clone()));
|
||||||
(consts::VERSION_COLUMN_NAME, LogicalTypeId::UInt64, false),
|
let manifest = RegionManifest::new(region_id, "/manifest/", object_store);
|
||||||
("v1", LogicalTypeId::Float32, true),
|
|
||||||
]);
|
let region = RegionImpl::new(
|
||||||
|
region_id,
|
||||||
|
region_name.to_string(),
|
||||||
|
metadata,
|
||||||
|
wal,
|
||||||
|
sst_layer,
|
||||||
|
manifest,
|
||||||
|
);
|
||||||
|
|
||||||
|
let expect_schema = schema_util::new_schema_ref(
|
||||||
|
&[
|
||||||
|
("k1", LogicalTypeId::Int32, false),
|
||||||
|
(test_util::TIMESTAMP_NAME, LogicalTypeId::Int64, false),
|
||||||
|
(consts::VERSION_COLUMN_NAME, LogicalTypeId::UInt64, false),
|
||||||
|
("v1", LogicalTypeId::Float32, true),
|
||||||
|
],
|
||||||
|
Some(1),
|
||||||
|
);
|
||||||
|
|
||||||
assert_eq!(region_name, region.name());
|
assert_eq!(region_name, region.name());
|
||||||
assert_eq!(expect_schema, *region.in_memory_metadata().schema());
|
assert_eq!(expect_schema, *region.in_memory_metadata().schema());
|
||||||
|
|||||||
@@ -5,39 +5,71 @@ use std::sync::Arc;
|
|||||||
use datatypes::prelude::*;
|
use datatypes::prelude::*;
|
||||||
use datatypes::type_id::LogicalTypeId;
|
use datatypes::type_id::LogicalTypeId;
|
||||||
use datatypes::vectors::Int64Vector;
|
use datatypes::vectors::Int64Vector;
|
||||||
|
use log_store::fs::noop::NoopLogStore;
|
||||||
|
use object_store::{backend::fs::Backend, ObjectStore};
|
||||||
|
use store_api::manifest::Manifest;
|
||||||
use store_api::storage::{
|
use store_api::storage::{
|
||||||
consts, Chunk, ChunkReader, PutOperation, ReadContext, Region, RegionMeta, ScanRequest,
|
consts, Chunk, ChunkReader, PutOperation, ReadContext, Region, RegionMeta, ScanRequest,
|
||||||
SequenceNumber, Snapshot, WriteContext, WriteRequest, WriteResponse,
|
SequenceNumber, Snapshot, WriteContext, WriteRequest, WriteResponse,
|
||||||
};
|
};
|
||||||
|
use tempdir::TempDir;
|
||||||
|
|
||||||
|
use crate::manifest::region::RegionManifest;
|
||||||
use crate::region::RegionImpl;
|
use crate::region::RegionImpl;
|
||||||
|
use crate::sst::FsAccessLayer;
|
||||||
use crate::test_util::{self, descriptor_util::RegionDescBuilder, write_batch_util};
|
use crate::test_util::{self, descriptor_util::RegionDescBuilder, write_batch_util};
|
||||||
|
use crate::wal::Wal;
|
||||||
use crate::write_batch::{PutData, WriteBatch};
|
use crate::write_batch::{PutData, WriteBatch};
|
||||||
|
|
||||||
/// Create a new region for read/write test
|
/// Create a new region for read/write test
|
||||||
fn new_region_for_rw(enable_version_column: bool) -> RegionImpl {
|
async fn new_region_for_rw(
|
||||||
|
store_dir: &str,
|
||||||
|
enable_version_column: bool,
|
||||||
|
) -> RegionImpl<NoopLogStore> {
|
||||||
|
let region_id = 0;
|
||||||
let region_name = "region-rw-0";
|
let region_name = "region-rw-0";
|
||||||
|
let sst_dir = format!("{}/{}/", store_dir, region_name);
|
||||||
|
let manifest_dir = format!("{}/{}/maniffest/", store_dir, region_name);
|
||||||
|
|
||||||
let desc = RegionDescBuilder::new(region_name)
|
let desc = RegionDescBuilder::new(region_name)
|
||||||
.enable_version_column(enable_version_column)
|
.enable_version_column(enable_version_column)
|
||||||
.push_value_column(("v1", LogicalTypeId::Int64, true))
|
.push_value_column(("v1", LogicalTypeId::Int64, true))
|
||||||
.build();
|
.build();
|
||||||
let metadata = desc.try_into().unwrap();
|
let metadata = desc.try_into().unwrap();
|
||||||
|
let wal = Wal::new(region_id, region_name, Arc::new(NoopLogStore::default()));
|
||||||
|
let accessor = Backend::build().root(store_dir).finish().await.unwrap();
|
||||||
|
let object_store = ObjectStore::new(accessor);
|
||||||
|
let sst_layer = Arc::new(FsAccessLayer::new(&sst_dir, object_store.clone()));
|
||||||
|
let manifest = RegionManifest::new(region_id, &manifest_dir, object_store);
|
||||||
|
|
||||||
RegionImpl::new(region_name.to_string(), metadata)
|
RegionImpl::new(
|
||||||
|
region_id,
|
||||||
|
region_name.to_string(),
|
||||||
|
metadata,
|
||||||
|
wal,
|
||||||
|
sst_layer,
|
||||||
|
manifest,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn new_write_batch_for_test(enable_version_column: bool) -> WriteBatch {
|
fn new_write_batch_for_test(enable_version_column: bool) -> WriteBatch {
|
||||||
if enable_version_column {
|
if enable_version_column {
|
||||||
write_batch_util::new_write_batch(&[
|
write_batch_util::new_write_batch(
|
||||||
(test_util::TIMESTAMP_NAME, LogicalTypeId::Int64, false),
|
&[
|
||||||
(consts::VERSION_COLUMN_NAME, LogicalTypeId::UInt64, false),
|
(test_util::TIMESTAMP_NAME, LogicalTypeId::Int64, false),
|
||||||
("v1", LogicalTypeId::Int64, true),
|
(consts::VERSION_COLUMN_NAME, LogicalTypeId::UInt64, false),
|
||||||
])
|
("v1", LogicalTypeId::Int64, true),
|
||||||
|
],
|
||||||
|
Some(0),
|
||||||
|
)
|
||||||
} else {
|
} else {
|
||||||
write_batch_util::new_write_batch(&[
|
write_batch_util::new_write_batch(
|
||||||
(test_util::TIMESTAMP_NAME, LogicalTypeId::Int64, false),
|
&[
|
||||||
("v1", LogicalTypeId::Int64, true),
|
(test_util::TIMESTAMP_NAME, LogicalTypeId::Int64, false),
|
||||||
])
|
("v1", LogicalTypeId::Int64, true),
|
||||||
|
],
|
||||||
|
Some(0),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -73,20 +105,14 @@ fn append_chunk_to(chunk: &Chunk, dst: &mut Vec<(i64, Option<i64>)>) {
|
|||||||
|
|
||||||
/// Test region without considering version column.
|
/// Test region without considering version column.
|
||||||
struct Tester {
|
struct Tester {
|
||||||
region: RegionImpl,
|
region: RegionImpl<NoopLogStore>,
|
||||||
write_ctx: WriteContext,
|
write_ctx: WriteContext,
|
||||||
read_ctx: ReadContext,
|
read_ctx: ReadContext,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Tester {
|
|
||||||
fn default() -> Tester {
|
|
||||||
Tester::new()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Tester {
|
impl Tester {
|
||||||
fn new() -> Tester {
|
async fn new(store_dir: &str) -> Tester {
|
||||||
let region = new_region_for_rw(false);
|
let region = new_region_for_rw(store_dir, false).await;
|
||||||
|
|
||||||
Tester {
|
Tester {
|
||||||
region,
|
region,
|
||||||
@@ -134,7 +160,9 @@ impl Tester {
|
|||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_simple_put_scan() {
|
async fn test_simple_put_scan() {
|
||||||
let tester = Tester::default();
|
let dir = TempDir::new("write_parquet").unwrap();
|
||||||
|
let store_dir = dir.path().to_str().unwrap();
|
||||||
|
let tester = Tester::new(store_dir).await;
|
||||||
|
|
||||||
let data = vec![
|
let data = vec![
|
||||||
(1000, Some(100)),
|
(1000, Some(100)),
|
||||||
@@ -151,7 +179,9 @@ async fn test_simple_put_scan() {
|
|||||||
}
|
}
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_sequence_increase() {
|
async fn test_sequence_increase() {
|
||||||
let tester = Tester::default();
|
let dir = TempDir::new("write_parquet").unwrap();
|
||||||
|
let store_dir = dir.path().to_str().unwrap();
|
||||||
|
let tester = Tester::new(store_dir).await;
|
||||||
|
|
||||||
let mut committed_sequence = tester.committed_sequence();
|
let mut committed_sequence = tester.committed_sequence();
|
||||||
for i in 0..100 {
|
for i in 0..100 {
|
||||||
|
|||||||
@@ -1,46 +1,291 @@
|
|||||||
use store_api::storage::{WriteContext, WriteResponse};
|
use std::sync::Arc;
|
||||||
|
|
||||||
use crate::error::Result;
|
use common_telemetry::logging;
|
||||||
use crate::memtable::{Inserter, MemtableBuilderRef};
|
use common_time::RangeMillis;
|
||||||
use crate::version::VersionControlRef;
|
use snafu::ResultExt;
|
||||||
|
use store_api::logstore::LogStore;
|
||||||
|
use store_api::storage::{SequenceNumber, WriteContext, WriteRequest, WriteResponse};
|
||||||
|
use tokio::sync::Mutex;
|
||||||
|
|
||||||
|
use crate::background::JobHandle;
|
||||||
|
use crate::error::{InvalidTimestampSnafu, Result};
|
||||||
|
use crate::flush::{FlushJob, FlushSchedulerRef, FlushStrategyRef};
|
||||||
|
use crate::memtable::{Inserter, MemtableBuilderRef, MemtableId, MemtableSet};
|
||||||
|
use crate::proto::WalHeader;
|
||||||
|
use crate::region::RegionManifest;
|
||||||
|
use crate::region::SharedDataRef;
|
||||||
|
use crate::sst::AccessLayerRef;
|
||||||
|
use crate::version::{VersionControlRef, VersionEdit};
|
||||||
|
use crate::wal::{Payload, Wal};
|
||||||
use crate::write_batch::WriteBatch;
|
use crate::write_batch::WriteBatch;
|
||||||
|
|
||||||
|
pub type RegionWriterRef = Arc<RegionWriter>;
|
||||||
|
|
||||||
pub struct RegionWriter {
|
pub struct RegionWriter {
|
||||||
_memtable_builder: MemtableBuilderRef,
|
inner: Mutex<WriterInner>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RegionWriter {
|
impl RegionWriter {
|
||||||
pub fn new(_memtable_builder: MemtableBuilderRef) -> RegionWriter {
|
pub fn new(memtable_builder: MemtableBuilderRef) -> RegionWriter {
|
||||||
RegionWriter { _memtable_builder }
|
RegionWriter {
|
||||||
|
inner: Mutex::new(WriterInner::new(memtable_builder)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn write<S: LogStore>(
|
||||||
|
&self,
|
||||||
|
ctx: &WriteContext,
|
||||||
|
request: WriteBatch,
|
||||||
|
writer_ctx: WriterContext<'_, S>,
|
||||||
|
) -> Result<WriteResponse> {
|
||||||
|
let mut inner = self.inner.lock().await;
|
||||||
|
inner.write(ctx, request, writer_ctx).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn apply_version_edit<S: LogStore>(
|
||||||
|
&self,
|
||||||
|
wal: &Wal<S>,
|
||||||
|
edit: VersionEdit,
|
||||||
|
shared: &SharedDataRef,
|
||||||
|
) -> Result<()> {
|
||||||
|
let mut inner = self.inner.lock().await;
|
||||||
|
inner.apply_version_edit(wal, edit, shared).await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct WriterContext<'a, S: LogStore> {
|
||||||
|
pub shared: &'a SharedDataRef,
|
||||||
|
pub flush_strategy: &'a FlushStrategyRef,
|
||||||
|
pub flush_scheduler: &'a FlushSchedulerRef,
|
||||||
|
pub sst_layer: &'a AccessLayerRef,
|
||||||
|
pub wal: &'a Wal<S>,
|
||||||
|
pub writer: &'a RegionWriterRef,
|
||||||
|
pub manifest: &'a RegionManifest,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, S: LogStore> WriterContext<'a, S> {
|
||||||
|
#[inline]
|
||||||
|
fn version_control(&self) -> &VersionControlRef {
|
||||||
|
&self.shared.version_control
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct WriterInner {
|
||||||
|
memtable_builder: MemtableBuilderRef,
|
||||||
|
last_memtable_id: MemtableId,
|
||||||
|
flush_handle: Option<JobHandle>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WriterInner {
|
||||||
|
fn new(memtable_builder: MemtableBuilderRef) -> WriterInner {
|
||||||
|
WriterInner {
|
||||||
|
memtable_builder,
|
||||||
|
last_memtable_id: 0,
|
||||||
|
flush_handle: None,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(yingwen): Support group commit so we can avoid taking mutable reference.
|
// TODO(yingwen): Support group commit so we can avoid taking mutable reference.
|
||||||
/// Write `WriteBatch` to region, now the schema of batch needs to be validated outside.
|
/// Write `WriteBatch` to region, now the schema of batch needs to be validated outside.
|
||||||
pub async fn write(
|
///
|
||||||
|
/// Mutable reference of writer ensure no other reference of this writer can modify the
|
||||||
|
/// version control (write is exclusive).
|
||||||
|
async fn write<S: LogStore>(
|
||||||
&mut self,
|
&mut self,
|
||||||
_ctx: &WriteContext,
|
_ctx: &WriteContext,
|
||||||
version_control: &VersionControlRef,
|
|
||||||
request: WriteBatch,
|
request: WriteBatch,
|
||||||
|
writer_ctx: WriterContext<'_, S>,
|
||||||
) -> Result<WriteResponse> {
|
) -> Result<WriteResponse> {
|
||||||
// Mutable reference of writer ensure no other reference of this writer can modify
|
let time_ranges = self.preprocess_write(&request, &writer_ctx).await?;
|
||||||
// the version control (write is exclusive).
|
|
||||||
|
|
||||||
// TODO(yingwen): Write wal and get sequence.
|
// TODO(yingwen): Write wal and get sequence.
|
||||||
|
let version_control = writer_ctx.version_control();
|
||||||
let version = version_control.current();
|
let version = version_control.current();
|
||||||
let mem = version.mutable_memtable();
|
|
||||||
|
|
||||||
let committed_sequence = version_control.committed_sequence();
|
let committed_sequence = version_control.committed_sequence();
|
||||||
// Sequence for current write batch.
|
// Sequence for current write batch.
|
||||||
let next_sequence = committed_sequence + 1;
|
let next_sequence = committed_sequence + 1;
|
||||||
|
|
||||||
// Insert batch into memtable.
|
let wal_header = WalHeader::with_last_manifest_version(version.manifest_version());
|
||||||
let mut inserter = Inserter::new(next_sequence);
|
writer_ctx
|
||||||
inserter.insert_memtable(&request, &**mem)?;
|
.wal
|
||||||
|
.write_to_wal(
|
||||||
|
next_sequence,
|
||||||
|
wal_header,
|
||||||
|
Payload::WriteBatchArrow(&request),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
// Update committed_sequence to make current batch visible. The `&mut self` of RegionWriter
|
// Insert batch into memtable.
|
||||||
|
let mut inserter = Inserter::new(next_sequence, time_ranges, version.bucket_duration());
|
||||||
|
inserter.insert_memtables(&request, version.mutable_memtables())?;
|
||||||
|
|
||||||
|
// Update committed_sequence to make current batch visible. The `&mut self` of WriterInner
|
||||||
// guarantees the writer is exclusive.
|
// guarantees the writer is exclusive.
|
||||||
version_control.set_committed_sequence(next_sequence);
|
version_control.set_committed_sequence(next_sequence);
|
||||||
|
|
||||||
Ok(WriteResponse {})
|
Ok(WriteResponse {})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Preprocess before write.
|
||||||
|
///
|
||||||
|
/// Creates needed mutable memtables, ensures there is enough capacity in memtable and trigger
|
||||||
|
/// flush if necessary. Returns time ranges of the input write batch.
|
||||||
|
async fn preprocess_write<S: LogStore>(
|
||||||
|
&mut self,
|
||||||
|
request: &WriteBatch,
|
||||||
|
writer_ctx: &WriterContext<'_, S>,
|
||||||
|
) -> Result<Vec<RangeMillis>> {
|
||||||
|
let version_control = writer_ctx.version_control();
|
||||||
|
// Check whether memtable is full or flush should be triggered. We need to do this first since
|
||||||
|
// switching memtables will clear all mutable memtables.
|
||||||
|
if self.should_flush(
|
||||||
|
writer_ctx.shared,
|
||||||
|
version_control,
|
||||||
|
writer_ctx.flush_strategy,
|
||||||
|
) {
|
||||||
|
self.trigger_flush(
|
||||||
|
writer_ctx.shared,
|
||||||
|
writer_ctx.flush_scheduler,
|
||||||
|
writer_ctx.sst_layer,
|
||||||
|
writer_ctx.writer,
|
||||||
|
writer_ctx.wal,
|
||||||
|
writer_ctx.manifest,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let current_version = version_control.current();
|
||||||
|
let duration = current_version.bucket_duration();
|
||||||
|
let time_ranges = request
|
||||||
|
.time_ranges(duration)
|
||||||
|
.context(InvalidTimestampSnafu)?;
|
||||||
|
let mutable = current_version.mutable_memtables();
|
||||||
|
let mut memtables_to_add = MemtableSet::default();
|
||||||
|
|
||||||
|
// Pre-create all needed mutable memtables.
|
||||||
|
for range in &time_ranges {
|
||||||
|
if mutable.get_by_range(range).is_none()
|
||||||
|
&& memtables_to_add.get_by_range(range).is_none()
|
||||||
|
{
|
||||||
|
// Memtable for this range is missing, need to create a new memtable.
|
||||||
|
let memtable_schema = current_version.memtable_schema();
|
||||||
|
let id = self.alloc_memtable_id();
|
||||||
|
let memtable = self.memtable_builder.build(id, memtable_schema);
|
||||||
|
memtables_to_add.insert(*range, memtable);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !memtables_to_add.is_empty() {
|
||||||
|
version_control.add_mutable(memtables_to_add);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(time_ranges)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn should_flush(
|
||||||
|
&self,
|
||||||
|
shared: &SharedDataRef,
|
||||||
|
version_control: &VersionControlRef,
|
||||||
|
flush_strategy: &FlushStrategyRef,
|
||||||
|
) -> bool {
|
||||||
|
let current = version_control.current();
|
||||||
|
let memtables = current.memtables();
|
||||||
|
let mutable_bytes_allocated = memtables.mutable_bytes_allocated();
|
||||||
|
let total_bytes_allocated = memtables.total_bytes_allocated();
|
||||||
|
flush_strategy.should_flush(shared, mutable_bytes_allocated, total_bytes_allocated)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn trigger_flush<S: LogStore>(
|
||||||
|
&mut self,
|
||||||
|
shared: &SharedDataRef,
|
||||||
|
flush_scheduler: &FlushSchedulerRef,
|
||||||
|
sst_layer: &AccessLayerRef,
|
||||||
|
writer: &RegionWriterRef,
|
||||||
|
wal: &Wal<S>,
|
||||||
|
manifest: &RegionManifest,
|
||||||
|
) -> Result<()> {
|
||||||
|
let version_control = &shared.version_control;
|
||||||
|
// Freeze all mutable memtables so we can flush them later.
|
||||||
|
version_control.freeze_mutable();
|
||||||
|
|
||||||
|
if let Some(flush_handle) = self.flush_handle.take() {
|
||||||
|
// Previous flush job is incomplete, wait util it is finished (write stall).
|
||||||
|
// However the last flush job may fail, in which case, we just return error
|
||||||
|
// and abort current write request. The flush handle is left empty, so the next
|
||||||
|
// time we still have chance to trigger a new flush.
|
||||||
|
flush_handle.join().await.map_err(|e| {
|
||||||
|
logging::error!(
|
||||||
|
"Previous flush job failed, region: {}, err: {}",
|
||||||
|
shared.name,
|
||||||
|
e
|
||||||
|
);
|
||||||
|
e
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let current_version = version_control.current();
|
||||||
|
let (max_memtable_id, mem_to_flush) = current_version.memtables().memtables_to_flush();
|
||||||
|
|
||||||
|
if max_memtable_id.is_none() {
|
||||||
|
logging::info!("No memtables to flush in region: {}", shared.name);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let flush_req = FlushJob {
|
||||||
|
max_memtable_id: max_memtable_id.unwrap(),
|
||||||
|
memtables: mem_to_flush,
|
||||||
|
// In write thread, safe to use current commited sequence.
|
||||||
|
flush_sequence: version_control.committed_sequence(),
|
||||||
|
shared: shared.clone(),
|
||||||
|
sst_layer: sst_layer.clone(),
|
||||||
|
writer: writer.clone(),
|
||||||
|
wal: wal.clone(),
|
||||||
|
manifest: manifest.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let flush_handle = flush_scheduler.schedule_flush(Box::new(flush_req)).await?;
|
||||||
|
self.flush_handle = Some(flush_handle);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn apply_version_edit<S: LogStore>(
|
||||||
|
&mut self,
|
||||||
|
wal: &Wal<S>,
|
||||||
|
edit: VersionEdit,
|
||||||
|
shared: &SharedDataRef,
|
||||||
|
) -> Result<()> {
|
||||||
|
let version_control = &shared.version_control;
|
||||||
|
|
||||||
|
let next_sequence = version_control.committed_sequence() + 1;
|
||||||
|
|
||||||
|
self.persist_manifest_version(wal, next_sequence, &edit)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
version_control.apply_edit(edit);
|
||||||
|
|
||||||
|
version_control.set_committed_sequence(next_sequence);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn persist_manifest_version<S: LogStore>(
|
||||||
|
&self,
|
||||||
|
wal: &Wal<S>,
|
||||||
|
seq: SequenceNumber,
|
||||||
|
edit: &VersionEdit,
|
||||||
|
) -> Result<()> {
|
||||||
|
let header = WalHeader::with_last_manifest_version(edit.manifest_version);
|
||||||
|
|
||||||
|
wal.write_to_wal(seq, header, Payload::None).await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn alloc_memtable_id(&mut self) -> MemtableId {
|
||||||
|
self.last_memtable_id += 1;
|
||||||
|
self.last_memtable_id
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,13 +33,34 @@ impl Snapshot for SnapshotImpl {
|
|||||||
request: ScanRequest,
|
request: ScanRequest,
|
||||||
) -> Result<ScanResponse<ChunkReaderImpl>> {
|
) -> Result<ScanResponse<ChunkReaderImpl>> {
|
||||||
let visible_sequence = self.sequence_to_read(request.sequence);
|
let visible_sequence = self.sequence_to_read(request.sequence);
|
||||||
|
let memtable_version = self.version.memtables();
|
||||||
|
|
||||||
|
let mutables = memtable_version.mutable_memtables();
|
||||||
|
let immutables = memtable_version.immutable_memtables();
|
||||||
|
let mut batch_iters = Vec::with_capacity(memtable_version.num_memtables());
|
||||||
|
|
||||||
let mem = self.version.mutable_memtable();
|
|
||||||
let iter_ctx = IterContext {
|
let iter_ctx = IterContext {
|
||||||
batch_size: ctx.batch_size,
|
batch_size: ctx.batch_size,
|
||||||
visible_sequence,
|
visible_sequence,
|
||||||
|
..Default::default()
|
||||||
};
|
};
|
||||||
let iter = mem.iter(iter_ctx)?;
|
|
||||||
|
for (_range, mem) in mutables.iter() {
|
||||||
|
let iter = mem.iter(iter_ctx.clone())?;
|
||||||
|
|
||||||
|
batch_iters.push(iter);
|
||||||
|
}
|
||||||
|
|
||||||
|
for mem_set in immutables {
|
||||||
|
for (_range, mem) in mem_set.iter() {
|
||||||
|
let iter = mem.iter(iter_ctx.clone())?;
|
||||||
|
|
||||||
|
batch_iters.push(iter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now we just simply chain all iterators together, ignore duplications/ordering.
|
||||||
|
let iter = Box::new(batch_iters.into_iter().flatten());
|
||||||
|
|
||||||
let reader = ChunkReaderImpl::new(self.version.schema().clone(), iter);
|
let reader = ChunkReaderImpl::new(self.version.schema().clone(), iter);
|
||||||
|
|
||||||
|
|||||||
172
src/storage/src/sst.rs
Normal file
172
src/storage/src/sst.rs
Normal file
@@ -0,0 +1,172 @@
|
|||||||
|
mod parquet;
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use object_store::{util, ObjectStore};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::error::Result;
|
||||||
|
use crate::memtable::BatchIteratorPtr;
|
||||||
|
use crate::sst::parquet::ParquetWriter;
|
||||||
|
|
||||||
|
/// Maximum level of ssts.
|
||||||
|
pub const MAX_LEVEL: usize = 1;
|
||||||
|
|
||||||
|
// We only has fixed number of level, so we array to hold elements. This implement
|
||||||
|
// detail of LevelMetaVec should not be exposed to the user of [LevelMetas].
|
||||||
|
type LevelMetaVec = [LevelMeta; MAX_LEVEL];
|
||||||
|
|
||||||
|
/// Metadata of all ssts under a region.
|
||||||
|
///
|
||||||
|
/// Files are organized into multiple level, though there may be only one level.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct LevelMetas {
|
||||||
|
levels: LevelMetaVec,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LevelMetas {
|
||||||
|
/// Create a new LevelMetas and initialized each level.
|
||||||
|
pub fn new() -> LevelMetas {
|
||||||
|
LevelMetas {
|
||||||
|
levels: [LevelMeta::default(); MAX_LEVEL],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Merge `self` with files to add/remove to create a new [LevelMetas].
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
/// Panics if level of [FileHandle] is greater than [MAX_LEVEL].
|
||||||
|
pub fn merge(&self, files_to_add: impl Iterator<Item = FileHandle>) -> LevelMetas {
|
||||||
|
let mut merged = self.clone();
|
||||||
|
for file in files_to_add {
|
||||||
|
let level = file.level_index();
|
||||||
|
|
||||||
|
merged.levels[level].add_file(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(yingwen): Support file removal.
|
||||||
|
|
||||||
|
merged
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for LevelMetas {
|
||||||
|
fn default() -> LevelMetas {
|
||||||
|
LevelMetas::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Metadata of files in same sst level.
|
||||||
|
#[derive(Debug, Default, Clone)]
|
||||||
|
pub struct LevelMeta {
|
||||||
|
/// Handles to the files in this level.
|
||||||
|
// TODO(yingwen): Now for simplicity, files are unordered, maybe sort the files by time range
|
||||||
|
// or use another structure to hold them.
|
||||||
|
files: Vec<FileHandle>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LevelMeta {
|
||||||
|
fn add_file(&mut self, file: FileHandle) {
|
||||||
|
self.files.push(file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// In-memory handle to a file.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct FileHandle {
|
||||||
|
inner: Arc<FileHandleInner>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FileHandle {
|
||||||
|
pub fn new(meta: FileMeta) -> FileHandle {
|
||||||
|
FileHandle {
|
||||||
|
inner: Arc::new(FileHandleInner::new(meta)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns level as usize so it can be used as index.
|
||||||
|
#[inline]
|
||||||
|
pub fn level_index(&self) -> usize {
|
||||||
|
self.inner.meta.level.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Actually data of [FileHandle].
|
||||||
|
///
|
||||||
|
/// Contains meta of the file, and other mutable info like metrics.
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct FileHandleInner {
|
||||||
|
meta: FileMeta,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FileHandleInner {
|
||||||
|
fn new(meta: FileMeta) -> FileHandleInner {
|
||||||
|
FileHandleInner { meta }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Immutable metadata of a sst file.
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct FileMeta {
|
||||||
|
pub file_path: String,
|
||||||
|
/// SST level of the file.
|
||||||
|
pub level: u8,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
pub struct WriteOptions {
|
||||||
|
// TODO(yingwen): [flush] row group size.
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sst access layer.
|
||||||
|
#[async_trait]
|
||||||
|
pub trait AccessLayer: Send + Sync {
|
||||||
|
// Writes SST file with given name and returns the full path.
|
||||||
|
async fn write_sst(
|
||||||
|
&self,
|
||||||
|
file_name: &str,
|
||||||
|
iter: BatchIteratorPtr,
|
||||||
|
opts: WriteOptions,
|
||||||
|
) -> Result<String>;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub type AccessLayerRef = Arc<dyn AccessLayer>;
|
||||||
|
|
||||||
|
/// Sst access layer based on local file system.
|
||||||
|
pub struct FsAccessLayer {
|
||||||
|
sst_dir: String,
|
||||||
|
object_store: ObjectStore,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FsAccessLayer {
|
||||||
|
pub fn new(sst_dir: &str, object_store: ObjectStore) -> FsAccessLayer {
|
||||||
|
FsAccessLayer {
|
||||||
|
sst_dir: util::normalize_dir(sst_dir),
|
||||||
|
object_store,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn sst_file_path(&self, file_name: &str) -> String {
|
||||||
|
format!("{}{}", self.sst_dir, file_name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl AccessLayer for FsAccessLayer {
|
||||||
|
async fn write_sst(
|
||||||
|
&self,
|
||||||
|
file_name: &str,
|
||||||
|
iter: BatchIteratorPtr,
|
||||||
|
opts: WriteOptions,
|
||||||
|
) -> Result<String> {
|
||||||
|
// Now we only supports parquet format. We may allow caller to specific sst format in
|
||||||
|
// WriteOptions in the future.
|
||||||
|
let file_path = self.sst_file_path(file_name);
|
||||||
|
let writer = ParquetWriter::new(&file_path, iter, self.object_store.clone());
|
||||||
|
|
||||||
|
writer.write_sst(opts).await?;
|
||||||
|
Ok(file_path)
|
||||||
|
}
|
||||||
|
}
|
||||||
263
src/storage/src/sst/parquet.rs
Normal file
263
src/storage/src/sst/parquet.rs
Normal file
@@ -0,0 +1,263 @@
|
|||||||
|
//! Parquet sst format.
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use datatypes::arrow::chunk::Chunk;
|
||||||
|
use datatypes::arrow::datatypes::{DataType, Field, Schema};
|
||||||
|
use datatypes::arrow::io::parquet::write::{
|
||||||
|
Compression, Encoding, FileSink, Version, WriteOptions,
|
||||||
|
};
|
||||||
|
use datatypes::prelude::{ConcreteDataType, Vector};
|
||||||
|
use datatypes::schema::ColumnSchema;
|
||||||
|
use futures_util::sink::SinkExt;
|
||||||
|
use object_store::ObjectStore;
|
||||||
|
use snafu::ResultExt;
|
||||||
|
use store_api::storage::consts;
|
||||||
|
|
||||||
|
use crate::error::{FlushIoSnafu, Result, WriteParquetSnafu};
|
||||||
|
use crate::memtable::{BatchIteratorPtr, MemtableSchema};
|
||||||
|
use crate::metadata::ColumnMetadata;
|
||||||
|
use crate::sst;
|
||||||
|
|
||||||
|
/// Parquet sst writer.
|
||||||
|
pub struct ParquetWriter<'a> {
|
||||||
|
file_name: &'a str,
|
||||||
|
iter: BatchIteratorPtr,
|
||||||
|
object_store: ObjectStore,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> ParquetWriter<'a> {
|
||||||
|
pub fn new(
|
||||||
|
file_name: &'a str,
|
||||||
|
iter: BatchIteratorPtr,
|
||||||
|
object_store: ObjectStore,
|
||||||
|
) -> ParquetWriter {
|
||||||
|
ParquetWriter {
|
||||||
|
file_name,
|
||||||
|
iter,
|
||||||
|
object_store,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn write_sst(self, _opts: sst::WriteOptions) -> Result<()> {
|
||||||
|
self.write_rows(None).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterates memtable and writes rows to Parquet file.
|
||||||
|
/// A chunk of records yielded from each iteration with a size given
|
||||||
|
/// in config will be written to a single row group.
|
||||||
|
async fn write_rows(self, extra_meta: Option<HashMap<String, String>>) -> Result<()> {
|
||||||
|
let schema = memtable_schema_to_arrow_schema(self.iter.schema());
|
||||||
|
let object = self.object_store.object(self.file_name);
|
||||||
|
|
||||||
|
// FIXME(hl): writer size is not used in fs backend so just leave it to 0,
|
||||||
|
// but in s3/azblob backend the Content-Length field of HTTP request is set
|
||||||
|
// to this value.
|
||||||
|
let writer = object.writer(0).await.context(FlushIoSnafu)?;
|
||||||
|
|
||||||
|
// now all physical types use plain encoding, maybe let caller to choose encoding for each type.
|
||||||
|
let encodings = get_encoding_for_schema(&schema, |_| Encoding::Plain);
|
||||||
|
|
||||||
|
let mut sink = FileSink::try_new(
|
||||||
|
writer,
|
||||||
|
schema,
|
||||||
|
encodings,
|
||||||
|
WriteOptions {
|
||||||
|
write_statistics: true,
|
||||||
|
compression: Compression::Gzip,
|
||||||
|
version: Version::V2,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.context(WriteParquetSnafu)?;
|
||||||
|
|
||||||
|
for batch in self.iter {
|
||||||
|
let batch = batch?;
|
||||||
|
sink.send(Chunk::new(
|
||||||
|
batch
|
||||||
|
.keys
|
||||||
|
.iter()
|
||||||
|
.map(|v| v.to_arrow_array())
|
||||||
|
.chain(std::iter::once(batch.sequences.to_arrow_array()))
|
||||||
|
.chain(std::iter::once(batch.value_types.to_arrow_array()))
|
||||||
|
.chain(batch.values.iter().map(|v| v.to_arrow_array()))
|
||||||
|
.collect(),
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.context(WriteParquetSnafu)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(meta) = extra_meta {
|
||||||
|
for (k, v) in meta {
|
||||||
|
sink.metadata.insert(k, Some(v));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sink.close().await.context(WriteParquetSnafu)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Assembles arrow schema from memtable schema info.
|
||||||
|
fn memtable_schema_to_arrow_schema(schema: &MemtableSchema) -> Schema {
|
||||||
|
let col_meta_to_field: fn(&ColumnMetadata) -> Field = |col_meta| {
|
||||||
|
Field::from(&ColumnSchema::new(
|
||||||
|
col_meta.desc.name.clone(),
|
||||||
|
col_meta.desc.data_type.clone(),
|
||||||
|
col_meta.desc.is_nullable,
|
||||||
|
))
|
||||||
|
};
|
||||||
|
|
||||||
|
let fields = schema
|
||||||
|
.row_key_columns()
|
||||||
|
.map(col_meta_to_field)
|
||||||
|
.chain(std::iter::once(Field::from(&ColumnSchema::new(
|
||||||
|
consts::SEQUENCE_COLUMN_NAME,
|
||||||
|
ConcreteDataType::uint64_datatype(),
|
||||||
|
false,
|
||||||
|
))))
|
||||||
|
.chain(std::iter::once(Field::from(&ColumnSchema::new(
|
||||||
|
consts::VALUE_TYPE_COLUMN_NAME,
|
||||||
|
ConcreteDataType::uint8_datatype(),
|
||||||
|
false,
|
||||||
|
))))
|
||||||
|
.chain(schema.value_columns().map(col_meta_to_field))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
Schema::from(fields)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_encoding_for_schema<F: Fn(&DataType) -> Encoding + Clone>(
|
||||||
|
schema: &Schema,
|
||||||
|
map: F,
|
||||||
|
) -> Vec<Encoding> {
|
||||||
|
schema
|
||||||
|
.fields
|
||||||
|
.iter()
|
||||||
|
.flat_map(|f| transverse(&f.data_type, map.clone()))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(hl): backport from arrow2 v0.12 (https://github.com/jorgecarleitao/arrow2/blob/f57dbd5dbc61b940a71decd5f81d0fd4c93b158d/src/io/parquet/write/mod.rs#L454-L509)
|
||||||
|
// remove it when upgrade to newer version
|
||||||
|
pub fn transverse<T, F: Fn(&DataType) -> T + Clone>(data_type: &DataType, map: F) -> Vec<T> {
|
||||||
|
let mut encodings = vec![];
|
||||||
|
transverse_recursive(data_type, map, &mut encodings);
|
||||||
|
encodings
|
||||||
|
}
|
||||||
|
|
||||||
|
fn transverse_recursive<T, F: Fn(&DataType) -> T + Clone>(
|
||||||
|
data_type: &DataType,
|
||||||
|
map: F,
|
||||||
|
encodings: &mut Vec<T>,
|
||||||
|
) {
|
||||||
|
use datatypes::arrow::datatypes::PhysicalType::*;
|
||||||
|
match data_type.to_physical_type() {
|
||||||
|
Null | Boolean | Primitive(_) | Binary | FixedSizeBinary | LargeBinary | Utf8
|
||||||
|
| Dictionary(_) | LargeUtf8 => encodings.push(map(data_type)),
|
||||||
|
List | FixedSizeList | LargeList => {
|
||||||
|
let a = data_type.to_logical_type();
|
||||||
|
if let DataType::List(inner) = a {
|
||||||
|
transverse_recursive(&inner.data_type, map, encodings)
|
||||||
|
} else if let DataType::LargeList(inner) = a {
|
||||||
|
transverse_recursive(&inner.data_type, map, encodings)
|
||||||
|
} else if let DataType::FixedSizeList(inner, _) = a {
|
||||||
|
transverse_recursive(&inner.data_type, map, encodings)
|
||||||
|
} else {
|
||||||
|
unreachable!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Struct => {
|
||||||
|
if let DataType::Struct(fields) = data_type.to_logical_type() {
|
||||||
|
for field in fields {
|
||||||
|
transverse_recursive(&field.data_type, map.clone(), encodings)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
unreachable!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Union => todo!(),
|
||||||
|
Map => todo!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use datatypes::arrow::array::{Array, Int64Array, UInt64Array, UInt8Array};
|
||||||
|
use datatypes::arrow::io::parquet::read::FileReader;
|
||||||
|
use object_store::backend::fs::Backend;
|
||||||
|
use store_api::storage::ValueType;
|
||||||
|
use tempdir::TempDir;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
use crate::memtable::tests as memtable_tests;
|
||||||
|
use crate::memtable::{DefaultMemtableBuilder, IterContext, MemtableBuilder};
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_parquet_writer() {
|
||||||
|
let schema = memtable_tests::schema_for_test();
|
||||||
|
let memtable = DefaultMemtableBuilder {}.build(1, schema);
|
||||||
|
|
||||||
|
memtable_tests::write_kvs(
|
||||||
|
&*memtable,
|
||||||
|
10, // sequence
|
||||||
|
ValueType::Put,
|
||||||
|
&[
|
||||||
|
(1000, 1),
|
||||||
|
(1000, 2),
|
||||||
|
(2002, 1),
|
||||||
|
(2003, 1),
|
||||||
|
(2003, 5),
|
||||||
|
(1001, 1),
|
||||||
|
], // keys
|
||||||
|
&[Some(1), Some(2), Some(7), Some(8), Some(9), Some(3)], // values
|
||||||
|
);
|
||||||
|
|
||||||
|
let dir = TempDir::new("write_parquet").unwrap();
|
||||||
|
let path = dir.path().to_str().unwrap();
|
||||||
|
let backend = Backend::build().root(path).finish().await.unwrap();
|
||||||
|
let object_store = ObjectStore::new(backend);
|
||||||
|
let sst_file_name = "test-flush.parquet";
|
||||||
|
let iter = memtable.iter(IterContext::default()).unwrap();
|
||||||
|
let writer = ParquetWriter::new(sst_file_name, iter, object_store);
|
||||||
|
|
||||||
|
writer
|
||||||
|
.write_sst(sst::WriteOptions::default())
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// verify parquet file
|
||||||
|
|
||||||
|
let reader = std::fs::File::open(dir.path().join(sst_file_name)).unwrap();
|
||||||
|
let mut file_reader = FileReader::try_new(reader, None, Some(128), None, None).unwrap();
|
||||||
|
|
||||||
|
// chunk schema: timestamp, __version, __sequence, __value_type, v1
|
||||||
|
let chunk = file_reader.next().unwrap().unwrap();
|
||||||
|
assert_eq!(5, chunk.arrays().len());
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
Arc::new(Int64Array::from_slice(&[
|
||||||
|
1000, 1000, 1001, 2002, 2003, 2003
|
||||||
|
])) as Arc<dyn Array>,
|
||||||
|
chunk.arrays()[0]
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
Arc::new(UInt64Array::from_slice(&[1, 2, 1, 1, 1, 5])) as Arc<dyn Array>,
|
||||||
|
chunk.arrays()[1]
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
Arc::new(UInt64Array::from_slice(&[10, 10, 10, 10, 10, 10])) as Arc<dyn Array>,
|
||||||
|
chunk.arrays()[2]
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
Arc::new(UInt8Array::from_slice(&[0, 0, 0, 0, 0, 0])) as Arc<dyn Array>,
|
||||||
|
chunk.arrays()[3]
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
Arc::new(UInt64Array::from_slice(&[1, 2, 3, 7, 8, 9])) as Arc<dyn Array>,
|
||||||
|
chunk.arrays()[4]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,13 +1,14 @@
|
|||||||
use datatypes::prelude::ConcreteDataType;
|
use datatypes::prelude::ConcreteDataType;
|
||||||
use store_api::storage::{
|
use store_api::storage::{
|
||||||
ColumnDescriptor, ColumnDescriptorBuilder, ColumnFamilyDescriptorBuilder, ColumnId,
|
ColumnDescriptor, ColumnDescriptorBuilder, ColumnFamilyDescriptorBuilder, ColumnId,
|
||||||
RegionDescriptor, RowKeyDescriptorBuilder,
|
RegionDescriptor, RegionId, RowKeyDescriptorBuilder,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::test_util::{self, schema_util::ColumnDef};
|
use crate::test_util::{self, schema_util::ColumnDef};
|
||||||
|
|
||||||
/// A RegionDescriptor builder for test.
|
/// A RegionDescriptor builder for test.
|
||||||
pub struct RegionDescBuilder {
|
pub struct RegionDescBuilder {
|
||||||
|
id: RegionId,
|
||||||
name: String,
|
name: String,
|
||||||
last_column_id: ColumnId,
|
last_column_id: ColumnId,
|
||||||
key_builder: RowKeyDescriptorBuilder,
|
key_builder: RowKeyDescriptorBuilder,
|
||||||
@@ -27,6 +28,7 @@ impl RegionDescBuilder {
|
|||||||
);
|
);
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
|
id: 0,
|
||||||
name: name.into(),
|
name: name.into(),
|
||||||
last_column_id: 2,
|
last_column_id: 2,
|
||||||
key_builder,
|
key_builder,
|
||||||
@@ -34,6 +36,11 @@ impl RegionDescBuilder {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn id(mut self, id: RegionId) -> Self {
|
||||||
|
self.id = id;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
// This will reset the row key builder, so should be called before `push_key_column()`
|
// This will reset the row key builder, so should be called before `push_key_column()`
|
||||||
// and `enable_version_column()`, or just call after `new()`.
|
// and `enable_version_column()`, or just call after `new()`.
|
||||||
pub fn timestamp(mut self, column_def: ColumnDef) -> Self {
|
pub fn timestamp(mut self, column_def: ColumnDef) -> Self {
|
||||||
@@ -61,7 +68,7 @@ impl RegionDescBuilder {
|
|||||||
|
|
||||||
pub fn build(self) -> RegionDescriptor {
|
pub fn build(self) -> RegionDescriptor {
|
||||||
RegionDescriptor {
|
RegionDescriptor {
|
||||||
id: 0,
|
id: self.id,
|
||||||
name: self.name,
|
name: self.name,
|
||||||
row_key: self.key_builder.build(),
|
row_key: self.key_builder.build(),
|
||||||
default_cf: self.default_cf_builder.build(),
|
default_cf: self.default_cf_builder.build(),
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
|||||||
/// Column definition: (name, datatype, is_nullable)
|
/// Column definition: (name, datatype, is_nullable)
|
||||||
pub type ColumnDef<'a> = (&'a str, LogicalTypeId, bool);
|
pub type ColumnDef<'a> = (&'a str, LogicalTypeId, bool);
|
||||||
|
|
||||||
pub fn new_schema(column_defs: &[ColumnDef]) -> Schema {
|
pub fn new_schema(column_defs: &[ColumnDef], timestamp_index: Option<usize>) -> Schema {
|
||||||
let column_schemas = column_defs
|
let column_schemas = column_defs
|
||||||
.iter()
|
.iter()
|
||||||
.map(|column_def| {
|
.map(|column_def| {
|
||||||
@@ -15,9 +15,13 @@ pub fn new_schema(column_defs: &[ColumnDef]) -> Schema {
|
|||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
Schema::new(column_schemas)
|
if let Some(index) = timestamp_index {
|
||||||
|
Schema::with_timestamp_index(column_schemas, index).unwrap()
|
||||||
|
} else {
|
||||||
|
Schema::new(column_schemas)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new_schema_ref(column_defs: &[ColumnDef]) -> SchemaRef {
|
pub fn new_schema_ref(column_defs: &[ColumnDef], timestamp_index: Option<usize>) -> SchemaRef {
|
||||||
Arc::new(new_schema(column_defs))
|
Arc::new(new_schema(column_defs, timestamp_index))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,8 +3,8 @@ use store_api::storage::WriteRequest;
|
|||||||
use crate::test_util::schema_util::{self, ColumnDef};
|
use crate::test_util::schema_util::{self, ColumnDef};
|
||||||
use crate::write_batch::WriteBatch;
|
use crate::write_batch::WriteBatch;
|
||||||
|
|
||||||
pub fn new_write_batch(column_defs: &[ColumnDef]) -> WriteBatch {
|
pub fn new_write_batch(column_defs: &[ColumnDef], timestamp_index: Option<usize>) -> WriteBatch {
|
||||||
let schema = schema_util::new_schema_ref(column_defs);
|
let schema = schema_util::new_schema_ref(column_defs, timestamp_index);
|
||||||
|
|
||||||
WriteBatch::new(schema)
|
WriteBatch::new(schema)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,15 +9,26 @@
|
|||||||
|
|
||||||
use std::sync::atomic::{AtomicU64, Ordering};
|
use std::sync::atomic::{AtomicU64, Ordering};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use store_api::manifest::ManifestVersion;
|
||||||
use store_api::storage::{SchemaRef, SequenceNumber};
|
use store_api::storage::{SchemaRef, SequenceNumber};
|
||||||
|
|
||||||
use crate::memtable::{MemtableRef, MemtableSet};
|
use crate::memtable::{MemtableId, MemtableSchema, MemtableSet, MemtableVersion};
|
||||||
use crate::metadata::{RegionMetadata, RegionMetadataRef};
|
use crate::metadata::{RegionMetadata, RegionMetadataRef};
|
||||||
|
use crate::sst::LevelMetas;
|
||||||
|
use crate::sst::{FileHandle, FileMeta};
|
||||||
use crate::sync::CowCell;
|
use crate::sync::CowCell;
|
||||||
|
|
||||||
|
/// Default bucket duration: 2 Hours.
|
||||||
|
const DEFAULT_BUCKET_DURATION: Duration = Duration::from_secs(3600 * 2);
|
||||||
|
|
||||||
/// Controls version of in memory state for a region.
|
/// Controls version of in memory state for a region.
|
||||||
pub struct VersionControl {
|
pub struct VersionControl {
|
||||||
|
// TODO(yingwen): If all modification to version must acquire the region writer lock first,
|
||||||
|
// then we may just use ArcSwap to hold version. But some operations may only require the
|
||||||
|
// version lock, instead of the writer lock, since we can use the version lock the protect
|
||||||
|
// the read-modify-write of version.
|
||||||
version: CowCell<Version>,
|
version: CowCell<Version>,
|
||||||
/// Latest sequence that is committed and visible to user.
|
/// Latest sequence that is committed and visible to user.
|
||||||
committed_sequence: AtomicU64,
|
committed_sequence: AtomicU64,
|
||||||
@@ -25,7 +36,7 @@ pub struct VersionControl {
|
|||||||
|
|
||||||
impl VersionControl {
|
impl VersionControl {
|
||||||
/// Construct a new version control from `metadata`.
|
/// Construct a new version control from `metadata`.
|
||||||
pub fn new(metadata: RegionMetadata, memtables: MemtableSet) -> VersionControl {
|
pub fn new(metadata: RegionMetadata, memtables: MemtableVersion) -> VersionControl {
|
||||||
VersionControl {
|
VersionControl {
|
||||||
version: CowCell::new(Version::new(metadata, memtables)),
|
version: CowCell::new(Version::new(metadata, memtables)),
|
||||||
committed_sequence: AtomicU64::new(0),
|
committed_sequence: AtomicU64::new(0),
|
||||||
@@ -58,34 +69,91 @@ impl VersionControl {
|
|||||||
// Release ordering should be enough to guarantee sequence is updated at last.
|
// Release ordering should be enough to guarantee sequence is updated at last.
|
||||||
self.committed_sequence.store(value, Ordering::Release);
|
self.committed_sequence.store(value, Ordering::Release);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Add mutable memtables and commit.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
/// See [MemtableVersion::add_mutable](MemtableVersion::add_mutable).
|
||||||
|
pub fn add_mutable(&self, memtables_to_add: MemtableSet) {
|
||||||
|
let mut version_to_update = self.version.lock();
|
||||||
|
|
||||||
|
let memtable_version = version_to_update.memtables();
|
||||||
|
let merged = memtable_version.add_mutable(memtables_to_add);
|
||||||
|
version_to_update.memtables = Arc::new(merged);
|
||||||
|
|
||||||
|
version_to_update.commit();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Freeze all mutable memtables.
|
||||||
|
pub fn freeze_mutable(&self) {
|
||||||
|
let mut version_to_update = self.version.lock();
|
||||||
|
|
||||||
|
let memtable_version = version_to_update.memtables();
|
||||||
|
let freezed = memtable_version.freeze_mutable();
|
||||||
|
version_to_update.memtables = Arc::new(freezed);
|
||||||
|
|
||||||
|
version_to_update.commit();
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn apply_edit(&self, edit: VersionEdit) {
|
||||||
|
let mut version_to_update = self.version.lock();
|
||||||
|
|
||||||
|
if let Some(max_memtable_id) = edit.max_memtable_id {
|
||||||
|
// Remove flushed memtables
|
||||||
|
let memtable_version = version_to_update.memtables();
|
||||||
|
let removed = memtable_version.remove_immutables(max_memtable_id);
|
||||||
|
version_to_update.memtables = Arc::new(removed);
|
||||||
|
}
|
||||||
|
|
||||||
|
version_to_update.apply_edit(edit);
|
||||||
|
|
||||||
|
version_to_update.commit();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct VersionEdit {
|
||||||
|
pub files_to_add: Vec<FileMeta>,
|
||||||
|
pub flushed_sequence: Option<SequenceNumber>,
|
||||||
|
pub manifest_version: ManifestVersion,
|
||||||
|
pub max_memtable_id: Option<MemtableId>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type VersionControlRef = Arc<VersionControl>;
|
pub type VersionControlRef = Arc<VersionControl>;
|
||||||
pub type VersionRef = Arc<Version>;
|
pub type VersionRef = Arc<Version>;
|
||||||
|
type MemtableVersionRef = Arc<MemtableVersion>;
|
||||||
// Get data from version, need to
|
type LevelMetasRef = Arc<LevelMetas>;
|
||||||
// 1. acquire version first
|
|
||||||
// 2. acquire sequence later
|
|
||||||
//
|
|
||||||
// Reason: data may flush and some data with old sequence may be removed, so need
|
|
||||||
// to acquire version at first.
|
|
||||||
|
|
||||||
/// Version contains metadata and state of region.
|
/// Version contains metadata and state of region.
|
||||||
|
#[derive(Clone)]
|
||||||
pub struct Version {
|
pub struct Version {
|
||||||
/// Metadata of the region. Altering metadata isn't frequent, storing metadata
|
/// Metadata of the region.
|
||||||
/// in Arc to allow sharing metadata and reuse metadata when creating a new
|
///
|
||||||
/// `Version`.
|
/// Altering metadata isn't frequent, storing metadata in Arc to allow sharing
|
||||||
|
/// metadata and reuse metadata when creating a new `Version`.
|
||||||
metadata: RegionMetadataRef,
|
metadata: RegionMetadataRef,
|
||||||
memtables: MemtableSet,
|
/// Mutable and immutable memtables.
|
||||||
// TODO(yingwen): Also need to store last sequence to this version when switching
|
///
|
||||||
|
/// Wrapped in Arc to make clone of `Version` much cheaper.
|
||||||
|
memtables: MemtableVersionRef,
|
||||||
|
/// SSTs of the region.
|
||||||
|
ssts: LevelMetasRef,
|
||||||
|
/// Inclusive max sequence of flushed data.
|
||||||
|
flushed_sequence: SequenceNumber,
|
||||||
|
/// Current version of manifest.
|
||||||
|
manifest_version: ManifestVersion,
|
||||||
|
// TODO(yingwen): Maybe also store last sequence to this version when switching
|
||||||
// version, so we can know the newest data can read from this version.
|
// version, so we can know the newest data can read from this version.
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Version {
|
impl Version {
|
||||||
pub fn new(metadata: RegionMetadata, memtables: MemtableSet) -> Version {
|
pub fn new(metadata: RegionMetadata, memtables: MemtableVersion) -> Version {
|
||||||
Version {
|
Version {
|
||||||
metadata: Arc::new(metadata),
|
metadata: Arc::new(metadata),
|
||||||
memtables,
|
memtables: Arc::new(memtables),
|
||||||
|
ssts: Arc::new(LevelMetas::new()),
|
||||||
|
flushed_sequence: 0,
|
||||||
|
manifest_version: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -95,15 +163,47 @@ impl Version {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn mutable_memtable(&self) -> &MemtableRef {
|
pub fn mutable_memtables(&self) -> &MemtableSet {
|
||||||
self.memtables.mutable_memtable()
|
self.memtables.mutable_memtables()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn memtables(&self) -> &MemtableVersionRef {
|
||||||
|
&self.memtables
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns duration used to partition the memtables and ssts by time.
|
||||||
|
pub fn bucket_duration(&self) -> Duration {
|
||||||
|
DEFAULT_BUCKET_DURATION
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn memtable_schema(&self) -> MemtableSchema {
|
||||||
|
MemtableSchema::new(self.metadata.columns_row_key.clone())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn apply_edit(&mut self, edit: VersionEdit) {
|
||||||
|
let flushed_sequence = edit.flushed_sequence.unwrap_or(self.flushed_sequence);
|
||||||
|
if self.flushed_sequence < flushed_sequence {
|
||||||
|
self.flushed_sequence = flushed_sequence;
|
||||||
|
}
|
||||||
|
if self.manifest_version < edit.manifest_version {
|
||||||
|
self.manifest_version = edit.manifest_version;
|
||||||
|
}
|
||||||
|
let handles_to_add = edit.files_to_add.into_iter().map(FileHandle::new);
|
||||||
|
let merged_ssts = self.ssts.merge(handles_to_add);
|
||||||
|
|
||||||
|
self.ssts = Arc::new(merged_ssts);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn manifest_version(&self) -> ManifestVersion {
|
||||||
|
self.manifest_version
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::memtable::{DefaultMemtableBuilder, MemtableBuilder, MemtableSchema};
|
|
||||||
use crate::test_util::descriptor_util::RegionDescBuilder;
|
use crate::test_util::descriptor_util::RegionDescBuilder;
|
||||||
|
|
||||||
fn new_version_control() -> VersionControl {
|
fn new_version_control() -> VersionControl {
|
||||||
@@ -112,11 +212,7 @@ mod tests {
|
|||||||
.build();
|
.build();
|
||||||
let metadata: RegionMetadata = desc.try_into().unwrap();
|
let metadata: RegionMetadata = desc.try_into().unwrap();
|
||||||
|
|
||||||
let schema = MemtableSchema::new(metadata.columns_row_key.clone());
|
VersionControl::new(metadata, MemtableVersion::new())
|
||||||
let memtable = DefaultMemtableBuilder {}.build(schema);
|
|
||||||
let memtable_set = MemtableSet::new(memtable);
|
|
||||||
|
|
||||||
VersionControl::new(metadata, memtable_set)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
225
src/storage/src/wal.rs
Normal file
225
src/storage/src/wal.rs
Normal file
@@ -0,0 +1,225 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use common_error::prelude::BoxedError;
|
||||||
|
use prost::Message;
|
||||||
|
use snafu::ResultExt;
|
||||||
|
use store_api::{
|
||||||
|
logstore::{entry::Entry, namespace::Namespace, AppendResponse, LogStore},
|
||||||
|
storage::SequenceNumber,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
codec::{Decoder, Encoder},
|
||||||
|
error::{self, Error, Result},
|
||||||
|
proto::{self, PayloadType, WalHeader},
|
||||||
|
write_batch::{codec::WriteBatchArrowEncoder, WriteBatch},
|
||||||
|
};
|
||||||
|
|
||||||
|
pub struct Wal<S: LogStore> {
|
||||||
|
region_id: u32,
|
||||||
|
namespace: S::Namespace,
|
||||||
|
store: Arc<S>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// wal should be cheap to clone
|
||||||
|
impl<S: LogStore> Clone for Wal<S> {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
Self {
|
||||||
|
region_id: self.region_id,
|
||||||
|
namespace: self.namespace.clone(),
|
||||||
|
store: self.store.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<S: LogStore> Wal<S> {
|
||||||
|
pub fn new(region_id: u32, region_name: impl Into<String>, store: Arc<S>) -> Self {
|
||||||
|
let region_name = region_name.into();
|
||||||
|
let namespace = S::Namespace::new(®ion_name, region_id as u64);
|
||||||
|
|
||||||
|
Self {
|
||||||
|
region_id,
|
||||||
|
namespace,
|
||||||
|
store,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn region_id(&self) -> u32 {
|
||||||
|
self.region_id
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn name(&self) -> &str {
|
||||||
|
self.namespace.name()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<S: LogStore> Wal<S> {
|
||||||
|
/// Data format:
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// | |
|
||||||
|
/// |--------------------------> Header Len <-----------------------------| Arrow/Protobuf/... encoded
|
||||||
|
/// | |
|
||||||
|
/// v v
|
||||||
|
/// +---------------------+----------------------------------------------------+--------------+-------------+--------------+
|
||||||
|
/// | | Header | | | |
|
||||||
|
/// | Header Len(varint) | (last_manifest_version + mutation_extras + ...) | Data Chunk0 | Data Chunk1 | ... |
|
||||||
|
/// | | | | | |
|
||||||
|
/// +---------------------+----------------------------------------------------+--------------+-------------+--------------+
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
pub async fn write_to_wal<'a>(
|
||||||
|
&self,
|
||||||
|
seq: SequenceNumber,
|
||||||
|
mut header: WalHeader,
|
||||||
|
payload: Payload<'a>,
|
||||||
|
) -> Result<(u64, usize)> {
|
||||||
|
header.payload_type = payload.payload_type();
|
||||||
|
|
||||||
|
if let Payload::WriteBatchArrow(batch) = payload {
|
||||||
|
header.mutation_extras = proto::gen_mutation_extras(batch);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut buf = vec![];
|
||||||
|
|
||||||
|
// header
|
||||||
|
let wal_header_encoder = WalHeaderEncoder {};
|
||||||
|
wal_header_encoder.encode(&header, &mut buf)?;
|
||||||
|
|
||||||
|
if let Payload::WriteBatchArrow(batch) = payload {
|
||||||
|
// entry
|
||||||
|
let encoder = WriteBatchArrowEncoder::new(header.mutation_extras);
|
||||||
|
// TODO(jiachun): provide some way to compute data size before encode, so we can preallocate an exactly sized buf.
|
||||||
|
encoder
|
||||||
|
.encode(batch, &mut buf)
|
||||||
|
.map_err(BoxedError::new)
|
||||||
|
.context(error::WriteWalSnafu {
|
||||||
|
region_id: self.region_id(),
|
||||||
|
name: self.name(),
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(jiachun): encode protobuf payload
|
||||||
|
|
||||||
|
// write bytes to wal
|
||||||
|
self.write(seq, &buf).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn write(&self, seq: SequenceNumber, bytes: &[u8]) -> Result<(u64, usize)> {
|
||||||
|
let ns = self.namespace.clone();
|
||||||
|
let mut e = S::Entry::new(bytes);
|
||||||
|
e.set_id(seq);
|
||||||
|
|
||||||
|
let res = self
|
||||||
|
.store
|
||||||
|
.append(ns, e)
|
||||||
|
.await
|
||||||
|
.map_err(BoxedError::new)
|
||||||
|
.context(error::WriteWalSnafu {
|
||||||
|
region_id: self.region_id(),
|
||||||
|
name: self.name(),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok((res.entry_id(), res.offset()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub enum Payload<'a> {
|
||||||
|
None, // only header
|
||||||
|
WriteBatchArrow(&'a WriteBatch),
|
||||||
|
WriteBatchProto(&'a WriteBatch),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Payload<'a> {
|
||||||
|
pub fn payload_type(&self) -> i32 {
|
||||||
|
match self {
|
||||||
|
Payload::None => PayloadType::None.into(),
|
||||||
|
Payload::WriteBatchArrow(_) => PayloadType::WriteBatchArrow.into(),
|
||||||
|
Payload::WriteBatchProto(_) => PayloadType::WriteBatchProto.into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct WalHeaderEncoder {}
|
||||||
|
|
||||||
|
impl Encoder for WalHeaderEncoder {
|
||||||
|
type Item = WalHeader;
|
||||||
|
type Error = Error;
|
||||||
|
|
||||||
|
fn encode(&self, item: &WalHeader, dst: &mut Vec<u8>) -> Result<()> {
|
||||||
|
item.encode_length_delimited(dst)
|
||||||
|
.map_err(|err| err.into())
|
||||||
|
.context(error::EncodeWalHeaderSnafu)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct WalHeaderDecoder {}
|
||||||
|
|
||||||
|
impl Decoder for WalHeaderDecoder {
|
||||||
|
type Item = (usize, WalHeader);
|
||||||
|
type Error = Error;
|
||||||
|
|
||||||
|
fn decode(&self, src: &[u8]) -> Result<Option<(usize, WalHeader)>> {
|
||||||
|
let mut data_pos = prost::decode_length_delimiter(src)
|
||||||
|
.map_err(|err| err.into())
|
||||||
|
.context(error::DecodeWalHeaderSnafu)?;
|
||||||
|
data_pos += prost::length_delimiter_len(data_pos);
|
||||||
|
|
||||||
|
let wal_header = WalHeader::decode_length_delimited(src)
|
||||||
|
.map_err(|err| err.into())
|
||||||
|
.context(error::DecodeWalHeaderSnafu)?;
|
||||||
|
|
||||||
|
Ok(Some((data_pos, wal_header)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use log_store::test_util;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
pub async fn test_write_wal() {
|
||||||
|
let (log_store, _tmp) =
|
||||||
|
test_util::log_store_util::create_tmp_local_file_log_store("wal_test").await;
|
||||||
|
let wal = Wal::new(0, "test_region", Arc::new(log_store));
|
||||||
|
|
||||||
|
let res = wal.write(0, b"test1").await.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(0, res.0);
|
||||||
|
assert_eq!(0, res.1);
|
||||||
|
|
||||||
|
let res = wal.write(1, b"test2").await.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(1, res.0);
|
||||||
|
assert_eq!(29, res.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
pub fn test_wal_header_codec() {
|
||||||
|
let wal_header = WalHeader {
|
||||||
|
payload_type: 1,
|
||||||
|
last_manifest_version: 99999999,
|
||||||
|
mutation_extras: vec![],
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = vec![];
|
||||||
|
let wal_encoder = WalHeaderEncoder {};
|
||||||
|
wal_encoder.encode(&wal_header, &mut buf).unwrap();
|
||||||
|
|
||||||
|
buf.push(1u8); // data
|
||||||
|
buf.push(2u8); // data
|
||||||
|
buf.push(3u8); // data
|
||||||
|
|
||||||
|
let decoder = WalHeaderDecoder {};
|
||||||
|
let res = decoder.decode(&buf).unwrap();
|
||||||
|
|
||||||
|
assert!(res.is_some());
|
||||||
|
|
||||||
|
let data_pos = res.unwrap().0;
|
||||||
|
assert_eq!(buf.len() - 3, data_pos);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,11 +1,19 @@
|
|||||||
use std::any::Any;
|
use std::{
|
||||||
use std::collections::HashMap;
|
any::Any,
|
||||||
use std::slice;
|
collections::{BTreeSet, HashMap},
|
||||||
|
slice,
|
||||||
|
time::Duration,
|
||||||
|
};
|
||||||
|
|
||||||
use common_error::prelude::*;
|
use common_error::prelude::*;
|
||||||
use datatypes::data_type::ConcreteDataType;
|
use common_time::{RangeMillis, TimestampMillis};
|
||||||
use datatypes::schema::SchemaRef;
|
use datatypes::{
|
||||||
use datatypes::vectors::VectorRef;
|
arrow::error::ArrowError,
|
||||||
|
data_type::ConcreteDataType,
|
||||||
|
prelude::ScalarVector,
|
||||||
|
schema::SchemaRef,
|
||||||
|
vectors::{Int64Vector, VectorRef},
|
||||||
|
};
|
||||||
use snafu::ensure;
|
use snafu::ensure;
|
||||||
use store_api::storage::{consts, PutOperation, WriteRequest};
|
use store_api::storage::{consts, PutOperation, WriteRequest};
|
||||||
|
|
||||||
@@ -58,6 +66,42 @@ pub enum Error {
|
|||||||
num_rows: usize,
|
num_rows: usize,
|
||||||
backtrace: Backtrace,
|
backtrace: Backtrace,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Cannot align timestamp: {}", ts))]
|
||||||
|
TimestampOverflow { ts: i64 },
|
||||||
|
|
||||||
|
#[snafu(display("Failed to encode, source: {}", source))]
|
||||||
|
EncodeArrow {
|
||||||
|
backtrace: Backtrace,
|
||||||
|
source: ArrowError,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Failed to decode, source: {}", source))]
|
||||||
|
DecodeArrow {
|
||||||
|
backtrace: Backtrace,
|
||||||
|
source: ArrowError,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Failed to parse schema, source: {}", source))]
|
||||||
|
ParseSchema {
|
||||||
|
backtrace: Backtrace,
|
||||||
|
source: datatypes::error::Error,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Failed to decode, in stream waiting state"))]
|
||||||
|
StreamWaiting,
|
||||||
|
|
||||||
|
#[snafu(display("Failed to decode, data corruption {}", message))]
|
||||||
|
DataCorruption {
|
||||||
|
message: String,
|
||||||
|
backtrace: Backtrace,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Failed to decode vector, source {}", source))]
|
||||||
|
DecodeVector {
|
||||||
|
backtrace: Backtrace,
|
||||||
|
source: datatypes::error::Error,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, Error>;
|
pub type Result<T> = std::result::Result<T, Error>;
|
||||||
@@ -110,6 +154,57 @@ impl WriteRequest for WriteBatch {
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Aligns timestamps in write batch specified by schema to durations.
|
||||||
|
///
|
||||||
|
/// A negative timestamp means "before Unix epoch".
|
||||||
|
/// Valid timestamp range is `[i64::MIN + duration, i64::MAX-(i64::MAX%duration))`.
|
||||||
|
fn time_ranges(&self, duration: Duration) -> Result<Vec<RangeMillis>> {
|
||||||
|
let ts_col_name = match self.schema.timestamp_column() {
|
||||||
|
None => {
|
||||||
|
// write batch does not have a timestamp column
|
||||||
|
return Ok(Vec::new());
|
||||||
|
}
|
||||||
|
Some(ts_col) => &ts_col.name,
|
||||||
|
};
|
||||||
|
let durations_millis = duration.as_millis() as i64;
|
||||||
|
let mut aligned_timestamps: BTreeSet<i64> = BTreeSet::new();
|
||||||
|
for m in &self.mutations {
|
||||||
|
match m {
|
||||||
|
Mutation::Put(put_data) => {
|
||||||
|
let column = put_data
|
||||||
|
.column_by_name(ts_col_name)
|
||||||
|
.unwrap_or_else(|| panic!("Cannot find column by name: {}", ts_col_name));
|
||||||
|
|
||||||
|
let ts_vector = column.as_any().downcast_ref::<Int64Vector>().unwrap(); // not expected to fail
|
||||||
|
for ts in ts_vector.iter_data().flatten() {
|
||||||
|
let aligned = align_timestamp(ts, durations_millis)
|
||||||
|
.context(TimestampOverflowSnafu { ts })?;
|
||||||
|
aligned_timestamps.insert(aligned);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let ranges = aligned_timestamps
|
||||||
|
.iter()
|
||||||
|
.map(|t| RangeMillis::new(*t, *t + durations_millis).unwrap())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
Ok(ranges)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Aligns timestamp to nearest time interval.
|
||||||
|
/// Negative ts means a timestamp before Unix epoch.
|
||||||
|
/// If arithmetic overflows, this function returns None.
|
||||||
|
/// So timestamp within `[i64::MIN, i64::MIN + duration)` or
|
||||||
|
/// `[i64::MAX-(i64::MAX%duration), i64::MAX]` is not a valid input.
|
||||||
|
fn align_timestamp(ts: i64, duration: i64) -> Option<i64> {
|
||||||
|
let aligned = TimestampMillis::new(ts).align_by_bucket(duration)?.as_i64();
|
||||||
|
// Also ensure end timestamp won't overflow.
|
||||||
|
aligned.checked_add(duration)?;
|
||||||
|
Some(aligned)
|
||||||
}
|
}
|
||||||
|
|
||||||
// WriteBatch pub methods.
|
// WriteBatch pub methods.
|
||||||
@@ -169,6 +264,11 @@ impl PutData {
|
|||||||
self.columns.get(name)
|
self.columns.get(name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns number of columns in data.
|
||||||
|
pub fn num_columns(&self) -> usize {
|
||||||
|
self.columns.len()
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns number of rows in data.
|
/// Returns number of rows in data.
|
||||||
pub fn num_rows(&self) -> usize {
|
pub fn num_rows(&self) -> usize {
|
||||||
self.columns
|
self.columns
|
||||||
@@ -184,6 +284,22 @@ impl PutData {
|
|||||||
pub fn is_empty(&self) -> bool {
|
pub fn is_empty(&self) -> bool {
|
||||||
self.num_rows() == 0
|
self.num_rows() == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns slice of [PutData] in range `[start, end)`.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
/// Panics if `start > end`.
|
||||||
|
pub fn slice(&self, start: usize, end: usize) -> PutData {
|
||||||
|
assert!(start <= end);
|
||||||
|
|
||||||
|
let columns = self
|
||||||
|
.columns
|
||||||
|
.iter()
|
||||||
|
.map(|(k, v)| (k.clone(), v.slice(start, end - start)))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
PutData { columns }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl WriteBatch {
|
impl WriteBatch {
|
||||||
@@ -273,15 +389,253 @@ impl PutData {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub mod codec {
|
||||||
|
use std::{io::Cursor, sync::Arc};
|
||||||
|
|
||||||
|
use common_error::prelude::*;
|
||||||
|
use datatypes::{
|
||||||
|
arrow::{
|
||||||
|
chunk::Chunk as ArrowChunk,
|
||||||
|
io::ipc::{
|
||||||
|
self,
|
||||||
|
read::{self, StreamState},
|
||||||
|
write::{StreamWriter, WriteOptions},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
error::Result as DataTypesResult,
|
||||||
|
schema::Schema,
|
||||||
|
vectors::Helper,
|
||||||
|
};
|
||||||
|
use snafu::ensure;
|
||||||
|
use store_api::storage::{PutOperation, WriteRequest};
|
||||||
|
|
||||||
|
use super::{
|
||||||
|
DataCorruptionSnafu, DecodeArrowSnafu, DecodeVectorSnafu, EncodeArrowSnafu,
|
||||||
|
Error as WriteBatchError, Mutation, ParseSchemaSnafu, Result, WriteBatch,
|
||||||
|
};
|
||||||
|
use crate::{
|
||||||
|
arrow_stream::ArrowStreamReader,
|
||||||
|
codec::{Decoder, Encoder},
|
||||||
|
};
|
||||||
|
use crate::{
|
||||||
|
proto::{MutationExtra, MutationType},
|
||||||
|
write_batch::PutData,
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO(jiachun): The codec logic is too complex, maybe we should use protobuf to
|
||||||
|
// serialize/deserialize all our data.
|
||||||
|
// And we can make a comparison with protobuf, including performance, storage cost,
|
||||||
|
// CPU consumption, etc
|
||||||
|
pub struct WriteBatchArrowEncoder {
|
||||||
|
mutation_extras: Vec<MutationExtra>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WriteBatchArrowEncoder {
|
||||||
|
pub fn new(mutation_extras: Vec<MutationExtra>) -> Self {
|
||||||
|
Self { mutation_extras }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Encoder for WriteBatchArrowEncoder {
|
||||||
|
type Item = WriteBatch;
|
||||||
|
type Error = WriteBatchError;
|
||||||
|
|
||||||
|
fn encode(&self, item: &WriteBatch, dst: &mut Vec<u8>) -> Result<()> {
|
||||||
|
let schema = item.schema().arrow_schema();
|
||||||
|
|
||||||
|
let column_names = item
|
||||||
|
.schema()
|
||||||
|
.column_schemas()
|
||||||
|
.iter()
|
||||||
|
.map(|column_schema| column_schema.name.clone())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let data = item
|
||||||
|
.iter()
|
||||||
|
.zip(self.mutation_extras.iter())
|
||||||
|
.map(|(mtn, ext)| match mtn {
|
||||||
|
Mutation::Put(put) => {
|
||||||
|
let arrays = column_names
|
||||||
|
.iter()
|
||||||
|
.filter_map(|column_name| put.column_by_name(column_name))
|
||||||
|
.map(|vector| vector.to_arrow_array())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
(arrays, &ext.column_null_mask)
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let opts = WriteOptions { compression: None };
|
||||||
|
let mut writer = StreamWriter::new(dst, opts);
|
||||||
|
let ipc_fields = ipc::write::default_ipc_fields(&schema.fields);
|
||||||
|
writer
|
||||||
|
.start(schema, Some(ipc_fields.clone()))
|
||||||
|
.context(EncodeArrowSnafu)?;
|
||||||
|
for (arrays, column_null_mask) in data {
|
||||||
|
let chunk = ArrowChunk::try_new(arrays).context(EncodeArrowSnafu)?;
|
||||||
|
if column_null_mask.is_empty() {
|
||||||
|
writer.write(&chunk, None).context(EncodeArrowSnafu)?;
|
||||||
|
} else {
|
||||||
|
let valid_ipc_fields = ipc_fields
|
||||||
|
.iter()
|
||||||
|
.zip(bit_vec::BitVec::from_bytes(column_null_mask))
|
||||||
|
.filter(|(_, mask)| !*mask)
|
||||||
|
.map(|(ipc_field, _)| ipc_field.clone())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
writer
|
||||||
|
.write(&chunk, Some(&valid_ipc_fields))
|
||||||
|
.context(EncodeArrowSnafu)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
writer.finish().context(EncodeArrowSnafu)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct WriteBatchArrowDecoder {
|
||||||
|
mutation_extras: Vec<MutationExtra>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WriteBatchArrowDecoder {
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn new(mutation_extras: Vec<MutationExtra>) -> Self {
|
||||||
|
Self { mutation_extras }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Decoder for WriteBatchArrowDecoder {
|
||||||
|
type Item = WriteBatch;
|
||||||
|
type Error = WriteBatchError;
|
||||||
|
|
||||||
|
fn decode(&self, src: &[u8]) -> Result<Option<WriteBatch>> {
|
||||||
|
let mut reader = Cursor::new(src);
|
||||||
|
let metadata = read::read_stream_metadata(&mut reader).context(DecodeArrowSnafu)?;
|
||||||
|
let mut reader = ArrowStreamReader::new(reader, metadata);
|
||||||
|
let schema = reader.metadata().schema.clone();
|
||||||
|
|
||||||
|
let stream_states = self
|
||||||
|
.mutation_extras
|
||||||
|
.iter()
|
||||||
|
.map(|ext| {
|
||||||
|
reader
|
||||||
|
.maybe_next(&ext.column_null_mask)
|
||||||
|
.context(DecodeArrowSnafu)
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<_>>>()?;
|
||||||
|
|
||||||
|
// check if exactly finished
|
||||||
|
ensure!(
|
||||||
|
reader.check_exactly_finished().context(DecodeArrowSnafu)?,
|
||||||
|
DataCorruptionSnafu {
|
||||||
|
message: "Impossible, the num of data chunks is different than expected."
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut chunks = Vec::with_capacity(self.mutation_extras.len());
|
||||||
|
|
||||||
|
for state_opt in stream_states {
|
||||||
|
match state_opt {
|
||||||
|
Some(s) => match s {
|
||||||
|
StreamState::Some(chunk) => chunks.push(chunk),
|
||||||
|
StreamState::Waiting => return Err(WriteBatchError::StreamWaiting),
|
||||||
|
},
|
||||||
|
None => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// chunks -> mutations
|
||||||
|
let chunks = chunks
|
||||||
|
.iter()
|
||||||
|
.map(|chunk| chunk.arrays())
|
||||||
|
.map(|arrays| {
|
||||||
|
arrays
|
||||||
|
.iter()
|
||||||
|
.map(Helper::try_into_vector)
|
||||||
|
.collect::<DataTypesResult<Vec<_>>>()
|
||||||
|
.context(DecodeVectorSnafu)
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<_>>>()?;
|
||||||
|
|
||||||
|
ensure!(
|
||||||
|
chunks.len() == self.mutation_extras.len(),
|
||||||
|
DataCorruptionSnafu {
|
||||||
|
message: &format!(
|
||||||
|
"expected {} mutations, but got {}",
|
||||||
|
self.mutation_extras.len(),
|
||||||
|
chunks.len()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
let schema = Schema::try_from(Arc::new(schema)).context(ParseSchemaSnafu)?;
|
||||||
|
|
||||||
|
let column_names = schema
|
||||||
|
.column_schemas()
|
||||||
|
.iter()
|
||||||
|
.map(|column| column.name.clone())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let mutations = self
|
||||||
|
.mutation_extras
|
||||||
|
.iter()
|
||||||
|
.zip(chunks.iter())
|
||||||
|
.map(|(ext, mtn)| match ext.mutation_type {
|
||||||
|
x if x == MutationType::Put as i32 => {
|
||||||
|
let valid_column_names = if ext.column_null_mask.is_empty() {
|
||||||
|
column_names.clone()
|
||||||
|
} else {
|
||||||
|
bit_vec::BitVec::from_bytes(&ext.column_null_mask)
|
||||||
|
.iter()
|
||||||
|
.zip(column_names.iter())
|
||||||
|
.filter(|(mask, _)| !*mask)
|
||||||
|
.map(|(_, column_name)| column_name.clone())
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut put_data = PutData::with_num_columns(valid_column_names.len());
|
||||||
|
|
||||||
|
let res = valid_column_names
|
||||||
|
.iter()
|
||||||
|
.zip(mtn)
|
||||||
|
.map(|(name, vector)| put_data.add_column_by_name(name, vector.clone()))
|
||||||
|
.collect::<Result<Vec<_>>>();
|
||||||
|
|
||||||
|
res.map(|_| Mutation::Put(put_data))
|
||||||
|
}
|
||||||
|
x if x == MutationType::Delete as i32 => {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
unreachable!()
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<_>>>()?;
|
||||||
|
|
||||||
|
let mut write_batch = WriteBatch::new(Arc::new(schema));
|
||||||
|
|
||||||
|
mutations
|
||||||
|
.into_iter()
|
||||||
|
.try_for_each(|mutation| match mutation {
|
||||||
|
Mutation::Put(put_data) => write_batch.put(put_data),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(Some(write_batch))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::iter;
|
use std::iter;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use datatypes::type_id::LogicalTypeId;
|
use datatypes::type_id::LogicalTypeId;
|
||||||
use datatypes::vectors::{BooleanVector, Int32Vector, UInt64Vector};
|
use datatypes::vectors::{BooleanVector, Int32Vector, Int64Vector, UInt64Vector};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::codec::{Decoder, Encoder};
|
||||||
|
use crate::proto;
|
||||||
use crate::test_util::write_batch_util;
|
use crate::test_util::write_batch_util;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -320,22 +674,28 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn new_test_batch() -> WriteBatch {
|
fn new_test_batch() -> WriteBatch {
|
||||||
write_batch_util::new_write_batch(&[
|
write_batch_util::new_write_batch(
|
||||||
("k1", LogicalTypeId::UInt64, false),
|
&[
|
||||||
(consts::VERSION_COLUMN_NAME, LogicalTypeId::UInt64, false),
|
("k1", LogicalTypeId::UInt64, false),
|
||||||
("v1", LogicalTypeId::Boolean, true),
|
(consts::VERSION_COLUMN_NAME, LogicalTypeId::UInt64, false),
|
||||||
])
|
("ts", LogicalTypeId::Int64, false),
|
||||||
|
("v1", LogicalTypeId::Boolean, true),
|
||||||
|
],
|
||||||
|
Some(2),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_write_batch_put() {
|
fn test_write_batch_put() {
|
||||||
let intv = Arc::new(UInt64Vector::from_slice(&[1, 2, 3]));
|
let intv = Arc::new(UInt64Vector::from_slice(&[1, 2, 3]));
|
||||||
let boolv = Arc::new(BooleanVector::from(vec![true, false, true]));
|
let boolv = Arc::new(BooleanVector::from(vec![true, false, true]));
|
||||||
|
let tsv = Arc::new(Int64Vector::from_vec(vec![0, 0, 0]));
|
||||||
|
|
||||||
let mut put_data = PutData::new();
|
let mut put_data = PutData::new();
|
||||||
put_data.add_key_column("k1", intv.clone()).unwrap();
|
put_data.add_key_column("k1", intv.clone()).unwrap();
|
||||||
put_data.add_version_column(intv).unwrap();
|
put_data.add_version_column(intv).unwrap();
|
||||||
put_data.add_value_column("v1", boolv).unwrap();
|
put_data.add_value_column("v1", boolv).unwrap();
|
||||||
|
put_data.add_key_column("ts", tsv).unwrap();
|
||||||
|
|
||||||
let mut batch = new_test_batch();
|
let mut batch = new_test_batch();
|
||||||
assert!(batch.is_empty());
|
assert!(batch.is_empty());
|
||||||
@@ -362,7 +722,8 @@ mod tests {
|
|||||||
let mut put_data = PutData::new();
|
let mut put_data = PutData::new();
|
||||||
put_data.add_key_column("k1", boolv).unwrap();
|
put_data.add_key_column("k1", boolv).unwrap();
|
||||||
|
|
||||||
let mut batch = write_batch_util::new_write_batch(&[("k1", LogicalTypeId::Boolean, false)]);
|
let mut batch =
|
||||||
|
write_batch_util::new_write_batch(&[("k1", LogicalTypeId::Boolean, false)], None);
|
||||||
let err = batch.put(put_data).err().unwrap();
|
let err = batch.put(put_data).err().unwrap();
|
||||||
check_err(err, "Request is too large");
|
check_err(err, "Request is too large");
|
||||||
}
|
}
|
||||||
@@ -391,9 +752,11 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_put_type_mismatch() {
|
fn test_put_type_mismatch() {
|
||||||
let boolv = Arc::new(BooleanVector::from(vec![true, false, true]));
|
let boolv = Arc::new(BooleanVector::from(vec![true, false, true]));
|
||||||
|
let tsv = Arc::new(Int64Vector::from_vec(vec![0, 0, 0]));
|
||||||
|
|
||||||
let mut put_data = PutData::new();
|
let mut put_data = PutData::new();
|
||||||
put_data.add_key_column("k1", boolv).unwrap();
|
put_data.add_key_column("k1", boolv).unwrap();
|
||||||
|
put_data.add_key_column("ts", tsv).unwrap();
|
||||||
|
|
||||||
let mut batch = new_test_batch();
|
let mut batch = new_test_batch();
|
||||||
let err = batch.put(put_data).err().unwrap();
|
let err = batch.put(put_data).err().unwrap();
|
||||||
@@ -403,9 +766,11 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_put_type_has_null() {
|
fn test_put_type_has_null() {
|
||||||
let intv = Arc::new(UInt64Vector::from_iter(&[Some(1), None, Some(3)]));
|
let intv = Arc::new(UInt64Vector::from_iter(&[Some(1), None, Some(3)]));
|
||||||
|
let tsv = Arc::new(Int64Vector::from_vec(vec![0, 0, 0]));
|
||||||
|
|
||||||
let mut put_data = PutData::new();
|
let mut put_data = PutData::new();
|
||||||
put_data.add_key_column("k1", intv).unwrap();
|
put_data.add_key_column("k1", intv).unwrap();
|
||||||
|
put_data.add_key_column("ts", tsv).unwrap();
|
||||||
|
|
||||||
let mut batch = new_test_batch();
|
let mut batch = new_test_batch();
|
||||||
let err = batch.put(put_data).err().unwrap();
|
let err = batch.put(put_data).err().unwrap();
|
||||||
@@ -415,10 +780,11 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_put_missing_column() {
|
fn test_put_missing_column() {
|
||||||
let boolv = Arc::new(BooleanVector::from(vec![true, false, true]));
|
let boolv = Arc::new(BooleanVector::from(vec![true, false, true]));
|
||||||
|
let tsv = Arc::new(Int64Vector::from_vec(vec![0, 0, 0]));
|
||||||
|
|
||||||
let mut put_data = PutData::new();
|
let mut put_data = PutData::new();
|
||||||
put_data.add_key_column("v1", boolv).unwrap();
|
put_data.add_key_column("v1", boolv).unwrap();
|
||||||
|
put_data.add_key_column("ts", tsv).unwrap();
|
||||||
let mut batch = new_test_batch();
|
let mut batch = new_test_batch();
|
||||||
let err = batch.put(put_data).err().unwrap();
|
let err = batch.put(put_data).err().unwrap();
|
||||||
check_err(err, "Missing column k1");
|
check_err(err, "Missing column k1");
|
||||||
@@ -427,16 +793,125 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_put_unknown_column() {
|
fn test_put_unknown_column() {
|
||||||
let intv = Arc::new(UInt64Vector::from_slice(&[1, 2, 3]));
|
let intv = Arc::new(UInt64Vector::from_slice(&[1, 2, 3]));
|
||||||
|
let tsv = Arc::new(Int64Vector::from_vec(vec![0, 0, 0]));
|
||||||
let boolv = Arc::new(BooleanVector::from(vec![true, false, true]));
|
let boolv = Arc::new(BooleanVector::from(vec![true, false, true]));
|
||||||
|
|
||||||
let mut put_data = PutData::new();
|
let mut put_data = PutData::new();
|
||||||
put_data.add_key_column("k1", intv.clone()).unwrap();
|
put_data.add_key_column("k1", intv.clone()).unwrap();
|
||||||
put_data.add_version_column(intv).unwrap();
|
put_data.add_version_column(intv).unwrap();
|
||||||
put_data.add_value_column("v1", boolv.clone()).unwrap();
|
put_data.add_value_column("v1", boolv.clone()).unwrap();
|
||||||
|
put_data.add_key_column("ts", tsv).unwrap();
|
||||||
put_data.add_value_column("v2", boolv).unwrap();
|
put_data.add_value_column("v2", boolv).unwrap();
|
||||||
|
|
||||||
let mut batch = new_test_batch();
|
let mut batch = new_test_batch();
|
||||||
let err = batch.put(put_data).err().unwrap();
|
let err = batch.put(put_data).err().unwrap();
|
||||||
check_err(err, "Unknown column v2");
|
check_err(err, "Unknown column v2");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
pub fn test_align_timestamp() {
|
||||||
|
let duration_millis = 20;
|
||||||
|
let ts = [-21, -20, -19, -1, 0, 5, 15, 19, 20, 21];
|
||||||
|
let res = ts.map(|t| align_timestamp(t, duration_millis));
|
||||||
|
assert_eq!(res, [-40, -20, -20, -20, 0, 0, 0, 0, 20, 20].map(Some));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
pub fn test_align_timestamp_overflow() {
|
||||||
|
assert_eq!(Some(i64::MIN), align_timestamp(i64::MIN, 1));
|
||||||
|
assert_eq!(None, align_timestamp(i64::MIN, 2));
|
||||||
|
assert_eq!(
|
||||||
|
Some(((i64::MIN + 20) / 20 - 1) * 20),
|
||||||
|
align_timestamp(i64::MIN + 20, 20)
|
||||||
|
);
|
||||||
|
assert_eq!(None, align_timestamp(i64::MAX - (i64::MAX % 23), 23));
|
||||||
|
assert_eq!(
|
||||||
|
Some(9223372036854775780),
|
||||||
|
align_timestamp(i64::MAX / 20 * 20 - 1, 20)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
pub fn test_write_batch_time_range() {
|
||||||
|
let intv = Arc::new(UInt64Vector::from_slice(&[1, 2, 3, 4, 5, 6]));
|
||||||
|
let tsv = Arc::new(Int64Vector::from_vec(vec![-21, -20, -1, 0, 1, 20]));
|
||||||
|
let boolv = Arc::new(BooleanVector::from(vec![
|
||||||
|
true, false, true, false, false, false,
|
||||||
|
]));
|
||||||
|
|
||||||
|
let mut put_data = PutData::new();
|
||||||
|
put_data.add_key_column("k1", intv.clone()).unwrap();
|
||||||
|
put_data.add_version_column(intv).unwrap();
|
||||||
|
put_data.add_value_column("v1", boolv).unwrap();
|
||||||
|
put_data.add_key_column("ts", tsv).unwrap();
|
||||||
|
|
||||||
|
let mut batch = new_test_batch();
|
||||||
|
batch.put(put_data).unwrap();
|
||||||
|
|
||||||
|
let duration_millis = 20i64;
|
||||||
|
let ranges = batch
|
||||||
|
.time_ranges(Duration::from_millis(duration_millis as u64))
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
[-40, -20, 0, 20].map(|v| RangeMillis::new(v, v + duration_millis).unwrap()),
|
||||||
|
ranges.as_slice()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_codec() -> Result<()> {
|
||||||
|
let intv = Arc::new(UInt64Vector::from_slice(&[1, 2, 3]));
|
||||||
|
let boolv = Arc::new(BooleanVector::from(vec![Some(true), Some(false), None]));
|
||||||
|
let tsv = Arc::new(Int64Vector::from_vec(vec![0, 0, 0]));
|
||||||
|
|
||||||
|
let mut put_data = PutData::new();
|
||||||
|
put_data.add_key_column("k1", intv.clone()).unwrap();
|
||||||
|
put_data.add_version_column(intv).unwrap();
|
||||||
|
put_data.add_value_column("v1", boolv).unwrap();
|
||||||
|
put_data.add_key_column("ts", tsv).unwrap();
|
||||||
|
|
||||||
|
let mut batch = new_test_batch();
|
||||||
|
assert!(batch.is_empty());
|
||||||
|
batch.put(put_data).unwrap();
|
||||||
|
assert!(!batch.is_empty());
|
||||||
|
|
||||||
|
let encoder = codec::WriteBatchArrowEncoder::new(proto::gen_mutation_extras(&batch));
|
||||||
|
let mut dst = vec![];
|
||||||
|
let result = encoder.encode(&batch, &mut dst);
|
||||||
|
assert!(result.is_ok());
|
||||||
|
|
||||||
|
let decoder = codec::WriteBatchArrowDecoder::new(proto::gen_mutation_extras(&batch));
|
||||||
|
let result = decoder.decode(&dst);
|
||||||
|
let batch2 = result?.unwrap();
|
||||||
|
assert_eq!(batch.num_rows, batch2.num_rows);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_codec_with_none_column() -> Result<()> {
|
||||||
|
let intv = Arc::new(UInt64Vector::from_slice(&[1, 2, 3]));
|
||||||
|
let tsv = Arc::new(Int64Vector::from_vec(vec![0, 0, 0]));
|
||||||
|
|
||||||
|
let mut put_data = PutData::new();
|
||||||
|
put_data.add_key_column("k1", intv.clone()).unwrap();
|
||||||
|
put_data.add_version_column(intv).unwrap();
|
||||||
|
put_data.add_key_column("ts", tsv).unwrap();
|
||||||
|
|
||||||
|
let mut batch = new_test_batch();
|
||||||
|
assert!(batch.is_empty());
|
||||||
|
batch.put(put_data).unwrap();
|
||||||
|
assert!(!batch.is_empty());
|
||||||
|
|
||||||
|
let encoder = codec::WriteBatchArrowEncoder::new(proto::gen_mutation_extras(&batch));
|
||||||
|
let mut dst = vec![];
|
||||||
|
let result = encoder.encode(&batch, &mut dst);
|
||||||
|
assert!(result.is_ok());
|
||||||
|
|
||||||
|
let decoder = codec::WriteBatchArrowDecoder::new(proto::gen_mutation_extras(&batch));
|
||||||
|
let result = decoder.decode(&dst);
|
||||||
|
let batch2 = result?.unwrap();
|
||||||
|
assert_eq!(batch.num_rows, batch2.num_rows);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,8 +10,11 @@ async-trait = "0.1"
|
|||||||
bytes = "1.1"
|
bytes = "1.1"
|
||||||
common-base = { path = "../common/base" }
|
common-base = { path = "../common/base" }
|
||||||
common-error = { path = "../common/error" }
|
common-error = { path = "../common/error" }
|
||||||
|
common-time = { path = "../common/time" }
|
||||||
datatypes = { path = "../datatypes" }
|
datatypes = { path = "../datatypes" }
|
||||||
futures = "0.3"
|
futures = "0.3"
|
||||||
|
object-store = { path = "../object-store" }
|
||||||
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
snafu = { version = "0.7", features = ["backtraces"] }
|
snafu = { version = "0.7", features = ["backtraces"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
//! Storage related APIs
|
//! Storage related APIs
|
||||||
|
|
||||||
pub mod logstore;
|
pub mod logstore;
|
||||||
|
pub mod manifest;
|
||||||
pub mod storage;
|
pub mod storage;
|
||||||
|
|||||||
@@ -12,8 +12,8 @@ pub mod namespace;
|
|||||||
|
|
||||||
/// `LogStore` serves as a Write-Ahead-Log for storage engine.
|
/// `LogStore` serves as a Write-Ahead-Log for storage engine.
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
pub trait LogStore {
|
pub trait LogStore: Send + Sync + 'static {
|
||||||
type Error: ErrorExt + Send + Sync;
|
type Error: ErrorExt + Send + Sync + 'static;
|
||||||
type Namespace: Namespace;
|
type Namespace: Namespace;
|
||||||
type Entry: Entry;
|
type Entry: Entry;
|
||||||
type AppendResponse: AppendResponse;
|
type AppendResponse: AppendResponse;
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
pub trait Namespace: Send + Sync + Clone {
|
pub trait Namespace: Send + Sync + Clone {
|
||||||
|
fn new(name: &str, id: u64) -> Self;
|
||||||
|
|
||||||
fn name(&self) -> &str;
|
fn name(&self) -> &str;
|
||||||
}
|
}
|
||||||
|
|||||||
45
src/store-api/src/manifest.rs
Normal file
45
src/store-api/src/manifest.rs
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
//! metadata service
|
||||||
|
mod storage;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use common_error::ext::ErrorExt;
|
||||||
|
use object_store::ObjectStore;
|
||||||
|
use serde::{de::DeserializeOwned, Serialize};
|
||||||
|
pub use storage::*;
|
||||||
|
|
||||||
|
pub type ManifestVersion = u64;
|
||||||
|
pub const MIN_VERSION: u64 = 0;
|
||||||
|
pub const MAX_VERSION: u64 = u64::MAX;
|
||||||
|
|
||||||
|
pub trait Metadata: Clone {}
|
||||||
|
|
||||||
|
pub trait MetadataId: Clone + Copy {}
|
||||||
|
|
||||||
|
/// The action to apply on metadata
|
||||||
|
pub trait MetaAction: Serialize + DeserializeOwned {
|
||||||
|
type MetadataId: MetadataId;
|
||||||
|
|
||||||
|
/// Returns the metadata id of the action
|
||||||
|
fn metadata_id(&self) -> Self::MetadataId;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Manifest service
|
||||||
|
#[async_trait]
|
||||||
|
pub trait Manifest: Send + Sync + Clone + 'static {
|
||||||
|
type Error: ErrorExt + Send + Sync;
|
||||||
|
type MetaAction: MetaAction;
|
||||||
|
type MetadataId: MetadataId;
|
||||||
|
type Metadata: Metadata;
|
||||||
|
|
||||||
|
fn new(id: Self::MetadataId, manifest_dir: &str, object_store: ObjectStore) -> Self;
|
||||||
|
|
||||||
|
/// Update metadata by the action
|
||||||
|
async fn update(&self, action: Self::MetaAction) -> Result<ManifestVersion, Self::Error>;
|
||||||
|
|
||||||
|
/// Retrieve the latest metadata
|
||||||
|
async fn load(&self) -> Result<Option<Self::Metadata>, Self::Error>;
|
||||||
|
|
||||||
|
async fn checkpoint(&self) -> Result<ManifestVersion, Self::Error>;
|
||||||
|
|
||||||
|
fn metadata_id(&self) -> Self::MetadataId;
|
||||||
|
}
|
||||||
41
src/store-api/src/manifest/storage.rs
Normal file
41
src/store-api/src/manifest/storage.rs
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
use async_trait::async_trait;
|
||||||
|
use common_error::ext::ErrorExt;
|
||||||
|
|
||||||
|
use crate::manifest::ManifestVersion;
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
pub trait LogIterator: Send + Sync {
|
||||||
|
type Error: ErrorExt + Send + Sync;
|
||||||
|
|
||||||
|
async fn next_log(&mut self) -> Result<Option<(ManifestVersion, Vec<u8>)>, Self::Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
pub trait ManifestLogStorage {
|
||||||
|
type Error: ErrorExt + Send + Sync;
|
||||||
|
type Iter: LogIterator<Error = Self::Error>;
|
||||||
|
|
||||||
|
/// Scan the logs in [start, end)
|
||||||
|
async fn scan(
|
||||||
|
&self,
|
||||||
|
start: ManifestVersion,
|
||||||
|
end: ManifestVersion,
|
||||||
|
) -> Result<Self::Iter, Self::Error>;
|
||||||
|
|
||||||
|
/// Save a log
|
||||||
|
async fn save(&self, version: ManifestVersion, bytes: &[u8]) -> Result<(), Self::Error>;
|
||||||
|
|
||||||
|
/// Delete logs in [start, end)
|
||||||
|
async fn delete(&self, start: ManifestVersion, end: ManifestVersion)
|
||||||
|
-> Result<(), Self::Error>;
|
||||||
|
|
||||||
|
/// Save a checkpoint
|
||||||
|
async fn save_checkpoint(
|
||||||
|
&self,
|
||||||
|
version: ManifestVersion,
|
||||||
|
bytes: &[u8],
|
||||||
|
) -> Result<(), Self::Error>;
|
||||||
|
|
||||||
|
/// Load the latest checkpoint
|
||||||
|
async fn load_checkpoint(&self) -> Result<Option<(ManifestVersion, Vec<u8>)>, Self::Error>;
|
||||||
|
}
|
||||||
@@ -29,6 +29,12 @@ pub const VERSION_COLUMN_NAME: &str = "__version";
|
|||||||
// Names for default column family.
|
// Names for default column family.
|
||||||
pub const DEFAULT_CF_NAME: &str = "default";
|
pub const DEFAULT_CF_NAME: &str = "default";
|
||||||
|
|
||||||
|
// Name for reserved column: sequence
|
||||||
|
pub const SEQUENCE_COLUMN_NAME: &str = "__sequence";
|
||||||
|
|
||||||
|
// Name for reserved column: value_type
|
||||||
|
pub const VALUE_TYPE_COLUMN_NAME: &str = "__value_type";
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
// ---------- Default options --------------------------------------------------
|
// ---------- Default options --------------------------------------------------
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
use datatypes::value::Value;
|
use datatypes::value::Value;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::manifest::MetadataId;
|
||||||
use crate::storage::{consts, ColumnSchema, ConcreteDataType};
|
use crate::storage::{consts, ColumnSchema, ConcreteDataType};
|
||||||
|
|
||||||
/// Id of column, unique in each region.
|
/// Id of column, unique in each region.
|
||||||
@@ -7,6 +9,7 @@ pub type ColumnId = u32;
|
|||||||
/// Id of column family, unique in each region.
|
/// Id of column family, unique in each region.
|
||||||
pub type ColumnFamilyId = u32;
|
pub type ColumnFamilyId = u32;
|
||||||
pub type RegionId = u32;
|
pub type RegionId = u32;
|
||||||
|
impl MetadataId for RegionId {}
|
||||||
/// Default region name prefix
|
/// Default region name prefix
|
||||||
pub const REGION_PREFIX: &str = "r_";
|
pub const REGION_PREFIX: &str = "r_";
|
||||||
|
|
||||||
@@ -17,7 +20,7 @@ pub fn gen_region_name(id: RegionId) -> String {
|
|||||||
|
|
||||||
// TODO(yingwen): Validate default value has same type with column, and name is a valid column name.
|
// TODO(yingwen): Validate default value has same type with column, and name is a valid column name.
|
||||||
/// A [ColumnDescriptor] contains information to create a column.
|
/// A [ColumnDescriptor] contains information to create a column.
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct ColumnDescriptor {
|
pub struct ColumnDescriptor {
|
||||||
pub id: ColumnId,
|
pub id: ColumnId,
|
||||||
pub name: String,
|
pub name: String,
|
||||||
@@ -131,7 +134,7 @@ impl RowKeyDescriptorBuilder {
|
|||||||
Self {
|
Self {
|
||||||
columns: Vec::new(),
|
columns: Vec::new(),
|
||||||
timestamp,
|
timestamp,
|
||||||
enable_version_column: true,
|
enable_version_column: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -254,7 +257,7 @@ mod tests {
|
|||||||
|
|
||||||
let desc = RowKeyDescriptorBuilder::new(timestamp.clone()).build();
|
let desc = RowKeyDescriptorBuilder::new(timestamp.clone()).build();
|
||||||
assert!(desc.columns.is_empty());
|
assert!(desc.columns.is_empty());
|
||||||
assert!(desc.enable_version_column);
|
assert!(!desc.enable_version_column);
|
||||||
|
|
||||||
let desc = RowKeyDescriptorBuilder::new(timestamp.clone())
|
let desc = RowKeyDescriptorBuilder::new(timestamp.clone())
|
||||||
.columns_capacity(1)
|
.columns_capacity(1)
|
||||||
@@ -266,7 +269,7 @@ mod tests {
|
|||||||
)
|
)
|
||||||
.build();
|
.build();
|
||||||
assert_eq!(2, desc.columns.len());
|
assert_eq!(2, desc.columns.len());
|
||||||
assert!(desc.enable_version_column);
|
assert!(!desc.enable_version_column);
|
||||||
|
|
||||||
let desc = RowKeyDescriptorBuilder::new(timestamp)
|
let desc = RowKeyDescriptorBuilder::new(timestamp)
|
||||||
.enable_version_column(false)
|
.enable_version_column(false)
|
||||||
|
|||||||
@@ -1,4 +1,7 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
use common_error::ext::ErrorExt;
|
use common_error::ext::ErrorExt;
|
||||||
|
use common_time::RangeMillis;
|
||||||
use datatypes::schema::SchemaRef;
|
use datatypes::schema::SchemaRef;
|
||||||
use datatypes::vectors::VectorRef;
|
use datatypes::vectors::VectorRef;
|
||||||
|
|
||||||
@@ -12,6 +15,11 @@ pub trait WriteRequest: Send {
|
|||||||
fn new(schema: SchemaRef) -> Self;
|
fn new(schema: SchemaRef) -> Self;
|
||||||
|
|
||||||
fn put(&mut self, put: Self::PutOp) -> Result<(), Self::Error>;
|
fn put(&mut self, put: Self::PutOp) -> Result<(), Self::Error>;
|
||||||
|
|
||||||
|
/// Returns all possible time ranges that contain the timestamp in this batch.
|
||||||
|
///
|
||||||
|
/// Each time range is aligned to given `duration`.
|
||||||
|
fn time_ranges(&self, duration: Duration) -> Result<Vec<RangeMillis>, Self::Error>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Put multiple rows.
|
/// Put multiple rows.
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ common-telemetry = {path = "../common/telemetry" }
|
|||||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git" , branch = "arrow2"}
|
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git" , branch = "arrow2"}
|
||||||
datatypes = { path = "../datatypes" }
|
datatypes = { path = "../datatypes" }
|
||||||
futures = "0.3"
|
futures = "0.3"
|
||||||
|
log-store = { path = "../log-store" }
|
||||||
snafu = { version = "0.7", features = ["backtraces"] }
|
snafu = { version = "0.7", features = ["backtraces"] }
|
||||||
storage ={ path = "../storage" }
|
storage ={ path = "../storage" }
|
||||||
store-api ={ path = "../store-api" }
|
store-api ={ path = "../store-api" }
|
||||||
@@ -21,4 +22,5 @@ table = { path = "../table" }
|
|||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
datatypes = { path = "../datatypes" }
|
datatypes = { path = "../datatypes" }
|
||||||
tokio = { version = "1.18", features = ["full"] }
|
tempdir = "0.3"
|
||||||
|
tokio = { version = "1.18", features = ["full"] }
|
||||||
|
|||||||
@@ -194,8 +194,8 @@ mod tests {
|
|||||||
use crate::table::test;
|
use crate::table::test;
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_creat_table_insert_scan() {
|
async fn test_create_table_insert_scan() {
|
||||||
let (_engine, table, schema) = test::setup_test_engine_and_table().await;
|
let (_engine, table, schema, _dir) = test::setup_test_engine_and_table().await;
|
||||||
|
|
||||||
assert_eq!(TableType::Base, table.table_type());
|
assert_eq!(TableType::Base, table.table_type());
|
||||||
assert_eq!(schema, table.schema());
|
assert_eq!(schema, table.schema());
|
||||||
|
|||||||
@@ -3,14 +3,23 @@ use std::sync::Arc;
|
|||||||
use datatypes::prelude::ConcreteDataType;
|
use datatypes::prelude::ConcreteDataType;
|
||||||
use datatypes::schema::SchemaRef;
|
use datatypes::schema::SchemaRef;
|
||||||
use datatypes::schema::{ColumnSchema, Schema};
|
use datatypes::schema::{ColumnSchema, Schema};
|
||||||
|
use log_store::fs::noop::NoopLogStore;
|
||||||
|
use storage::config::EngineConfig;
|
||||||
use storage::EngineImpl;
|
use storage::EngineImpl;
|
||||||
use table::engine::{EngineContext, TableEngine};
|
use table::engine::EngineContext;
|
||||||
|
use table::engine::TableEngine;
|
||||||
use table::requests::CreateTableRequest;
|
use table::requests::CreateTableRequest;
|
||||||
use table::TableRef;
|
use table::TableRef;
|
||||||
|
use tempdir::TempDir;
|
||||||
|
|
||||||
use crate::engine::MitoEngine;
|
use crate::engine::MitoEngine;
|
||||||
|
|
||||||
pub async fn setup_test_engine_and_table() -> (MitoEngine<EngineImpl>, TableRef, SchemaRef) {
|
pub async fn setup_test_engine_and_table() -> (
|
||||||
|
MitoEngine<EngineImpl<NoopLogStore>>,
|
||||||
|
TableRef,
|
||||||
|
SchemaRef,
|
||||||
|
TempDir,
|
||||||
|
) {
|
||||||
let column_schemas = vec![
|
let column_schemas = vec![
|
||||||
ColumnSchema::new("host", ConcreteDataType::string_datatype(), false),
|
ColumnSchema::new("host", ConcreteDataType::string_datatype(), false),
|
||||||
ColumnSchema::new("ts", ConcreteDataType::int64_datatype(), true),
|
ColumnSchema::new("ts", ConcreteDataType::int64_datatype(), true),
|
||||||
@@ -18,10 +27,22 @@ pub async fn setup_test_engine_and_table() -> (MitoEngine<EngineImpl>, TableRef,
|
|||||||
ColumnSchema::new("memory", ConcreteDataType::float64_datatype(), true),
|
ColumnSchema::new("memory", ConcreteDataType::float64_datatype(), true),
|
||||||
];
|
];
|
||||||
|
|
||||||
let table_engine = MitoEngine::<EngineImpl>::new(EngineImpl::new());
|
let dir = TempDir::new("setup_test_engine_and_table").unwrap();
|
||||||
|
let store_dir = dir.path().to_string_lossy();
|
||||||
|
|
||||||
|
let table_engine = MitoEngine::<EngineImpl<NoopLogStore>>::new(
|
||||||
|
EngineImpl::new(
|
||||||
|
EngineConfig::with_store_dir(&store_dir),
|
||||||
|
Arc::new(NoopLogStore::default()),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap(),
|
||||||
|
);
|
||||||
|
|
||||||
let table_name = "demo";
|
let table_name = "demo";
|
||||||
let schema = Arc::new(Schema::new(column_schemas));
|
let schema = Arc::new(
|
||||||
|
Schema::with_timestamp_index(column_schemas, 1).expect("ts must be timestamp column"),
|
||||||
|
);
|
||||||
let table = table_engine
|
let table = table_engine
|
||||||
.create_table(
|
.create_table(
|
||||||
&EngineContext::default(),
|
&EngineContext::default(),
|
||||||
@@ -34,5 +55,5 @@ pub async fn setup_test_engine_and_table() -> (MitoEngine<EngineImpl>, TableRef,
|
|||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
(table_engine, table, schema)
|
(table_engine, table, schema, dir)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user