mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2026-01-05 21:02:58 +00:00

Compare commits: 20 commits, v0.4.0-nig ... v0.3.2
| Author | SHA1 | Date |
|---|---|---|
|  | 4b580f4037 |  |
|  | ee16262b45 |  |
|  | f37b394f1a |  |
|  | ccee60f37d |  |
|  | bee8323bae |  |
|  | 000df8cf1e |  |
|  | 884731a2c8 |  |
|  | 2922c25a16 |  |
|  | 4dec06ec86 |  |
|  | 3b6f70cde3 |  |
|  | b8e92292d2 |  |
|  | 746fe8b4fe |  |
|  | 20f2fc4a2a |  |
|  | 2ef84f64f1 |  |
|  | 451cc02d8d |  |
|  | b466ef6cb6 |  |
|  | 5b42e15105 |  |
|  | e1bb7acfe5 |  |
|  | 2c0c4672b4 |  |
|  | e54415e723 |  |
30 .github/workflows/release.yml vendored
@@ -127,6 +127,21 @@ jobs:
name: ${{ matrix.file }}.sha256sum
path: target/${{ matrix.arch }}/${{ env.CARGO_PROFILE }}/${{ matrix.file }}.sha256sum

- name: Configure tag
  shell: bash
  if: github.event_name == 'push'
  run: |
    VERSION=${{ github.ref_name }}
    echo "TAG=${VERSION:1}" >> $GITHUB_ENV

- name: Upload to S3
  run: |
    aws s3 sync target/${{ matrix.arch }}/${{ env.CARGO_PROFILE }} s3://${{ secrets.GREPTIMEDB_RELEASE_BUCKET_NAME }}/releases/${TAG}
  env:
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
    AWS_DEFAULT_REGION: ${{ secrets.AWS_CN_REGION }}

build-linux:
  name: Build linux binary
  strategy:

@@ -288,6 +303,21 @@ jobs:
name: ${{ matrix.file }}.sha256sum
path: target/${{ matrix.arch }}/${{ env.CARGO_PROFILE }}/${{ matrix.file }}.sha256sum

- name: Configure tag
  shell: bash
  if: github.event_name == 'push'
  run: |
    VERSION=${{ github.ref_name }}
    echo "TAG=${VERSION:1}" >> $GITHUB_ENV

- name: Upload to S3
  run: |
    aws s3 sync target/${{ matrix.arch }}/${{ env.CARGO_PROFILE }} s3://${{ secrets.GREPTIMEDB_RELEASE_BUCKET_NAME }}/releases/${TAG}
  env:
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
    AWS_DEFAULT_REGION: ${{ secrets.AWS_CN_REGION }}

docker:
  name: Build docker image
  needs: [build-linux, build-macos]
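The added "Configure tag" steps derive the S3 prefix from the pushed git tag by stripping its leading character with Bash's `${VERSION:1}` substring expansion, so a push of tag `v0.3.2` uploads under `releases/0.3.2`. A minimal sketch of the same transformation in the repository's own language (the tag value is only an illustration; in CI it comes from `${{ github.ref_name }}`, and the Bash form drops the first character unconditionally while this sketch only strips a `v`):

```rust
/// Equivalent of the workflow's `TAG=${VERSION:1}`: drop the leading `v` from a release tag
/// and build the S3 key prefix used by the "Upload to S3" step.
fn release_prefix(git_ref_name: &str) -> String {
    let tag = git_ref_name.strip_prefix('v').unwrap_or(git_ref_name);
    format!("releases/{tag}")
}

fn main() {
    // Objects would land under s3://<bucket>/releases/0.3.2
    assert_eq!(release_prefix("v0.3.2"), "releases/0.3.2");
}
```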
119 Cargo.lock generated
Every workspace crate's `version` line changes from "0.4.0" to "0.3.2": api, benchmarks, catalog, client, cmd, common-base, common-catalog, common-datasource, common-error, common-function, common-function-macro, common-grpc, common-grpc-expr, common-mem-prof, common-meta, common-pprof, common-procedure, common-procedure-test, common-query, common-recordbatch, common-runtime, common-telemetry, common-test-util, common-time, datanode, datatypes, file-table-engine, frontend, log-store, meta-client, meta-srv, mito, object-store, partition, promql, query, script, servers, session, sql, sqlness-runner, storage, store-api, substrait, table, table-procedure, tests-integration.

Other Cargo.lock changes in this compare:

- client, cmd, datanode, frontend, query: the internal dependency "substrait 0.4.0" becomes "substrait 0.3.2" (client also keeps the external "substrait 0.7.5").
- common-meta: three dependency entries are added; the affected hunks list api, async-stream, async-trait, chrono, common-catalog, common-telemetry, common-time, datatypes, futures, prost, serde, serde_json and snafu.
- greptime-proto 0.1.0: source switches from git+https://github.com/GreptimeTeam/greptime-proto.git?rev=7aeaeaba1e0ca6a5c736b6ab2eb63144ae3d284b to git+https://github.com/WenyXu/greptime-proto.git?rev=1eda4691a5d2c8ffc463d48ca2317905ba7e4b2d.
- meta-srv: one dependency entry is added; the hunk lists async-trait, catalog, chrono, client, common-base, common-catalog and common-error.
- mito2 0.3.2 is added as a new package entry.
- sqlness: 0.4.3 from git+https://github.com/CeresDB/sqlness.git?rev=a4663365795d2067eb53966c383e1bb0c89c7627 becomes 0.5.0 from registry+https://github.com/rust-lang/crates.io-index (checksum 0860f149718809371602b42573693e1ed2b1d0aed35fe69e04e4e4e9918d81f7).
@@ -33,6 +33,7 @@ members = [
"src/meta-client",
"src/meta-srv",
"src/mito",
"src/mito2",
"src/object-store",
"src/partition",
"src/promql",
@@ -50,7 +51,7 @@ members = [
]

[workspace.package]
version = "0.4.0"
version = "0.3.2"
edition = "2021"
license = "Apache-2.0"

@@ -72,7 +73,7 @@ datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "7aeaeaba1e0ca6a5c736b6ab2eb63144ae3d284b" }
greptime-proto = { git = "https://github.com/WenyXu/greptime-proto.git", rev = "1eda4691a5d2c8ffc463d48ca2317905ba7e4b2d" }
itertools = "0.10"
parquet = "40.0"
paste = "1.0"
@@ -1,7 +1,7 @@
[build]
pre-build = [
    "dpkg --add-architecture $CROSS_DEB_ARCH",
    "apt update && apt install -y unzip zlib1g-dev:$CROSS_DEB_ARCH",
    "apt update && apt install -y unzip zlib1g-dev zlib1g-dev:$CROSS_DEB_ARCH",
    "curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip && unzip protoc-3.15.8-linux-x86_64.zip -d /usr/",
    "chmod a+x /usr/bin/protoc && chmod -R a+rx /usr/include/google",
]
@@ -10,6 +10,8 @@ rpc_addr = "127.0.0.1:3001"
rpc_hostname = "127.0.0.1"
# The number of gRPC server worker threads, 8 by default.
rpc_runtime_size = 8
# Interval for sending heartbeat messages to the Metasrv in milliseconds, 5000 by default.
heartbeat_interval_millis = 5000

# Metasrv client options.
[meta_client_options]
@@ -1,10 +1,15 @@
# Node running mode, see `standalone.example.toml`.
mode = "distributed"
# Interval for sending heartbeat task to the Metasrv in milliseconds, 5000 by default.
heartbeat_interval_millis = 5000
# Interval for retry sending heartbeat task in milliseconds, 5000 by default.
retry_interval_millis = 5000

# HTTP server options, see `standalone.example.toml`.
[http_options]
addr = "127.0.0.1:4000"
timeout = "30s"
body_limit = "64MB"

# gRPC server options, see `standalone.example.toml`.
[grpc_options]
@@ -9,6 +9,9 @@ enable_memory_catalog = false
addr = "127.0.0.1:4000"
# HTTP request timeout, 30s by default.
timeout = "30s"
# HTTP request body limit, 64MB by default.
# The following units are supported: B, KB, KiB, MB, MiB, GB, GiB, TB, TiB, PB, PiB.
body_limit = "64MB"

# gRPC server options.
[grpc_options]
29 docker/Dockerfile-centos7-builder Normal file
@@ -0,0 +1,29 @@
FROM centos:7

ENV LANG en_US.utf8
WORKDIR /greptimedb

RUN sed -e 's|^mirrorlist=|#mirrorlist=|g' \
    -e 's|^#baseurl=http://mirror.centos.org/centos|baseurl=http://mirrors.tuna.tsinghua.edu.cn/centos|g' \
    -i.bak \
    /etc/yum.repos.d/CentOS-*.repo

# Install dependencies
RUN ulimit -n 1024000 && yum groupinstall -y 'Development Tools'
RUN yum install -y epel-release \
    openssl \
    openssl-devel \
    centos-release-scl \
    rh-python38 \
    rh-python38-python-devel

# Install protoc
RUN curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip
RUN unzip protoc-3.15.8-linux-x86_64.zip -d /usr/local/

# Install Rust
SHELL ["/bin/bash", "-c"]
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
ENV PATH /opt/rh/rh-python38/root/usr/bin:/usr/local/bin:/root/.cargo/bin/:$PATH

CMD ["cargo", "build", "--release"]
@@ -243,6 +243,12 @@ pub enum Error {
|
||||
|
||||
#[snafu(display("A generic error has occurred, msg: {}", msg))]
|
||||
Generic { msg: String, location: Location },
|
||||
|
||||
#[snafu(display("Table metadata manager error: {}", source))]
|
||||
TableMetadataManager {
|
||||
source: common_meta::error::Error,
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -298,6 +304,7 @@ impl ErrorExt for Error {
|
||||
Error::Unimplemented { .. } | Error::NotSupported { .. } => StatusCode::Unsupported,
|
||||
Error::QueryAccessDenied { .. } => StatusCode::AccessDenied,
|
||||
Error::Datafusion { .. } => StatusCode::EngineExecuteQuery,
|
||||
Error::TableMetadataManager { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -243,9 +243,12 @@ impl LocalCatalogManager {
|
||||
info!("Registered schema: {:?}", s);
|
||||
}
|
||||
Entry::Table(t) => {
|
||||
max_table_id = max_table_id.max(t.table_id);
|
||||
if t.is_deleted {
|
||||
continue;
|
||||
}
|
||||
self.open_and_register_table(&t).await?;
|
||||
info!("Registered table: {:?}", t);
|
||||
max_table_id = max_table_id.max(t.table_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -602,6 +605,7 @@ mod tests {
|
||||
table_name: "T1".to_string(),
|
||||
table_id: 1,
|
||||
engine: MITO_ENGINE.to_string(),
|
||||
is_deleted: false,
|
||||
}),
|
||||
Entry::Catalog(CatalogEntry {
|
||||
catalog_name: "C2".to_string(),
|
||||
@@ -623,6 +627,7 @@ mod tests {
|
||||
table_name: "T2".to_string(),
|
||||
table_id: 2,
|
||||
engine: MITO_ENGINE.to_string(),
|
||||
is_deleted: false,
|
||||
}),
|
||||
];
|
||||
let res = LocalCatalogManager::sort_entries(vec);
|
||||
|
||||
@@ -12,18 +12,11 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::fmt::Debug;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
|
||||
pub use client::{CachedMetaKvBackend, MetaKvBackend};
|
||||
use futures::Stream;
|
||||
use futures_util::StreamExt;
|
||||
pub use manager::RemoteCatalogManager;
|
||||
|
||||
use crate::error::Error;
|
||||
|
||||
mod client;
|
||||
mod manager;
|
||||
|
||||
@@ -31,59 +24,6 @@ mod manager;
|
||||
pub mod mock;
|
||||
pub mod region_alive_keeper;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Kv(pub Vec<u8>, pub Vec<u8>);
|
||||
|
||||
pub type ValueIter<'a, E> = Pin<Box<dyn Stream<Item = Result<Kv, E>> + Send + 'a>>;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
pub trait KvBackend: Send + Sync {
|
||||
fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
|
||||
where
|
||||
'a: 'b;
|
||||
|
||||
async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error>;
|
||||
|
||||
/// Compare and set value of key. `expect` is the expected value, if backend's current value associated
|
||||
/// with key is the same as `expect`, the value will be updated to `val`.
|
||||
///
|
||||
/// - If the compare-and-set operation successfully updated value, this method will return an `Ok(Ok())`
|
||||
/// - If associated value is not the same as `expect`, no value will be updated and an `Ok(Err(Vec<u8>))`
|
||||
/// will be returned, the `Err(Vec<u8>)` indicates the current associated value of key.
|
||||
/// - If any error happens during operation, an `Err(Error)` will be returned.
|
||||
async fn compare_and_set(
|
||||
&self,
|
||||
key: &[u8],
|
||||
expect: &[u8],
|
||||
val: &[u8],
|
||||
) -> Result<Result<(), Option<Vec<u8>>>, Error>;
|
||||
|
||||
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error>;
|
||||
|
||||
async fn delete(&self, key: &[u8]) -> Result<(), Error> {
|
||||
self.delete_range(key, &[]).await
|
||||
}
|
||||
|
||||
/// Default get is implemented based on `range` method.
|
||||
async fn get(&self, key: &[u8]) -> Result<Option<Kv>, Error> {
|
||||
let mut iter = self.range(key);
|
||||
while let Some(r) = iter.next().await {
|
||||
let kv = r?;
|
||||
if kv.0 == key {
|
||||
return Ok(Some(kv));
|
||||
}
|
||||
}
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
/// MoveValue atomically renames the key to the given updated key.
|
||||
async fn move_value(&self, from_key: &[u8], to_key: &[u8]) -> Result<(), Error>;
|
||||
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
}
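The `compare_and_set` contract documented above nests two results: the outer `Result` carries backend errors, while the inner `Result` reports whether the expected value matched, returning the currently stored value on a mismatch. A hypothetical caller sketch, written against the trait exactly as defined in this block (the helper name is illustrative, not part of the crate):

```rust
// Sketch only: update `key` from `old` to `new` iff nobody changed it in between.
// On a lost race, the inner `Err` carries the backend's current value (if any).
async fn update_if_unchanged(
    backend: &dyn KvBackend,
    key: &[u8],
    old: &[u8],
    new: &[u8],
) -> Result<bool, Error> {
    match backend.compare_and_set(key, old, new).await? {
        Ok(()) => Ok(true),         // value was `old`, now atomically replaced by `new`
        Err(_current) => Ok(false), // mismatch: `_current` is the value actually stored
    }
}
```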
|
||||
|
||||
pub type KvBackendRef = Arc<dyn KvBackend>;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
pub trait KvCacheInvalidator: Send + Sync {
|
||||
async fn invalidate_key(&self, key: &[u8]);
|
||||
@@ -93,14 +33,19 @@ pub type KvCacheInvalidatorRef = Arc<dyn KvCacheInvalidator>;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use async_stream::stream;
|
||||
use std::any::Any;
|
||||
|
||||
use super::*;
|
||||
use async_stream::stream;
|
||||
use common_meta::kv_backend::{Kv, KvBackend, ValueIter};
|
||||
|
||||
use crate::error::Error;
|
||||
|
||||
struct MockKvBackend {}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl KvBackend for MockKvBackend {
|
||||
type Error = Error;
|
||||
|
||||
fn range<'a, 'b>(&'a self, _key: &[u8]) -> ValueIter<'b, Error>
|
||||
where
|
||||
'a: 'b,
|
||||
|
||||
@@ -18,24 +18,26 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_stream::stream;
|
||||
use common_error::prelude::BoxedError;
|
||||
use common_meta::error::Error::{CacheNotGet, GetKvCache};
|
||||
use common_meta::error::{CacheNotGetSnafu, Error, MetaSrvSnafu, Result};
|
||||
use common_meta::kv_backend::{Kv, KvBackend, KvBackendRef, ValueIter};
|
||||
use common_meta::rpc::store::{
|
||||
CompareAndPutRequest, DeleteRangeRequest, MoveValueRequest, PutRequest, RangeRequest,
|
||||
};
|
||||
use common_telemetry::{info, timer};
|
||||
use meta_client::client::MetaClient;
|
||||
use moka::future::{Cache, CacheBuilder};
|
||||
use snafu::ResultExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use super::KvCacheInvalidator;
|
||||
use crate::error::{Error, GenericSnafu, MetaSrvSnafu, Result};
|
||||
use crate::metrics::{METRIC_CATALOG_KV_GET, METRIC_CATALOG_KV_REMOTE_GET};
|
||||
use crate::remote::{Kv, KvBackend, KvBackendRef, ValueIter};
|
||||
|
||||
const CACHE_MAX_CAPACITY: u64 = 10000;
|
||||
const CACHE_TTL_SECOND: u64 = 10 * 60;
|
||||
const CACHE_TTI_SECOND: u64 = 5 * 60;
|
||||
|
||||
pub type CacheBackendRef = Arc<Cache<Vec<u8>, Option<Kv>>>;
|
||||
pub type CacheBackendRef = Arc<Cache<Vec<u8>, Kv>>;
|
||||
pub struct CachedMetaKvBackend {
|
||||
kv_backend: KvBackendRef,
|
||||
cache: CacheBackendRef,
|
||||
@@ -43,6 +45,8 @@ pub struct CachedMetaKvBackend {
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl KvBackend for CachedMetaKvBackend {
|
||||
type Error = Error;
|
||||
|
||||
fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
|
||||
where
|
||||
'a: 'b,
|
||||
@@ -55,12 +59,26 @@ impl KvBackend for CachedMetaKvBackend {
|
||||
|
||||
let init = async {
|
||||
let _timer = timer!(METRIC_CATALOG_KV_REMOTE_GET);
|
||||
|
||||
self.kv_backend.get(key).await
|
||||
self.kv_backend.get(key).await.map(|val| {
|
||||
val.with_context(|| CacheNotGetSnafu {
|
||||
key: String::from_utf8_lossy(key),
|
||||
})
|
||||
})?
|
||||
};
|
||||
|
||||
let schema_provider = self.cache.try_get_with_by_ref(key, init).await;
|
||||
schema_provider.map_err(|e| GenericSnafu { msg: e.to_string() }.build())
|
||||
// currently moka doesn't have `optionally_try_get_with_by_ref`
|
||||
// TODO(fys): change to moka method when available
|
||||
// https://github.com/moka-rs/moka/issues/254
|
||||
match self.cache.try_get_with_by_ref(key, init).await {
|
||||
Ok(val) => Ok(Some(val)),
|
||||
Err(e) => match e.as_ref() {
|
||||
CacheNotGet { .. } => Ok(None),
|
||||
_ => Err(e),
|
||||
},
|
||||
}
|
||||
.map_err(|e| GetKvCache {
|
||||
err_msg: e.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn set(&self, key: &[u8], val: &[u8]) -> Result<()> {
|
||||
@@ -165,6 +183,8 @@ pub struct MetaKvBackend {
|
||||
/// comparing to `Accessor`'s list and get method.
|
||||
#[async_trait::async_trait]
|
||||
impl KvBackend for MetaKvBackend {
|
||||
type Error = Error;
|
||||
|
||||
fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
|
||||
where
|
||||
'a: 'b,
|
||||
@@ -175,6 +195,7 @@ impl KvBackend for MetaKvBackend {
|
||||
.client
|
||||
.range(RangeRequest::new().with_prefix(key))
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(MetaSrvSnafu)?;
|
||||
let kvs = resp.take_kvs();
|
||||
for mut kv in kvs.into_iter() {
|
||||
@@ -188,6 +209,7 @@ impl KvBackend for MetaKvBackend {
|
||||
.client
|
||||
.range(RangeRequest::new().with_key(key))
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(MetaSrvSnafu)?;
|
||||
Ok(response
|
||||
.take_kvs()
|
||||
@@ -199,13 +221,23 @@ impl KvBackend for MetaKvBackend {
|
||||
let req = PutRequest::new()
|
||||
.with_key(key.to_vec())
|
||||
.with_value(val.to_vec());
|
||||
let _ = self.client.put(req).await.context(MetaSrvSnafu)?;
|
||||
let _ = self
|
||||
.client
|
||||
.put(req)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(MetaSrvSnafu)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<()> {
|
||||
let req = DeleteRangeRequest::new().with_range(key.to_vec(), end.to_vec());
|
||||
let resp = self.client.delete_range(req).await.context(MetaSrvSnafu)?;
|
||||
let resp = self
|
||||
.client
|
||||
.delete_range(req)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(MetaSrvSnafu)?;
|
||||
info!(
|
||||
"Delete range, key: {}, end: {}, deleted: {}",
|
||||
String::from_utf8_lossy(key),
|
||||
@@ -230,6 +262,7 @@ impl KvBackend for MetaKvBackend {
|
||||
.client
|
||||
.compare_and_put(request)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(MetaSrvSnafu)?;
|
||||
if response.is_success() {
|
||||
Ok(Ok(()))
|
||||
@@ -240,7 +273,12 @@ impl KvBackend for MetaKvBackend {
|
||||
|
||||
async fn move_value(&self, from_key: &[u8], to_key: &[u8]) -> Result<()> {
|
||||
let req = MoveValueRequest::new(from_key, to_key);
|
||||
let _ = self.client.move_value(req).await.context(MetaSrvSnafu)?;
|
||||
let _ = self
|
||||
.client
|
||||
.move_value(req)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(MetaSrvSnafu)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ use async_stream::stream;
|
||||
use async_trait::async_trait;
|
||||
use common_catalog::consts::{MAX_SYS_TABLE_ID, MITO_ENGINE};
|
||||
use common_meta::ident::TableIdent;
|
||||
use common_meta::kv_backend::{Kv, KvBackendRef};
|
||||
use common_telemetry::{debug, error, info, warn};
|
||||
use futures::Stream;
|
||||
use futures_util::{StreamExt, TryStreamExt};
|
||||
@@ -35,6 +36,7 @@ use tokio::sync::Mutex;
|
||||
use crate::error::{
|
||||
CatalogNotFoundSnafu, CreateTableSnafu, InvalidCatalogValueSnafu, OpenTableSnafu,
|
||||
ParallelOpenTableSnafu, Result, SchemaNotFoundSnafu, TableEngineNotFoundSnafu,
|
||||
TableMetadataManagerSnafu,
|
||||
};
|
||||
use crate::helper::{
|
||||
build_catalog_prefix, build_schema_prefix, build_table_global_prefix,
|
||||
@@ -42,7 +44,6 @@ use crate::helper::{
|
||||
TableGlobalValue, TableRegionalKey, TableRegionalValue, CATALOG_KEY_PREFIX,
|
||||
};
|
||||
use crate::remote::region_alive_keeper::RegionAliveKeepers;
|
||||
use crate::remote::{Kv, KvBackendRef};
|
||||
use crate::{
|
||||
handle_system_table_request, CatalogManager, DeregisterTableRequest, RegisterSchemaRequest,
|
||||
RegisterSystemTableRequest, RegisterTableRequest, RenameTableRequest,
|
||||
@@ -80,7 +81,7 @@ impl RemoteCatalogManager {
|
||||
let mut catalogs = self.backend.range(catalog_range_prefix.as_bytes());
|
||||
Box::pin(stream!({
|
||||
while let Some(r) = catalogs.next().await {
|
||||
let Kv(k, _) = r?;
|
||||
let Kv(k, _) = r.context(TableMetadataManagerSnafu)?;
|
||||
if !k.starts_with(catalog_range_prefix.as_bytes()) {
|
||||
debug!("Ignoring non-catalog key: {}", String::from_utf8_lossy(&k));
|
||||
continue;
|
||||
@@ -134,7 +135,8 @@ impl RemoteCatalogManager {
|
||||
.as_bytes()
|
||||
.context(InvalidCatalogValueSnafu)?,
|
||||
)
|
||||
.await?;
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
info!("Created schema '{schema_key}'");
|
||||
|
||||
let catalog_key = CatalogKey {
|
||||
@@ -148,7 +150,8 @@ impl RemoteCatalogManager {
|
||||
.as_bytes()
|
||||
.context(InvalidCatalogValueSnafu)?,
|
||||
)
|
||||
.await?;
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
info!("Created catalog '{catalog_key}");
|
||||
Ok(())
|
||||
}
|
||||
@@ -316,7 +319,8 @@ impl RemoteCatalogManager {
|
||||
table_key.as_bytes(),
|
||||
&table_value.as_bytes().context(InvalidCatalogValueSnafu)?,
|
||||
)
|
||||
.await?;
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
debug!(
|
||||
"Successfully set catalog table entry, key: {}, table value: {:?}",
|
||||
table_key, table_value
|
||||
@@ -343,7 +347,8 @@ impl RemoteCatalogManager {
|
||||
let engine_opt = self
|
||||
.backend
|
||||
.get(table_key.as_bytes())
|
||||
.await?
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?
|
||||
.map(|Kv(_, v)| {
|
||||
let TableRegionalValue {
|
||||
table_id,
|
||||
@@ -361,7 +366,10 @@ impl RemoteCatalogManager {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
self.backend.delete(table_key.as_bytes()).await?;
|
||||
self.backend
|
||||
.delete(table_key.as_bytes())
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
debug!(
|
||||
"Successfully deleted catalog table entry, key: {}",
|
||||
table_key
|
||||
@@ -428,7 +436,7 @@ async fn iter_remote_schemas<'a>(
|
||||
|
||||
Box::pin(stream!({
|
||||
while let Some(r) = schemas.next().await {
|
||||
let Kv(k, _) = r?;
|
||||
let Kv(k, _) = r.context(TableMetadataManagerSnafu)?;
|
||||
if !k.starts_with(schema_prefix.as_bytes()) {
|
||||
debug!("Ignoring non-schema key: {}", String::from_utf8_lossy(&k));
|
||||
continue;
|
||||
@@ -452,7 +460,7 @@ async fn iter_remote_tables<'a>(
|
||||
let mut tables = backend.range(table_prefix.as_bytes());
|
||||
Box::pin(stream!({
|
||||
while let Some(r) = tables.next().await {
|
||||
let Kv(k, v) = r?;
|
||||
let Kv(k, v) = r.context(TableMetadataManagerSnafu)?;
|
||||
if !k.starts_with(table_prefix.as_bytes()) {
|
||||
debug!("Ignoring non-table prefix: {}", String::from_utf8_lossy(&k));
|
||||
continue;
|
||||
@@ -701,7 +709,8 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
.as_bytes()
|
||||
.context(InvalidCatalogValueSnafu)?,
|
||||
)
|
||||
.await?;
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
|
||||
increment_gauge!(crate::metrics::METRIC_CATALOG_MANAGER_SCHEMA_COUNT, 1.0);
|
||||
Ok(true)
|
||||
@@ -720,7 +729,7 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
node_id: self.node_id,
|
||||
}
|
||||
.to_string();
|
||||
let Some(Kv(_, value_bytes)) = self.backend.get(old_table_key.as_bytes()).await? else {
|
||||
let Some(Kv(_, value_bytes)) = self.backend.get(old_table_key.as_bytes()).await.context(TableMetadataManagerSnafu)? else {
|
||||
return Ok(false)
|
||||
};
|
||||
let new_table_key = TableRegionalKey {
|
||||
@@ -731,10 +740,12 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
};
|
||||
self.backend
|
||||
.set(new_table_key.to_string().as_bytes(), &value_bytes)
|
||||
.await?;
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
self.backend
|
||||
.delete(old_table_key.to_string().as_bytes())
|
||||
.await?;
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
@@ -756,7 +767,12 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
let key = self
|
||||
.build_schema_key(catalog.to_string(), schema.to_string())
|
||||
.to_string();
|
||||
Ok(self.backend.get(key.as_bytes()).await?.is_some())
|
||||
Ok(self
|
||||
.backend
|
||||
.get(key.as_bytes())
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?
|
||||
.is_some())
|
||||
}
|
||||
|
||||
async fn table(
|
||||
@@ -778,7 +794,8 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
let table_opt = self
|
||||
.backend
|
||||
.get(key.as_bytes())
|
||||
.await?
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?
|
||||
.map(|Kv(_, v)| {
|
||||
let TableRegionalValue {
|
||||
table_id,
|
||||
@@ -821,7 +838,8 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
Ok(self
|
||||
.backend
|
||||
.get(key.to_string().as_bytes())
|
||||
.await?
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?
|
||||
.is_some())
|
||||
}
|
||||
|
||||
@@ -836,7 +854,12 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
}
|
||||
.to_string();
|
||||
|
||||
Ok(self.backend.get(key.as_bytes()).await?.is_some())
|
||||
Ok(self
|
||||
.backend
|
||||
.get(key.as_bytes())
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?
|
||||
.is_some())
|
||||
}
|
||||
|
||||
async fn catalog_names(&self) -> Result<Vec<String>> {
|
||||
@@ -905,7 +928,8 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
.as_bytes()
|
||||
.context(InvalidCatalogValueSnafu)?,
|
||||
)
|
||||
.await?;
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
increment_gauge!(crate::metrics::METRIC_CATALOG_MANAGER_CATALOG_COUNT, 1.0);
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
@@ -12,20 +12,13 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::collections::btree_map::Entry;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock as StdRwLock};
|
||||
|
||||
use async_stream::stream;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_recordbatch::RecordBatch;
|
||||
use common_telemetry::logging::info;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::vectors::StringVector;
|
||||
use serde::Serializer;
|
||||
use table::engine::{CloseTableResult, EngineContext, TableEngine};
|
||||
use table::metadata::TableId;
|
||||
use table::requests::{
|
||||
@@ -33,135 +26,6 @@ use table::requests::{
|
||||
};
|
||||
use table::test_util::MemTable;
|
||||
use table::TableRef;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::helper::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
|
||||
use crate::remote::{Kv, KvBackend, ValueIter};
|
||||
|
||||
pub struct MockKvBackend {
|
||||
map: RwLock<BTreeMap<Vec<u8>, Vec<u8>>>,
|
||||
}
|
||||
|
||||
impl Default for MockKvBackend {
|
||||
fn default() -> Self {
|
||||
let catalog_value = CatalogValue {}.as_bytes().unwrap();
|
||||
let schema_value = SchemaValue {}.as_bytes().unwrap();
|
||||
|
||||
let default_catalog_key = CatalogKey {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
}
|
||||
.to_string();
|
||||
|
||||
let default_schema_key = SchemaKey {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
}
|
||||
.to_string();
|
||||
|
||||
let map = RwLock::new(BTreeMap::from([
|
||||
// create default catalog and schema
|
||||
(default_catalog_key.into(), catalog_value),
|
||||
(default_schema_key.into(), schema_value),
|
||||
]));
|
||||
Self { map }
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for MockKvBackend {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
futures::executor::block_on(async {
|
||||
let map = self.map.read().await;
|
||||
for (k, v) in map.iter() {
|
||||
f.serialize_str(&String::from_utf8_lossy(k))?;
|
||||
f.serialize_str(" -> ")?;
|
||||
f.serialize_str(&String::from_utf8_lossy(v))?;
|
||||
f.serialize_str("\n")?;
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl KvBackend for MockKvBackend {
|
||||
fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
|
||||
where
|
||||
'a: 'b,
|
||||
{
|
||||
let prefix = key.to_vec();
|
||||
let prefix_string = String::from_utf8_lossy(&prefix).to_string();
|
||||
Box::pin(stream!({
|
||||
let maps = self.map.read().await.clone();
|
||||
for (k, v) in maps.range(prefix.clone()..) {
|
||||
let key_string = String::from_utf8_lossy(k).to_string();
|
||||
let matches = key_string.starts_with(&prefix_string);
|
||||
if matches {
|
||||
yield Ok(Kv(k.clone(), v.clone()))
|
||||
} else {
|
||||
info!("Stream finished");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error> {
|
||||
let mut map = self.map.write().await;
|
||||
let _ = map.insert(key.to_vec(), val.to_vec());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn compare_and_set(
|
||||
&self,
|
||||
key: &[u8],
|
||||
expect: &[u8],
|
||||
val: &[u8],
|
||||
) -> Result<Result<(), Option<Vec<u8>>>, Error> {
|
||||
let mut map = self.map.write().await;
|
||||
let existing = map.entry(key.to_vec());
|
||||
match existing {
|
||||
Entry::Vacant(e) => {
|
||||
if expect.is_empty() {
|
||||
let _ = e.insert(val.to_vec());
|
||||
Ok(Ok(()))
|
||||
} else {
|
||||
Ok(Err(None))
|
||||
}
|
||||
}
|
||||
Entry::Occupied(mut existing) => {
|
||||
if existing.get() == expect {
|
||||
let _ = existing.insert(val.to_vec());
|
||||
Ok(Ok(()))
|
||||
} else {
|
||||
Ok(Err(Some(existing.get().clone())))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error> {
|
||||
let mut map = self.map.write().await;
|
||||
if end.is_empty() {
|
||||
let _ = map.remove(key);
|
||||
} else {
|
||||
let start = key.to_vec();
|
||||
let end = end.to_vec();
|
||||
let range = start..end;
|
||||
|
||||
map.retain(|k, _| !range.contains(k));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn move_value(&self, _from_key: &[u8], _to_key: &[u8]) -> Result<(), Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct MockTableEngine {
|
||||
|
||||
@@ -203,20 +203,32 @@ pub fn build_table_insert_request(
|
||||
build_insert_request(
|
||||
EntryType::Table,
|
||||
entry_key.as_bytes(),
|
||||
serde_json::to_string(&TableEntryValue { table_name, engine })
|
||||
.unwrap()
|
||||
.as_bytes(),
|
||||
serde_json::to_string(&TableEntryValue {
|
||||
table_name,
|
||||
engine,
|
||||
is_deleted: false,
|
||||
})
|
||||
.unwrap()
|
||||
.as_bytes(),
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn build_table_deletion_request(
|
||||
request: &DeregisterTableRequest,
|
||||
table_id: TableId,
|
||||
) -> DeleteRequest {
|
||||
let table_key = format_table_entry_key(&request.catalog, &request.schema, table_id);
|
||||
DeleteRequest {
|
||||
key_column_values: build_primary_key_columns(EntryType::Table, table_key.as_bytes()),
|
||||
}
|
||||
) -> InsertRequest {
|
||||
let entry_key = format_table_entry_key(&request.catalog, &request.schema, table_id);
|
||||
build_insert_request(
|
||||
EntryType::Table,
|
||||
entry_key.as_bytes(),
|
||||
serde_json::to_string(&TableEntryValue {
|
||||
table_name: "".to_string(),
|
||||
engine: "".to_string(),
|
||||
is_deleted: true,
|
||||
})
|
||||
.unwrap()
|
||||
.as_bytes(),
|
||||
)
|
||||
}
|
||||
|
||||
fn build_primary_key_columns(entry_type: EntryType, key: &[u8]) -> HashMap<String, VectorRef> {
|
||||
@@ -335,6 +347,7 @@ pub fn decode_system_catalog(
|
||||
table_name: table_meta.table_name,
|
||||
table_id,
|
||||
engine: table_meta.engine,
|
||||
is_deleted: table_meta.is_deleted,
|
||||
}))
|
||||
}
|
||||
}
|
||||
@@ -391,6 +404,7 @@ pub struct TableEntry {
|
||||
pub table_name: String,
|
||||
pub table_id: TableId,
|
||||
pub engine: String,
|
||||
pub is_deleted: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
|
||||
@@ -399,12 +413,19 @@ pub struct TableEntryValue {
|
||||
|
||||
#[serde(default = "mito_engine")]
|
||||
pub engine: String,
|
||||
|
||||
#[serde(default = "not_deleted")]
|
||||
pub is_deleted: bool,
|
||||
}
|
||||
|
||||
fn mito_engine() -> String {
|
||||
MITO_ENGINE.to_string()
|
||||
}
|
||||
|
||||
fn not_deleted() -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_recordbatch::RecordBatches;
|
||||
@@ -563,6 +584,7 @@ mod tests {
|
||||
table_name: "my_table".to_string(),
|
||||
table_id: 1,
|
||||
engine: MITO_ENGINE.to_string(),
|
||||
is_deleted: false,
|
||||
});
|
||||
assert_eq!(entry, expected);
|
||||
|
||||
@@ -574,11 +596,11 @@ mod tests {
|
||||
},
|
||||
1,
|
||||
);
|
||||
let result = catalog_table.delete(table_deletion).await.unwrap();
|
||||
let result = catalog_table.insert(table_deletion).await.unwrap();
|
||||
assert_eq!(result, 1);
|
||||
|
||||
let records = catalog_table.records().await.unwrap();
|
||||
let batches = RecordBatches::try_collect(records).await.unwrap().take();
|
||||
assert_eq!(batches.len(), 0);
|
||||
assert_eq!(batches.len(), 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,7 +69,7 @@ impl SystemCatalog {
|
||||
) -> CatalogResult<()> {
|
||||
self.information_schema
|
||||
.system
|
||||
.delete(build_table_deletion_request(request, table_id))
|
||||
.insert(build_table_deletion_request(request, table_id))
|
||||
.await
|
||||
.map(|x| {
|
||||
if x != 1 {
|
||||
|
||||
@@ -22,12 +22,14 @@ mod tests {
|
||||
use std::time::Duration;
|
||||
|
||||
use catalog::helper::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
|
||||
use catalog::remote::mock::{MockKvBackend, MockTableEngine};
|
||||
use catalog::remote::mock::MockTableEngine;
|
||||
use catalog::remote::region_alive_keeper::RegionAliveKeepers;
|
||||
use catalog::remote::{CachedMetaKvBackend, KvBackend, KvBackendRef, RemoteCatalogManager};
|
||||
use catalog::remote::{CachedMetaKvBackend, RemoteCatalogManager};
|
||||
use catalog::{CatalogManager, RegisterSchemaRequest, RegisterTableRequest};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO_ENGINE};
|
||||
use common_meta::ident::TableIdent;
|
||||
use common_meta::kv_backend::memory::MemoryKvBackend;
|
||||
use common_meta::kv_backend::KvBackend;
|
||||
use datatypes::schema::RawSchema;
|
||||
use futures_util::StreamExt;
|
||||
use table::engine::manager::{MemoryTableEngineManager, TableEngineManagerRef};
|
||||
@@ -37,8 +39,6 @@ mod tests {
|
||||
use tokio::time::Instant;
|
||||
|
||||
struct TestingComponents {
|
||||
#[allow(dead_code)]
|
||||
kv_backend: KvBackendRef,
|
||||
catalog_manager: Arc<RemoteCatalogManager>,
|
||||
table_engine_manager: TableEngineManagerRef,
|
||||
region_alive_keepers: Arc<RegionAliveKeepers>,
|
||||
@@ -53,7 +53,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_backend() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let backend = MockKvBackend::default();
|
||||
let backend = MemoryKvBackend::default();
|
||||
|
||||
let default_catalog_key = CatalogKey {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
@@ -92,8 +92,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cached_backend() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let backend = CachedMetaKvBackend::wrap(Arc::new(MockKvBackend::default()));
|
||||
let backend = CachedMetaKvBackend::wrap(Arc::new(MemoryKvBackend::default()));
|
||||
|
||||
let default_catalog_key = CatalogKey {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
@@ -135,9 +134,11 @@ mod tests {
|
||||
}
|
||||
|
||||
async fn prepare_components(node_id: u64) -> TestingComponents {
|
||||
let cached_backend = Arc::new(CachedMetaKvBackend::wrap(
|
||||
Arc::new(MockKvBackend::default()),
|
||||
));
|
||||
let backend = Arc::new(MemoryKvBackend::default());
|
||||
backend.set(b"__c-greptime", b"").await.unwrap();
|
||||
backend.set(b"__s-greptime-public", b"").await.unwrap();
|
||||
|
||||
let cached_backend = Arc::new(CachedMetaKvBackend::wrap(backend));
|
||||
|
||||
let table_engine = Arc::new(MockTableEngine::default());
|
||||
let engine_manager = Arc::new(MemoryTableEngineManager::alias(
|
||||
@@ -156,7 +157,6 @@ mod tests {
|
||||
catalog_manager.start().await.unwrap();
|
||||
|
||||
TestingComponents {
|
||||
kv_backend: cached_backend,
|
||||
catalog_manager: Arc::new(catalog_manager),
|
||||
table_engine_manager: engine_manager,
|
||||
region_alive_keepers,
|
||||
|
||||
@@ -10,7 +10,9 @@ name = "greptime"
|
||||
path = "src/bin/greptime.rs"
|
||||
|
||||
[features]
|
||||
default = ["metrics-process"]
|
||||
tokio-console = ["common-telemetry/tokio-console"]
|
||||
metrics-process = ["servers/metrics-process"]
|
||||
|
||||
[dependencies]
|
||||
anymap = "1.0.0-beta.2"
|
||||
|
||||
@@ -236,6 +236,7 @@ mod tests {
|
||||
use std::io::Write;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_test_util::temp_dir::create_named_temp_file;
|
||||
use frontend::service_config::GrpcOptions;
|
||||
use servers::auth::{Identity, Password, UserProviderRef};
|
||||
@@ -260,6 +261,10 @@ mod tests {
|
||||
command.load_options(TopLevelOptions::default()).unwrap() else { unreachable!() };
|
||||
|
||||
assert_eq!(opts.http_options.as_ref().unwrap().addr, "127.0.0.1:1234");
|
||||
assert_eq!(
|
||||
ReadableSize::mb(64),
|
||||
opts.http_options.as_ref().unwrap().body_limit
|
||||
);
|
||||
assert_eq!(opts.mysql_options.as_ref().unwrap().addr, "127.0.0.1:5678");
|
||||
assert_eq!(
|
||||
opts.postgres_options.as_ref().unwrap().addr,
|
||||
@@ -301,6 +306,7 @@ mod tests {
|
||||
[http_options]
|
||||
addr = "127.0.0.1:4000"
|
||||
timeout = "30s"
|
||||
body_limit = "2GB"
|
||||
|
||||
[logging]
|
||||
level = "debug"
|
||||
@@ -326,6 +332,11 @@ mod tests {
|
||||
fe_opts.http_options.as_ref().unwrap().timeout
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
ReadableSize::gb(2),
|
||||
fe_opts.http_options.as_ref().unwrap().body_limit
|
||||
);
|
||||
|
||||
assert_eq!("debug", fe_opts.logging.level.as_ref().unwrap());
|
||||
assert_eq!("/tmp/greptimedb/test/logs".to_string(), fe_opts.logging.dir);
|
||||
}
|
||||
|
||||
@@ -132,6 +132,7 @@ impl StandaloneOptions {
|
||||
prom_options: self.prom_options,
|
||||
meta_client_options: None,
|
||||
logging: self.logging,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -341,6 +342,7 @@ mod tests {
|
||||
use std::io::Write;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_test_util::temp_dir::create_named_temp_file;
|
||||
use servers::auth::{Identity, Password, UserProviderRef};
|
||||
use servers::Mode;
|
||||
@@ -408,6 +410,7 @@ mod tests {
|
||||
[http_options]
|
||||
addr = "127.0.0.1:4000"
|
||||
timeout = "30s"
|
||||
body_limit = "128MB"
|
||||
|
||||
[logging]
|
||||
level = "debug"
|
||||
@@ -433,6 +436,10 @@ mod tests {
|
||||
Duration::from_secs(30),
|
||||
fe_opts.http_options.as_ref().unwrap().timeout
|
||||
);
|
||||
assert_eq!(
|
||||
ReadableSize::mb(128),
|
||||
fe_opts.http_options.as_ref().unwrap().body_limit
|
||||
);
|
||||
assert_eq!(
|
||||
"127.0.0.1:4001".to_string(),
|
||||
fe_opts.grpc_options.unwrap().addr
|
||||
@@ -559,6 +566,10 @@ mod tests {
|
||||
opts.fe_opts.http_options.as_ref().unwrap().addr,
|
||||
"127.0.0.1:14000"
|
||||
);
|
||||
assert_eq!(
|
||||
ReadableSize::mb(64),
|
||||
opts.fe_opts.http_options.as_ref().unwrap().body_limit
|
||||
);
|
||||
|
||||
// Should be default value.
|
||||
assert_eq!(
|
||||
|
||||
@@ -6,12 +6,15 @@ license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
api = { path = "../../api" }
|
||||
async-stream.workspace = true
|
||||
async-trait.workspace = true
|
||||
common-catalog = { path = "../catalog" }
|
||||
common-error = { path = "../error" }
|
||||
common-runtime = { path = "../runtime" }
|
||||
common-telemetry = { path = "../telemetry" }
|
||||
common-time = { path = "../time" }
|
||||
futures.workspace = true
|
||||
prost.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
snafu.workspace = true
|
||||
|
||||
@@ -55,6 +55,21 @@ pub enum Error {
|
||||
|
||||
#[snafu(display("Invalid protobuf message, err: {}", err_msg))]
|
||||
InvalidProtoMsg { err_msg: String, location: Location },
|
||||
|
||||
#[snafu(display("Invalid table metadata, err: {}", err_msg))]
|
||||
InvalidTableMetadata { err_msg: String, location: Location },
|
||||
|
||||
#[snafu(display("Failed to get kv cache, err: {}", err_msg))]
|
||||
GetKvCache { err_msg: String },
|
||||
|
||||
#[snafu(display("Get null from cache, key: {}", key))]
|
||||
CacheNotGet { key: String, location: Location },
|
||||
|
||||
#[snafu(display("Failed to request MetaSrv, source: {}", source))]
|
||||
MetaSrv {
|
||||
source: BoxedError,
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -65,15 +80,18 @@ impl ErrorExt for Error {
|
||||
match self {
|
||||
IllegalServerState { .. } => StatusCode::Internal,
|
||||
|
||||
SerdeJson { .. } | RouteInfoCorrupted { .. } | InvalidProtoMsg { .. } => {
|
||||
StatusCode::Unexpected
|
||||
}
|
||||
SerdeJson { .. }
|
||||
| RouteInfoCorrupted { .. }
|
||||
| InvalidProtoMsg { .. }
|
||||
| InvalidTableMetadata { .. } => StatusCode::Unexpected,
|
||||
|
||||
SendMessage { .. } => StatusCode::Internal,
|
||||
SendMessage { .. } | GetKvCache { .. } | CacheNotGet { .. } => StatusCode::Internal,
|
||||
|
||||
EncodeJson { .. } | DecodeJson { .. } | PayloadNotExist { .. } => {
|
||||
StatusCode::Unexpected
|
||||
}
|
||||
|
||||
MetaSrv { source, .. } => source.status_code(),
|
||||
}
|
||||
}
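For orientation, the added variants behave like the existing ones: snafu context selectors build them, and the `status_code` arm added above maps cache failures to `Internal`. A small illustrative check; only the variant and its status-code mapping come from this diff, while the `ErrorExt`/`StatusCode` import path is an assumption:

```rust
use common_error::prelude::{ErrorExt, StatusCode}; // assumed re-export path
use common_meta::error::{CacheNotGetSnafu, Error};

fn main() {
    // Build the new `CacheNotGet` variant the same way the cached backend does.
    let err: Error = CacheNotGetSnafu { key: "__table_info/42".to_string() }.build();
    assert_eq!(err.status_code(), StatusCode::Internal);
}
```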
|
||||
|
||||
|
||||
@@ -12,16 +12,102 @@
// See the License for the specific language governing permissions and
// limitations under the License.

//! This mod defines all the keys used in the metadata store (Metasrv).
//! Specifically, there are these kinds of keys:
//!
//! 1. Table info key: `__table_info/{table_id}`
//!    - The value is a [TableInfoValue] struct; it contains the whole table info (like column
//!      schemas).
//!    - This key is mainly used in constructing the table in Datanode and Frontend.
//!
//! 2. Table region key: `__table_region/{table_id}`
//!    - The value is a [TableRegionValue] struct; it contains the region distribution of the
//!      table in the Datanodes.
//!
//! All keys have related managers. The managers take care of the serialization and deserialization
//! of keys and values, and the interaction with the underlying KV store backend.
//!
//! To simplify the managers used in struct fields and function parameters, we define a "unified"
//! table metadata manager: [TableMetadataManager]. It contains all the managers defined above.
//! It's recommended to just use this manager only.

pub mod table_info;
pub mod table_region;
mod table_route;

use std::sync::Arc;

use snafu::ResultExt;
use table_info::{TableInfoManager, TableInfoValue};
use table_region::{TableRegionManager, TableRegionValue};

use crate::error::{InvalidTableMetadataSnafu, Result, SerdeJsonSnafu};
pub use crate::key::table_route::{TableRouteKey, TABLE_ROUTE_PREFIX};
use crate::kv_backend::KvBackendRef;

pub const REMOVED_PREFIX: &str = "__removed";

const TABLE_INFO_KEY_PREFIX: &str = "__table_info";
const TABLE_REGION_KEY_PREFIX: &str = "__table_region";

pub fn to_removed_key(key: &str) -> String {
    format!("{REMOVED_PREFIX}-{key}")
}

pub trait TableMetaKey {
    fn as_raw_key(&self) -> Vec<u8>;
}

pub type TableMetadataManagerRef = Arc<TableMetadataManager>;

pub struct TableMetadataManager {
    table_info_manager: TableInfoManager,
    table_region_manager: TableRegionManager,
}

impl TableMetadataManager {
    pub fn new(kv_backend: KvBackendRef) -> Self {
        TableMetadataManager {
            table_info_manager: TableInfoManager::new(kv_backend.clone()),
            table_region_manager: TableRegionManager::new(kv_backend),
        }
    }

    pub fn table_info_manager(&self) -> &TableInfoManager {
        &self.table_info_manager
    }

    pub fn table_region_manager(&self) -> &TableRegionManager {
        &self.table_region_manager
    }
}

macro_rules! impl_table_meta_value {
    ( $($val_ty: ty), *) => {
        $(
            impl $val_ty {
                pub fn try_from_raw_value(raw_value: Vec<u8>) -> Result<Self> {
                    let raw_value = String::from_utf8(raw_value).map_err(|e| {
                        InvalidTableMetadataSnafu { err_msg: e.to_string() }.build()
                    })?;
                    serde_json::from_str(&raw_value).context(SerdeJsonSnafu)
                }

                pub fn try_as_raw_value(&self) -> Result<Vec<u8>> {
                    serde_json::to_string(self)
                        .map(|x| x.into_bytes())
                        .context(SerdeJsonSnafu)
                }
            }
        )*
    }
}

impl_table_meta_value! {
    TableInfoValue,
    TableRegionValue
}

#[cfg(test)]
mod tests {
    use crate::key::to_removed_key;
src/common/meta/src/key/table_info.rs (new file, +230 lines)
@@ -0,0 +1,230 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use table::metadata::{RawTableInfo, TableId};
|
||||
|
||||
use super::TABLE_INFO_KEY_PREFIX;
|
||||
use crate::error::Result;
|
||||
use crate::key::{to_removed_key, TableMetaKey};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
|
||||
pub struct TableInfoKey {
|
||||
table_id: TableId,
|
||||
}
|
||||
|
||||
impl TableInfoKey {
|
||||
pub fn new(table_id: TableId) -> Self {
|
||||
Self { table_id }
|
||||
}
|
||||
}
|
||||
|
||||
impl TableMetaKey for TableInfoKey {
|
||||
fn as_raw_key(&self) -> Vec<u8> {
|
||||
format!("{}/{}", TABLE_INFO_KEY_PREFIX, self.table_id).into_bytes()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct TableInfoValue {
|
||||
pub table_info: RawTableInfo,
|
||||
version: u64,
|
||||
}
|
||||
|
||||
pub struct TableInfoManager {
|
||||
kv_backend: KvBackendRef,
|
||||
}
|
||||
|
||||
impl TableInfoManager {
|
||||
pub fn new(kv_backend: KvBackendRef) -> Self {
|
||||
Self { kv_backend }
|
||||
}
|
||||
|
||||
pub async fn get(&self, table_id: TableId) -> Result<Option<TableInfoValue>> {
|
||||
let key = TableInfoKey::new(table_id);
|
||||
let raw_key = key.as_raw_key();
|
||||
self.kv_backend
|
||||
.get(&raw_key)
|
||||
.await?
|
||||
.map(|x| TableInfoValue::try_from_raw_value(x.1))
|
||||
.transpose()
|
||||
}
|
||||
|
||||
pub async fn compare_and_set(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
expect: Option<TableInfoValue>,
|
||||
table_info: RawTableInfo,
|
||||
) -> Result<std::result::Result<(), Option<Vec<u8>>>> {
|
||||
let key = TableInfoKey::new(table_id);
|
||||
let raw_key = key.as_raw_key();
|
||||
|
||||
let (expect, version) = if let Some(x) = expect {
|
||||
(x.try_as_raw_value()?, x.version + 1)
|
||||
} else {
|
||||
(vec![], 0)
|
||||
};
|
||||
|
||||
let value = TableInfoValue {
|
||||
table_info,
|
||||
version,
|
||||
};
|
||||
let raw_value = value.try_as_raw_value()?;
|
||||
|
||||
self.kv_backend
|
||||
.compare_and_set(&raw_key, &expect, &raw_value)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn remove(&self, table_id: TableId) -> Result<()> {
|
||||
let key = TableInfoKey::new(table_id);
|
||||
let removed_key = to_removed_key(&String::from_utf8_lossy(key.as_raw_key().as_slice()));
|
||||
self.kv_backend
|
||||
.move_value(&key.as_raw_key(), removed_key.as_bytes())
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, RawSchema, Schema};
|
||||
use table::metadata::{RawTableMeta, TableIdent, TableType};
|
||||
|
||||
use super::*;
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
use crate::kv_backend::KvBackend;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_table_info_manager() {
|
||||
let backend = Arc::new(MemoryKvBackend::default());
|
||||
|
||||
for i in 1..=3 {
|
||||
let key = TableInfoKey::new(i).as_raw_key();
|
||||
let val = TableInfoValue {
|
||||
table_info: new_table_info(i),
|
||||
version: 1,
|
||||
}
|
||||
.try_as_raw_value()
|
||||
.unwrap();
|
||||
backend.set(&key, &val).await.unwrap();
|
||||
}
|
||||
|
||||
let manager = TableInfoManager::new(backend.clone());
|
||||
|
||||
let val = manager.get(1).await.unwrap().unwrap();
|
||||
assert_eq!(
|
||||
val,
|
||||
TableInfoValue {
|
||||
table_info: new_table_info(1),
|
||||
version: 1,
|
||||
}
|
||||
);
|
||||
assert!(manager.get(4).await.unwrap().is_none());
|
||||
|
||||
let table_info = new_table_info(4);
|
||||
let result = manager
|
||||
.compare_and_set(4, None, table_info.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(result.is_ok());
|
||||
|
||||
// test cas failed, the new table info is not set
|
||||
let new_table_info = new_table_info(4);
|
||||
let result = manager
|
||||
.compare_and_set(4, None, new_table_info.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
let actual = TableInfoValue::try_from_raw_value(result.unwrap_err().unwrap()).unwrap();
|
||||
assert_eq!(
|
||||
actual,
|
||||
TableInfoValue {
|
||||
table_info: table_info.clone(),
|
||||
version: 0,
|
||||
}
|
||||
);
|
||||
|
||||
// test cas success
|
||||
let result = manager
|
||||
.compare_and_set(4, Some(actual), new_table_info.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(result.is_ok());
|
||||
|
||||
assert!(manager.remove(4).await.is_ok());
|
||||
|
||||
let kv = backend
|
||||
.get(b"__removed-__table_info/4")
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(b"__removed-__table_info/4", kv.0.as_slice());
|
||||
let value = TableInfoValue::try_from_raw_value(kv.1).unwrap();
|
||||
assert_eq!(value.table_info, new_table_info);
|
||||
assert_eq!(value.version, 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_key_serde() {
|
||||
let key = TableInfoKey::new(42);
|
||||
let raw_key = key.as_raw_key();
|
||||
assert_eq!(raw_key, b"__table_info/42");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_value_serde() {
|
||||
let value = TableInfoValue {
|
||||
table_info: new_table_info(42),
|
||||
version: 1,
|
||||
};
|
||||
let serialized = value.try_as_raw_value().unwrap();
|
||||
let deserialized = TableInfoValue::try_from_raw_value(serialized).unwrap();
|
||||
assert_eq!(value, deserialized);
|
||||
}
|
||||
|
||||
fn new_table_info(table_id: TableId) -> RawTableInfo {
|
||||
let schema = Schema::new(vec![ColumnSchema::new(
|
||||
"name",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
)]);
|
||||
|
||||
let meta = RawTableMeta {
|
||||
schema: RawSchema::from(&schema),
|
||||
engine: "mito".to_string(),
|
||||
created_on: chrono::DateTime::default(),
|
||||
primary_key_indices: vec![0, 1],
|
||||
next_column_id: 3,
|
||||
engine_options: Default::default(),
|
||||
value_indices: vec![2, 3],
|
||||
options: Default::default(),
|
||||
region_numbers: vec![1],
|
||||
};
|
||||
|
||||
RawTableInfo {
|
||||
ident: TableIdent {
|
||||
table_id,
|
||||
version: 1,
|
||||
},
|
||||
name: "table_1".to_string(),
|
||||
desc: Some("blah".to_string()),
|
||||
catalog_name: "catalog_1".to_string(),
|
||||
schema_name: "schema_1".to_string(),
|
||||
meta,
|
||||
table_type: TableType::Base,
|
||||
}
|
||||
}
|
||||
}
|
||||
src/common/meta/src/key/table_region.rs (new file, +190 lines)
@@ -0,0 +1,190 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use store_api::storage::RegionNumber;
|
||||
use table::metadata::TableId;
|
||||
|
||||
use super::TABLE_REGION_KEY_PREFIX;
|
||||
use crate::error::Result;
|
||||
use crate::key::{to_removed_key, TableMetaKey};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::DatanodeId;
|
||||
|
||||
pub type RegionDistribution = BTreeMap<DatanodeId, Vec<RegionNumber>>;
|
||||
|
||||
pub struct TableRegionKey {
|
||||
table_id: TableId,
|
||||
}
|
||||
|
||||
impl TableRegionKey {
|
||||
pub fn new(table_id: TableId) -> Self {
|
||||
Self { table_id }
|
||||
}
|
||||
}
|
||||
|
||||
impl TableMetaKey for TableRegionKey {
|
||||
fn as_raw_key(&self) -> Vec<u8> {
|
||||
format!("{}/{}", TABLE_REGION_KEY_PREFIX, self.table_id).into_bytes()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct TableRegionValue {
|
||||
pub region_distribution: RegionDistribution,
|
||||
version: u64,
|
||||
}
|
||||
|
||||
pub struct TableRegionManager {
|
||||
kv_backend: KvBackendRef,
|
||||
}
|
||||
|
||||
impl TableRegionManager {
|
||||
pub fn new(kv_backend: KvBackendRef) -> Self {
|
||||
Self { kv_backend }
|
||||
}
|
||||
|
||||
pub async fn get(&self, table_id: TableId) -> Result<Option<TableRegionValue>> {
|
||||
let key = TableRegionKey::new(table_id);
|
||||
let raw_key = key.as_raw_key();
|
||||
self.kv_backend
|
||||
.get(&raw_key)
|
||||
.await?
|
||||
.map(|x| TableRegionValue::try_from_raw_value(x.1))
|
||||
.transpose()
|
||||
}
|
||||
|
||||
pub async fn compare_and_set(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
expect: Option<TableRegionValue>,
|
||||
region_distribution: RegionDistribution,
|
||||
) -> Result<std::result::Result<(), Option<Vec<u8>>>> {
|
||||
let key = TableRegionKey::new(table_id);
|
||||
let raw_key = key.as_raw_key();
|
||||
|
||||
let (expect, version) = if let Some(x) = expect {
|
||||
(x.try_as_raw_value()?, x.version + 1)
|
||||
} else {
|
||||
(vec![], 0)
|
||||
};
|
||||
|
||||
let value = TableRegionValue {
|
||||
region_distribution,
|
||||
version,
|
||||
};
|
||||
let raw_value = value.try_as_raw_value()?;
|
||||
|
||||
self.kv_backend
|
||||
.compare_and_set(&raw_key, &expect, &raw_value)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn remove(&self, table_id: TableId) -> Result<()> {
|
||||
let key = TableRegionKey::new(table_id);
|
||||
let remove_key = to_removed_key(&String::from_utf8_lossy(key.as_raw_key().as_slice()));
|
||||
self.kv_backend
|
||||
.move_value(&key.as_raw_key(), remove_key.as_bytes())
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::*;
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
use crate::kv_backend::KvBackend;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_table_region_manager() {
|
||||
let backend = Arc::new(MemoryKvBackend::default());
|
||||
let manager = TableRegionManager::new(backend.clone());
|
||||
|
||||
let region_distribution =
|
||||
RegionDistribution::from([(1, vec![1, 2, 3]), (2, vec![4, 5, 6])]);
|
||||
let result = manager
|
||||
.compare_and_set(1, None, region_distribution.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(result.is_ok());
|
||||
|
||||
let new_region_distribution =
|
||||
RegionDistribution::from([(1, vec![4, 5, 6]), (2, vec![1, 2, 3])]);
|
||||
let curr = manager
|
||||
.compare_and_set(1, None, new_region_distribution.clone())
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap_err()
|
||||
.unwrap();
|
||||
let curr = TableRegionValue::try_from_raw_value(curr).unwrap();
|
||||
assert_eq!(
|
||||
curr,
|
||||
TableRegionValue {
|
||||
region_distribution,
|
||||
version: 0
|
||||
}
|
||||
);
|
||||
|
||||
assert!(manager
|
||||
.compare_and_set(1, Some(curr), new_region_distribution.clone())
|
||||
.await
|
||||
.unwrap()
|
||||
.is_ok());
|
||||
|
||||
let value = manager.get(1).await.unwrap().unwrap();
|
||||
assert_eq!(
|
||||
value,
|
||||
TableRegionValue {
|
||||
region_distribution: new_region_distribution.clone(),
|
||||
version: 1
|
||||
}
|
||||
);
|
||||
assert!(manager.get(2).await.unwrap().is_none());
|
||||
|
||||
assert!(manager.remove(1).await.is_ok());
|
||||
|
||||
let kv = backend
|
||||
.get(b"__removed-__table_region/1")
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(b"__removed-__table_region/1", kv.0.as_slice());
|
||||
let value = TableRegionValue::try_from_raw_value(kv.1).unwrap();
|
||||
assert_eq!(value.region_distribution, new_region_distribution);
|
||||
assert_eq!(value.version, 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serde() {
|
||||
let key = TableRegionKey::new(1);
|
||||
let raw_key = key.as_raw_key();
|
||||
assert_eq!(raw_key, b"__table_region/1");
|
||||
|
||||
let value = TableRegionValue {
|
||||
region_distribution: RegionDistribution::from([(1, vec![1, 2, 3]), (2, vec![4, 5, 6])]),
|
||||
version: 0,
|
||||
};
|
||||
let literal = br#"{"region_distribution":{"1":[1,2,3],"2":[4,5,6]},"version":0}"#;
|
||||
|
||||
assert_eq!(value.try_as_raw_value().unwrap(), literal);
|
||||
assert_eq!(
|
||||
TableRegionValue::try_from_raw_value(literal.to_vec()).unwrap(),
|
||||
value,
|
||||
);
|
||||
}
|
||||
}
|
||||
src/common/meta/src/kv_backend.rs (new file, +80 lines)
@@ -0,0 +1,80 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod memory;
|
||||
|
||||
use std::any::Any;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_error::ext::ErrorExt;
|
||||
use futures::{Stream, StreamExt};
|
||||
|
||||
use crate::error::Error;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct Kv(pub Vec<u8>, pub Vec<u8>);
|
||||
|
||||
pub type ValueIter<'a, E> = Pin<Box<dyn Stream<Item = Result<Kv, E>> + Send + 'a>>;
|
||||
|
||||
pub type KvBackendRef = Arc<dyn KvBackend<Error = Error>>;
|
||||
|
||||
#[async_trait]
|
||||
pub trait KvBackend: Send + Sync {
|
||||
type Error: ErrorExt;
|
||||
|
||||
fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Self::Error>
|
||||
where
|
||||
'a: 'b;
|
||||
|
||||
async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Self::Error>;
|
||||
|
||||
/// Compares and sets the value of `key`. `expect` is the expected current value: if the backend's
/// value associated with `key` is the same as `expect`, the value is updated to `val`.
///
/// - If the compare-and-set operation successfully updates the value, this method returns `Ok(Ok(()))`.
/// - If the associated value is not the same as `expect`, no value is updated and `Ok(Err(_))` is
///   returned, where the inner `Option<Vec<u8>>` is the current value associated with `key`
///   (`None` if the key does not exist).
/// - If any error happens during the operation, an `Err(Error)` is returned.
|
||||
async fn compare_and_set(
|
||||
&self,
|
||||
key: &[u8],
|
||||
expect: &[u8],
|
||||
val: &[u8],
|
||||
) -> Result<Result<(), Option<Vec<u8>>>, Self::Error>;
|
||||
|
||||
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Self::Error>;
|
||||
|
||||
async fn delete(&self, key: &[u8]) -> Result<(), Self::Error> {
|
||||
self.delete_range(key, &[]).await
|
||||
}
|
||||
|
||||
/// The default `get` implementation is based on the `range` method.
|
||||
async fn get(&self, key: &[u8]) -> Result<Option<Kv>, Self::Error> {
|
||||
let mut iter = self.range(key);
|
||||
while let Some(r) = iter.next().await {
|
||||
let kv = r?;
|
||||
if kv.0 == key {
|
||||
return Ok(Some(kv));
|
||||
}
|
||||
}
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
/// Atomically moves the value from `from_key` to `to_key` (i.e., renames the key).
|
||||
async fn move_value(&self, from_key: &[u8], to_key: &[u8]) -> Result<(), Self::Error>;
|
||||
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
}
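Expanding on the `compare_and_set` contract documented above, here is a small sketch of a compare-and-set round trip against the in-memory backend defined in the next file (the crate paths and top-level error type are assumptions based on this diff):

use common_meta::error::Error;
use common_meta::kv_backend::memory::MemoryKvBackend;
use common_meta::kv_backend::KvBackend;

#[tokio::main]
async fn main() -> Result<(), Error> {
    let backend = MemoryKvBackend::default();

    // An empty `expect` means "set only if the key does not exist yet".
    assert!(backend.compare_and_set(b"greeting", b"", b"hello").await?.is_ok());

    // A stale expectation fails; the inner `Err` carries the current value.
    let current = backend
        .compare_and_set(b"greeting", b"stale", b"world")
        .await?
        .unwrap_err();
    assert_eq!(current.unwrap(), b"hello");

    // Retrying with the observed value succeeds.
    assert!(backend
        .compare_and_set(b"greeting", b"hello", b"world")
        .await?
        .is_ok());
    Ok(())
}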
|
||||
src/common/meta/src/kv_backend/memory.rs (new file, +197 lines)
@@ -0,0 +1,197 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::collections::btree_map::Entry;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::sync::RwLock;
|
||||
|
||||
use async_stream::stream;
|
||||
use async_trait::async_trait;
|
||||
use serde::Serializer;
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::kv_backend::{Kv, KvBackend, ValueIter};
|
||||
|
||||
pub struct MemoryKvBackend {
|
||||
kvs: RwLock<BTreeMap<Vec<u8>, Vec<u8>>>,
|
||||
}
|
||||
|
||||
impl Display for MemoryKvBackend {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
let kvs = self.kvs.read().unwrap();
|
||||
for (k, v) in kvs.iter() {
|
||||
f.serialize_str(&String::from_utf8_lossy(k))?;
|
||||
f.serialize_str(" -> ")?;
|
||||
f.serialize_str(&String::from_utf8_lossy(v))?;
|
||||
f.serialize_str("\n")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for MemoryKvBackend {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
kvs: RwLock::new(BTreeMap::new()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl KvBackend for MemoryKvBackend {
|
||||
type Error = Error;
|
||||
|
||||
fn range<'a, 'b>(&'a self, prefix: &[u8]) -> ValueIter<'b, Error>
|
||||
where
|
||||
'a: 'b,
|
||||
{
|
||||
let kvs = self.kvs.read().unwrap();
|
||||
let kvs = kvs.clone();
|
||||
|
||||
let prefix = prefix.to_vec();
|
||||
Box::pin(stream!({
|
||||
for (k, v) in kvs.range(prefix.clone()..) {
|
||||
if !k.starts_with(&prefix) {
|
||||
break;
|
||||
}
|
||||
yield Ok(Kv(k.clone(), v.clone()));
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error> {
|
||||
let mut kvs = self.kvs.write().unwrap();
|
||||
let _ = kvs.insert(key.to_vec(), val.to_vec());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn compare_and_set(
|
||||
&self,
|
||||
key: &[u8],
|
||||
expect: &[u8],
|
||||
val: &[u8],
|
||||
) -> Result<Result<(), Option<Vec<u8>>>, Error> {
|
||||
let key = key.to_vec();
|
||||
let val = val.to_vec();
|
||||
|
||||
let mut kvs = self.kvs.write().unwrap();
|
||||
let existed = kvs.entry(key);
|
||||
Ok(match existed {
|
||||
Entry::Vacant(e) => {
|
||||
if expect.is_empty() {
|
||||
let _ = e.insert(val);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(None)
|
||||
}
|
||||
}
|
||||
Entry::Occupied(mut existed) => {
|
||||
if existed.get() == expect {
|
||||
let _ = existed.insert(val);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(Some(existed.get().clone()))
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error> {
|
||||
let mut kvs = self.kvs.write().unwrap();
|
||||
if end.is_empty() {
|
||||
let _ = kvs.remove(key);
|
||||
} else {
|
||||
let start = key.to_vec();
|
||||
let end = end.to_vec();
|
||||
let range = start..end;
|
||||
|
||||
kvs.retain(|k, _| !range.contains(k));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn move_value(&self, from_key: &[u8], to_key: &[u8]) -> Result<(), Error> {
|
||||
let mut kvs = self.kvs.write().unwrap();
|
||||
if let Some(v) = kvs.remove(from_key) {
|
||||
let _ = kvs.insert(to_key.to_vec(), v);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use futures::TryStreamExt;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_memory_kv_backend() {
|
||||
let backend = MemoryKvBackend::default();
|
||||
|
||||
for i in 1..10 {
|
||||
let key = format!("key{}", i);
|
||||
let val = format!("val{}", i);
|
||||
assert!(backend.set(key.as_bytes(), val.as_bytes()).await.is_ok());
|
||||
}
|
||||
|
||||
let result = backend
|
||||
.compare_and_set(b"hello", b"what", b"world")
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(result.unwrap_err().is_none());
|
||||
|
||||
let result = backend
|
||||
.compare_and_set(b"hello", b"", b"world")
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(result.is_ok());
|
||||
|
||||
let result = backend
|
||||
.compare_and_set(b"hello", b"world", b"greptime")
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(result.is_ok());
|
||||
|
||||
let result = backend
|
||||
.compare_and_set(b"hello", b"world", b"what")
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(result.unwrap_err().unwrap(), b"greptime");
|
||||
|
||||
assert!(backend.delete_range(b"key1", &[]).await.is_ok());
|
||||
assert!(backend.delete_range(b"key3", b"key9").await.is_ok());
|
||||
|
||||
assert!(backend.move_value(b"key9", b"key10").await.is_ok());
|
||||
|
||||
assert_eq!(
|
||||
backend.to_string(),
|
||||
r#"hello -> greptime
|
||||
key10 -> val9
|
||||
key2 -> val2
|
||||
"#
|
||||
);
|
||||
|
||||
let range = backend.range(b"key").try_collect::<Vec<_>>().await.unwrap();
|
||||
assert_eq!(range.len(), 2);
|
||||
assert_eq!(range[0], Kv(b"key10".to_vec(), b"val9".to_vec()));
|
||||
assert_eq!(range[1], Kv(b"key2".to_vec(), b"val2".to_vec()));
|
||||
}
|
||||
}
|
||||
@@ -17,6 +17,7 @@ pub mod heartbeat;
|
||||
pub mod ident;
|
||||
pub mod instruction;
|
||||
pub mod key;
|
||||
pub mod kv_backend;
|
||||
pub mod peer;
|
||||
pub mod rpc;
|
||||
pub mod table_name;
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod ddl;
|
||||
pub mod lock;
|
||||
pub mod router;
|
||||
pub mod store;
|
||||
|
||||
src/common/meta/src/rpc/ddl.rs (new file, +217 lines)
@@ -0,0 +1,217 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::result;
|
||||
|
||||
use api::v1::meta::submit_ddl_task_request::Task;
|
||||
use api::v1::meta::{
|
||||
CreateTableTask as PbCreateTableTask, Partition,
|
||||
SubmitDdlTaskRequest as PbSubmitDdlTaskRequest,
|
||||
SubmitDdlTaskResponse as PbSubmitDdlTaskResponse,
|
||||
};
|
||||
use api::v1::CreateTableExpr;
|
||||
use prost::Message;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use table::engine::TableReference;
|
||||
use table::metadata::{RawTableInfo, TableId};
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::table_name::TableName;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum DdlTask {
|
||||
CreateTable(CreateTableTask),
|
||||
}
|
||||
|
||||
impl DdlTask {
|
||||
pub fn new_create_table(
|
||||
expr: CreateTableExpr,
|
||||
partitions: Vec<Partition>,
|
||||
table_info: RawTableInfo,
|
||||
) -> Self {
|
||||
DdlTask::CreateTable(CreateTableTask::new(expr, partitions, table_info))
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Task> for DdlTask {
|
||||
type Error = error::Error;
|
||||
fn try_from(task: Task) -> Result<Self> {
|
||||
match task {
|
||||
Task::CreateTableTask(create_table) => {
|
||||
Ok(DdlTask::CreateTable(create_table.try_into()?))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SubmitDdlTaskRequest {
|
||||
pub task: DdlTask,
|
||||
}
|
||||
|
||||
impl TryFrom<SubmitDdlTaskRequest> for PbSubmitDdlTaskRequest {
|
||||
type Error = error::Error;
|
||||
|
||||
fn try_from(request: SubmitDdlTaskRequest) -> Result<Self> {
|
||||
let task = match request.task {
|
||||
DdlTask::CreateTable(task) => Task::CreateTableTask(PbCreateTableTask {
|
||||
table_info: serde_json::to_vec(&task.table_info).context(error::SerdeJsonSnafu)?,
|
||||
create_table: Some(task.create_table),
|
||||
partitions: task.partitions,
|
||||
}),
|
||||
};
|
||||
Ok(Self {
|
||||
header: None,
|
||||
task: Some(task),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SubmitDdlTaskResponse {
|
||||
pub key: Vec<u8>,
|
||||
pub table_id: TableId,
|
||||
}
|
||||
|
||||
impl TryFrom<PbSubmitDdlTaskResponse> for SubmitDdlTaskResponse {
|
||||
type Error = error::Error;
|
||||
|
||||
fn try_from(resp: PbSubmitDdlTaskResponse) -> Result<Self> {
|
||||
let table_id = resp.table_id.context(error::InvalidProtoMsgSnafu {
|
||||
err_msg: "expected table_id",
|
||||
})?;
|
||||
Ok(Self {
|
||||
key: resp.key,
|
||||
table_id: table_id.id,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct CreateTableTask {
|
||||
pub create_table: CreateTableExpr,
|
||||
pub partitions: Vec<Partition>,
|
||||
pub table_info: RawTableInfo,
|
||||
}
|
||||
|
||||
impl TryFrom<PbCreateTableTask> for CreateTableTask {
|
||||
type Error = error::Error;
|
||||
|
||||
fn try_from(pb: PbCreateTableTask) -> Result<Self> {
|
||||
let table_info = serde_json::from_slice(&pb.table_info).context(error::SerdeJsonSnafu)?;
|
||||
|
||||
Ok(CreateTableTask::new(
|
||||
pb.create_table.context(error::InvalidProtoMsgSnafu {
|
||||
err_msg: "expected create table",
|
||||
})?,
|
||||
pb.partitions,
|
||||
table_info,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl CreateTableTask {
|
||||
pub fn new(
|
||||
expr: CreateTableExpr,
|
||||
partitions: Vec<Partition>,
|
||||
table_info: RawTableInfo,
|
||||
) -> CreateTableTask {
|
||||
CreateTableTask {
|
||||
create_table: expr,
|
||||
partitions,
|
||||
table_info,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn table_name(&self) -> TableName {
|
||||
let table = &self.create_table;
|
||||
|
||||
TableName {
|
||||
catalog_name: table.catalog_name.to_string(),
|
||||
schema_name: table.schema_name.to_string(),
|
||||
table_name: table.table_name.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn table_ref(&self) -> TableReference {
|
||||
let table = &self.create_table;
|
||||
|
||||
TableReference {
|
||||
catalog: &table.catalog_name,
|
||||
schema: &table.schema_name,
|
||||
table: &table.table_name,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for CreateTableTask {
|
||||
fn serialize<S>(&self, serializer: S) -> result::Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
let table_info = serde_json::to_vec(&self.table_info)
|
||||
.map_err(|err| serde::ser::Error::custom(err.to_string()))?;
|
||||
|
||||
let pb = PbCreateTableTask {
|
||||
create_table: Some(self.create_table.clone()),
|
||||
partitions: self.partitions.clone(),
|
||||
table_info,
|
||||
};
|
||||
let buf = pb.encode_to_vec();
|
||||
serializer.serialize_bytes(&buf)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for CreateTableTask {
|
||||
fn deserialize<D>(deserializer: D) -> result::Result<Self, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
let buf = Vec::<u8>::deserialize(deserializer)?;
|
||||
let expr: PbCreateTableTask = PbCreateTableTask::decode(&*buf)
|
||||
.map_err(|err| serde::de::Error::custom(err.to_string()))?;
|
||||
|
||||
let expr = CreateTableTask::try_from(expr)
|
||||
.map_err(|err| serde::de::Error::custom(err.to_string()))?;
|
||||
|
||||
Ok(expr)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::CreateTableExpr;
|
||||
use datatypes::schema::SchemaBuilder;
|
||||
use table::metadata::RawTableInfo;
|
||||
use table::test_util::table_info::test_table_info;
|
||||
|
||||
use super::CreateTableTask;
|
||||
|
||||
#[test]
|
||||
fn test_basic_ser_de_create_table_task() {
|
||||
let schema = SchemaBuilder::default().build().unwrap();
|
||||
let table_info = test_table_info(1025, "foo", "bar", "baz", Arc::new(schema));
|
||||
let task = CreateTableTask::new(
|
||||
CreateTableExpr::default(),
|
||||
Vec::new(),
|
||||
RawTableInfo::from(table_info),
|
||||
);
|
||||
|
||||
let output = serde_json::to_vec(&task).unwrap();
|
||||
|
||||
let de = serde_json::from_slice(&output).unwrap();
|
||||
assert_eq!(task, de);
|
||||
}
|
||||
}
|
||||
@@ -16,6 +16,7 @@ use std::fmt::{Display, Formatter};
|
||||
|
||||
use api::v1::meta::TableName as PbTableName;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use table::engine::TableReference;
|
||||
|
||||
#[derive(Debug, Clone, Hash, Eq, PartialEq, Deserialize, Serialize)]
|
||||
pub struct TableName {
|
||||
@@ -46,6 +47,14 @@ impl TableName {
|
||||
table_name: table_name.into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn table_ref(&self) -> TableReference<'_> {
|
||||
TableReference {
|
||||
catalog: &self.catalog_name,
|
||||
schema: &self.schema_name,
|
||||
table: &self.table_name,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<TableName> for PbTableName {
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use crate::util::div_ceil;
|
||||
use crate::Timestamp;
|
||||
|
||||
/// Unix timestamp in millisecond resolution.
|
||||
@@ -80,11 +81,17 @@ impl PartialOrd<TimestampMillis> for i64 {
|
||||
}
|
||||
|
||||
pub trait BucketAligned: Sized {
|
||||
/// Returns the timestamp aligned by `bucket_duration` or `None` if underflow occurred.
|
||||
/// Aligns the value by `bucket_duration` or `None` if underflow occurred.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if `bucket_duration <= 0`.
|
||||
fn align_by_bucket(self, bucket_duration: i64) -> Option<Self>;
|
||||
|
||||
/// Aligns the value by `bucket_duration` to ceil or `None` if overflow occurred.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if `bucket_duration <= 0`.
|
||||
fn align_to_ceil_by_bucket(self, bucket_duration: i64) -> Option<Self>;
|
||||
}
|
||||
|
||||
impl BucketAligned for i64 {
|
||||
@@ -93,6 +100,11 @@ impl BucketAligned for i64 {
|
||||
self.checked_div_euclid(bucket_duration)
|
||||
.and_then(|val| val.checked_mul(bucket_duration))
|
||||
}
|
||||
|
||||
fn align_to_ceil_by_bucket(self, bucket_duration: i64) -> Option<Self> {
|
||||
assert!(bucket_duration > 0, "{}", bucket_duration);
|
||||
div_ceil(self, bucket_duration).checked_mul(bucket_duration)
|
||||
}
|
||||
}
|
||||
|
||||
impl BucketAligned for Timestamp {
|
||||
@@ -103,6 +115,14 @@ impl BucketAligned for Timestamp {
|
||||
.align_by_bucket(bucket_duration)
|
||||
.map(|val| Timestamp::new(val, unit))
|
||||
}
|
||||
|
||||
fn align_to_ceil_by_bucket(self, bucket_duration: i64) -> Option<Self> {
|
||||
assert!(bucket_duration > 0, "{}", bucket_duration);
|
||||
let unit = self.unit();
|
||||
self.value()
|
||||
.align_to_ceil_by_bucket(bucket_duration)
|
||||
.map(|val| Timestamp::new(val, unit))
|
||||
}
|
||||
}
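A short illustration of the floor and ceiling alignment described above (a sketch; `BucketAligned` is assumed to be reachable via the `common_time` crate's `timestamp_millis` module, matching this file):

use common_time::timestamp_millis::BucketAligned;
use common_time::Timestamp;

fn main() {
    // With a bucket of 3: floor alignment of 4 is 3, ceiling alignment is 6.
    assert_eq!(Some(3), 4i64.align_by_bucket(3));
    assert_eq!(Some(6), 4i64.align_to_ceil_by_bucket(3));

    // Timestamps align on their raw value while keeping the time unit.
    let ts = Timestamp::new_millisecond(1004);
    assert_eq!(
        Some(Timestamp::new_millisecond(2000)),
        ts.align_to_ceil_by_bucket(1000)
    );
}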
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -180,4 +200,31 @@ mod tests {
|
||||
Timestamp::new_millisecond(i64::MIN).align_by_bucket(bucket)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_align_to_ceil() {
|
||||
assert_eq!(None, i64::MAX.align_to_ceil_by_bucket(10));
|
||||
assert_eq!(
|
||||
Some(i64::MAX - (i64::MAX % 10)),
|
||||
(i64::MAX - (i64::MAX % 10)).align_to_ceil_by_bucket(10)
|
||||
);
|
||||
assert_eq!(Some(i64::MAX), i64::MAX.align_to_ceil_by_bucket(1));
|
||||
assert_eq!(Some(i64::MAX), i64::MAX.align_to_ceil_by_bucket(1));
|
||||
assert_eq!(Some(i64::MAX), i64::MAX.align_to_ceil_by_bucket(i64::MAX));
|
||||
|
||||
assert_eq!(
|
||||
Some(i64::MIN - (i64::MIN % 10)),
|
||||
i64::MIN.align_to_ceil_by_bucket(10)
|
||||
);
|
||||
assert_eq!(Some(i64::MIN), i64::MIN.align_to_ceil_by_bucket(1));
|
||||
|
||||
assert_eq!(Some(3), 1i64.align_to_ceil_by_bucket(3));
|
||||
assert_eq!(Some(3), 3i64.align_to_ceil_by_bucket(3));
|
||||
assert_eq!(Some(6), 4i64.align_to_ceil_by_bucket(3));
|
||||
assert_eq!(Some(0), 0i64.align_to_ceil_by_bucket(3));
|
||||
assert_eq!(Some(0), (-1i64).align_to_ceil_by_bucket(3));
|
||||
assert_eq!(Some(0), (-2i64).align_to_ceil_by_bucket(3));
|
||||
assert_eq!(Some(-3), (-3i64).align_to_ceil_by_bucket(3));
|
||||
assert_eq!(Some(-3), (-4i64).align_to_ceil_by_bucket(3));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -44,7 +44,7 @@ use query::query_engine::{QueryEngineFactory, QueryEngineRef};
|
||||
use servers::Mode;
|
||||
use session::context::QueryContext;
|
||||
use snafu::prelude::*;
|
||||
use storage::compaction::{CompactionHandler, CompactionSchedulerRef, SimplePicker};
|
||||
use storage::compaction::{CompactionHandler, CompactionSchedulerRef};
|
||||
use storage::config::EngineConfig as StorageEngineConfig;
|
||||
use storage::scheduler::{LocalScheduler, SchedulerConfig};
|
||||
use storage::EngineImpl;
|
||||
@@ -395,9 +395,8 @@ impl Instance {
|
||||
}
|
||||
|
||||
fn create_compaction_scheduler<S: LogStore>(opts: &DatanodeOptions) -> CompactionSchedulerRef<S> {
|
||||
let picker = SimplePicker::default();
|
||||
let config = SchedulerConfig::from(opts);
|
||||
let handler = CompactionHandler { picker };
|
||||
let handler = CompactionHandler::default();
|
||||
let scheduler = LocalScheduler::new(config, handler);
|
||||
Arc::new(scheduler)
|
||||
}
|
||||
|
||||
@@ -19,14 +19,14 @@ use std::sync::Arc;
|
||||
use api::v1::CreateTableExpr;
|
||||
use catalog::error::{
|
||||
self as catalog_err, InternalSnafu, InvalidCatalogValueSnafu, InvalidSystemTableDefSnafu,
|
||||
Result as CatalogResult, UnimplementedSnafu,
|
||||
Result as CatalogResult, TableMetadataManagerSnafu, UnimplementedSnafu,
|
||||
};
|
||||
use catalog::helper::{
|
||||
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, SchemaKey,
|
||||
TableGlobalKey, TableGlobalValue,
|
||||
};
|
||||
use catalog::information_schema::InformationSchemaProvider;
|
||||
use catalog::remote::{Kv, KvBackendRef, KvCacheInvalidatorRef};
|
||||
use catalog::remote::KvCacheInvalidatorRef;
|
||||
use catalog::{
|
||||
CatalogManager, DeregisterTableRequest, RegisterSchemaRequest, RegisterSystemTableRequest,
|
||||
RegisterTableRequest, RenameTableRequest,
|
||||
@@ -34,6 +34,7 @@ use catalog::{
|
||||
use client::client_manager::DatanodeClients;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME};
|
||||
use common_error::prelude::BoxedError;
|
||||
use common_meta::kv_backend::{Kv, KvBackendRef};
|
||||
use common_meta::table_name::TableName;
|
||||
use common_telemetry::warn;
|
||||
use futures::StreamExt;
|
||||
@@ -254,7 +255,7 @@ impl CatalogManager for FrontendCatalogManager {
|
||||
let mut iter = self.backend.range(key.as_bytes());
|
||||
let mut res = HashSet::new();
|
||||
while let Some(r) = iter.next().await {
|
||||
let Kv(k, _) = r?;
|
||||
let Kv(k, _) = r.context(TableMetadataManagerSnafu)?;
|
||||
let catalog_key = String::from_utf8_lossy(&k);
|
||||
if let Ok(key) = CatalogKey::parse(catalog_key.as_ref()) {
|
||||
let _ = res.insert(key.catalog_name);
|
||||
@@ -270,7 +271,7 @@ impl CatalogManager for FrontendCatalogManager {
|
||||
let mut iter = self.backend.range(key.as_bytes());
|
||||
let mut res = HashSet::new();
|
||||
while let Some(r) = iter.next().await {
|
||||
let Kv(k, _) = r?;
|
||||
let Kv(k, _) = r.context(TableMetadataManagerSnafu)?;
|
||||
let key =
|
||||
SchemaKey::parse(String::from_utf8_lossy(&k)).context(InvalidCatalogValueSnafu)?;
|
||||
let _ = res.insert(key.schema_name);
|
||||
@@ -287,7 +288,7 @@ impl CatalogManager for FrontendCatalogManager {
|
||||
let iter = self.backend.range(key.as_bytes());
|
||||
let result = iter
|
||||
.map(|r| {
|
||||
let Kv(k, _) = r?;
|
||||
let Kv(k, _) = r.context(TableMetadataManagerSnafu)?;
|
||||
let key = TableGlobalKey::parse(String::from_utf8_lossy(&k))
|
||||
.context(InvalidCatalogValueSnafu)?;
|
||||
Ok(key.table_name)
|
||||
@@ -304,7 +305,12 @@ impl CatalogManager for FrontendCatalogManager {
|
||||
}
|
||||
.to_string();
|
||||
|
||||
Ok(self.backend.get(key.as_bytes()).await?.is_some())
|
||||
Ok(self
|
||||
.backend
|
||||
.get(key.as_bytes())
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?
|
||||
.is_some())
|
||||
}
|
||||
|
||||
async fn schema_exist(&self, catalog: &str, schema: &str) -> CatalogResult<bool> {
|
||||
@@ -314,7 +320,12 @@ impl CatalogManager for FrontendCatalogManager {
|
||||
}
|
||||
.to_string();
|
||||
|
||||
Ok(self.backend().get(schema_key.as_bytes()).await?.is_some())
|
||||
Ok(self
|
||||
.backend()
|
||||
.get(schema_key.as_bytes())
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?
|
||||
.is_some())
|
||||
}
|
||||
|
||||
async fn table_exist(&self, catalog: &str, schema: &str, table: &str) -> CatalogResult<bool> {
|
||||
@@ -326,7 +337,8 @@ impl CatalogManager for FrontendCatalogManager {
|
||||
Ok(self
|
||||
.backend()
|
||||
.get(table_global_key.to_string().as_bytes())
|
||||
.await?
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?
|
||||
.is_some())
|
||||
}
|
||||
|
||||
@@ -362,7 +374,7 @@ impl CatalogManager for FrontendCatalogManager {
|
||||
schema_name: schema.to_string(),
|
||||
table_name: table_name.to_string(),
|
||||
};
|
||||
let Some(kv) = self.backend().get(table_global_key.to_string().as_bytes()).await? else {
|
||||
let Some(kv) = self.backend().get(table_global_key.to_string().as_bytes()).await.context(TableMetadataManagerSnafu)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
let v = TableGlobalValue::from_bytes(kv.1).context(InvalidCatalogValueSnafu)?;
|
||||
|
||||
@@ -565,6 +565,12 @@ pub enum Error {
|
||||
value: String,
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Table metadata manager error: {}", source))]
|
||||
TableMetadataManager {
|
||||
source: common_meta::error::Error,
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -688,6 +694,7 @@ impl ErrorExt for Error {
|
||||
|
||||
Error::WriteParquet { source, .. } => source.status_code(),
|
||||
Error::InvalidCopyParameter { .. } => StatusCode::InvalidArguments,
|
||||
Error::TableMetadataManager { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,8 @@ use crate::service_config::{
|
||||
#[serde(default)]
|
||||
pub struct FrontendOptions {
|
||||
pub mode: Mode,
|
||||
pub heartbeat_interval_millis: u64,
|
||||
pub retry_interval_millis: u64,
|
||||
pub http_options: Option<HttpOptions>,
|
||||
pub grpc_options: Option<GrpcOptions>,
|
||||
pub mysql_options: Option<MysqlOptions>,
|
||||
@@ -43,6 +45,8 @@ impl Default for FrontendOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
mode: Mode::Standalone,
|
||||
heartbeat_interval_millis: 5000,
|
||||
retry_interval_millis: 5000,
|
||||
http_options: Some(HttpOptions::default()),
|
||||
grpc_options: Some(GrpcOptions::default()),
|
||||
mysql_options: Some(MysqlOptions::default()),
|
||||
|
||||
@@ -43,14 +43,14 @@ pub struct HeartbeatTask {
|
||||
impl HeartbeatTask {
|
||||
pub fn new(
|
||||
meta_client: Arc<MetaClient>,
|
||||
report_interval: u64,
|
||||
retry_interval: u64,
|
||||
heartbeat_interval_millis: u64,
|
||||
retry_interval_millis: u64,
|
||||
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
|
||||
) -> Self {
|
||||
HeartbeatTask {
|
||||
meta_client,
|
||||
report_interval,
|
||||
retry_interval,
|
||||
report_interval: heartbeat_interval_millis,
|
||||
retry_interval: retry_interval_millis,
|
||||
resp_handler_executor,
|
||||
}
|
||||
}
|
||||
@@ -92,7 +92,7 @@ impl HeartbeatTask {
|
||||
Err(e) => {
|
||||
error!(e; "Occur error while reading heartbeat response");
|
||||
capture_self
|
||||
.start_with_retry(Duration::from_secs(retry_interval))
|
||||
.start_with_retry(Duration::from_millis(retry_interval))
|
||||
.await;
|
||||
|
||||
break;
|
||||
@@ -136,7 +136,7 @@ impl HeartbeatTask {
|
||||
}
|
||||
}
|
||||
_ = &mut sleep => {
|
||||
sleep.as_mut().reset(Instant::now() + Duration::from_secs(report_interval));
|
||||
sleep.as_mut().reset(Instant::now() + Duration::from_millis(report_interval));
|
||||
Some(HeartbeatRequest::default())
|
||||
}
|
||||
};
|
||||
|
||||
@@ -47,7 +47,6 @@ use datanode::instance::sql::table_idents_to_full_name;
|
||||
use datanode::instance::InstanceRef as DnInstanceRef;
|
||||
use datatypes::schema::Schema;
|
||||
use distributed::DistInstance;
|
||||
use futures::future;
|
||||
use meta_client::client::{MetaClient, MetaClientBuilder};
|
||||
use meta_client::MetaClientOptions;
|
||||
use partition::manager::PartitionRuleManager;
|
||||
@@ -136,13 +135,14 @@ impl Instance {
|
||||
|
||||
let datanode_clients = Arc::new(DatanodeClients::default());
|
||||
|
||||
Self::try_new_distributed_with(meta_client, datanode_clients, plugins).await
|
||||
Self::try_new_distributed_with(meta_client, datanode_clients, plugins, opts).await
|
||||
}
|
||||
|
||||
pub async fn try_new_distributed_with(
|
||||
meta_client: Arc<MetaClient>,
|
||||
datanode_clients: Arc<DatanodeClients>,
|
||||
plugins: Arc<Plugins>,
|
||||
opts: &FrontendOptions,
|
||||
) -> Result<Self> {
|
||||
let meta_backend = Arc::new(CachedMetaKvBackend::new(meta_client.clone()));
|
||||
let table_routes = Arc::new(TableRoutes::new(meta_client.clone()));
|
||||
@@ -195,8 +195,8 @@ impl Instance {
|
||||
|
||||
let heartbeat_task = Some(HeartbeatTask::new(
|
||||
meta_client,
|
||||
5,
|
||||
5,
|
||||
opts.heartbeat_interval_millis,
|
||||
opts.retry_interval_millis,
|
||||
Arc::new(handlers_executor),
|
||||
));
|
||||
|
||||
@@ -288,13 +288,10 @@ impl Instance {
|
||||
requests: InsertRequests,
|
||||
ctx: QueryContextRef,
|
||||
) -> Result<Output> {
|
||||
let _ = future::join_all(
|
||||
requests
|
||||
.inserts
|
||||
.iter()
|
||||
.map(|x| self.create_or_alter_table_on_demand(ctx.clone(), x)),
|
||||
)
|
||||
.await;
|
||||
for req in requests.inserts.iter() {
|
||||
self.create_or_alter_table_on_demand(ctx.clone(), req)
|
||||
.await?;
|
||||
}
|
||||
|
||||
let query = Request::Inserts(requests);
|
||||
GrpcQueryHandler::do_query(&*self.grpc_query_handler, query, ctx).await
|
||||
|
||||
@@ -658,7 +658,7 @@ impl GrpcQueryHandler for DistInstance {
|
||||
match expr {
|
||||
DdlExpr::CreateDatabase(expr) => self.handle_create_database(expr, ctx).await,
|
||||
DdlExpr::CreateTable(mut expr) => {
|
||||
let _ = self.create_table(&mut expr, None).await;
|
||||
let _ = self.create_table(&mut expr, None).await?;
|
||||
Ok(Output::AffectedRows(0))
|
||||
}
|
||||
DdlExpr::Alter(expr) => self.handle_alter_table(expr).await,
|
||||
|
||||
@@ -179,10 +179,10 @@ mod tests {
|
||||
use catalog::helper::{
|
||||
CatalogKey, CatalogValue, SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue,
|
||||
};
|
||||
use catalog::remote::mock::MockKvBackend;
|
||||
use catalog::remote::{KvBackend, KvBackendRef};
|
||||
use client::client_manager::DatanodeClients;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_meta::kv_backend::memory::MemoryKvBackend;
|
||||
use common_meta::kv_backend::{KvBackend, KvBackendRef};
|
||||
use datatypes::prelude::{ConcreteDataType, VectorRef};
|
||||
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, Schema};
|
||||
use datatypes::vectors::Int32Vector;
|
||||
@@ -193,7 +193,7 @@ mod tests {
|
||||
use crate::table::test::create_partition_rule_manager;
|
||||
|
||||
async fn prepare_mocked_backend() -> KvBackendRef {
|
||||
let backend = Arc::new(MockKvBackend::default());
|
||||
let backend = Arc::new(MemoryKvBackend::default());
|
||||
|
||||
let default_catalog = CatalogKey {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
|
||||
@@ -55,7 +55,9 @@ use table::Table;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::catalog::FrontendCatalogManager;
|
||||
use crate::error::{self, FindDatanodeSnafu, FindTableRouteSnafu, Result};
|
||||
use crate::error::{
|
||||
self, FindDatanodeSnafu, FindTableRouteSnafu, Result, TableMetadataManagerSnafu,
|
||||
};
|
||||
use crate::instance::distributed::inserter::DistInserter;
|
||||
use crate::table::delete::to_grpc_delete_request;
|
||||
use crate::table::scan::{DatanodeInstance, TableScanPlan};
|
||||
@@ -256,7 +258,7 @@ impl DistTable {
|
||||
.backend()
|
||||
.get(key.to_string().as_bytes())
|
||||
.await
|
||||
.context(error::CatalogSnafu)?;
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
Ok(if let Some(raw) = raw {
|
||||
Some(TableGlobalValue::from_bytes(raw.1).context(error::CatalogEntrySerdeSnafu)?)
|
||||
} else {
|
||||
@@ -274,7 +276,7 @@ impl DistTable {
|
||||
.backend()
|
||||
.set(key.to_string().as_bytes(), &value)
|
||||
.await
|
||||
.context(error::CatalogSnafu)
|
||||
.context(TableMetadataManagerSnafu)
|
||||
}
|
||||
|
||||
async fn delete_table_global_value(&self, key: TableGlobalKey) -> Result<()> {
|
||||
@@ -282,7 +284,7 @@ impl DistTable {
|
||||
.backend()
|
||||
.delete(key.to_string().as_bytes())
|
||||
.await
|
||||
.context(error::CatalogSnafu)
|
||||
.context(TableMetadataManagerSnafu)
|
||||
}
|
||||
|
||||
async fn move_table_route_value(
|
||||
@@ -313,7 +315,7 @@ impl DistTable {
|
||||
.backend()
|
||||
.move_value(old_key.as_bytes(), new_key.as_bytes())
|
||||
.await
|
||||
.context(error::CatalogSnafu)?;
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
|
||||
self.catalog_manager
|
||||
.partition_manager()
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod ddl;
|
||||
mod heartbeat;
|
||||
mod load_balance;
|
||||
mod lock;
|
||||
@@ -29,6 +30,7 @@ use common_meta::rpc::store::{
|
||||
RangeRequest, RangeResponse,
|
||||
};
|
||||
use common_telemetry::info;
|
||||
use ddl::Client as DdlClient;
|
||||
use heartbeat::Client as HeartbeatClient;
|
||||
use lock::Client as LockClient;
|
||||
use router::Client as RouterClient;
|
||||
@@ -49,6 +51,7 @@ pub struct MetaClientBuilder {
|
||||
enable_router: bool,
|
||||
enable_store: bool,
|
||||
enable_lock: bool,
|
||||
enable_ddl: bool,
|
||||
channel_manager: Option<ChannelManager>,
|
||||
}
|
||||
|
||||
@@ -89,6 +92,13 @@ impl MetaClientBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn enable_ddl(self) -> Self {
|
||||
Self {
|
||||
enable_ddl: true,
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
pub fn channel_manager(self, channel_manager: ChannelManager) -> Self {
|
||||
Self {
|
||||
channel_manager: Some(channel_manager),
|
||||
@@ -119,7 +129,10 @@ impl MetaClientBuilder {
|
||||
client.store = Some(StoreClient::new(self.id, self.role, mgr.clone()));
|
||||
}
|
||||
if self.enable_lock {
|
||||
client.lock = Some(LockClient::new(self.id, self.role, mgr));
|
||||
client.lock = Some(LockClient::new(self.id, self.role, mgr.clone()));
|
||||
}
|
||||
if self.enable_ddl {
|
||||
client.ddl = Some(DdlClient::new(self.id, self.role, mgr));
|
||||
}
|
||||
|
||||
client
|
||||
@@ -134,6 +147,7 @@ pub struct MetaClient {
|
||||
router: Option<RouterClient>,
|
||||
store: Option<StoreClient>,
|
||||
lock: Option<LockClient>,
|
||||
ddl: Option<DdlClient>,
|
||||
}
|
||||
|
||||
impl MetaClient {
|
||||
|
||||
src/meta-client/src/client/ddl.rs (new file, +145 lines)
@@ -0,0 +1,145 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::meta::ddl_task_client::DdlTaskClient;
|
||||
use api::v1::meta::{ErrorCode, Role, SubmitDdlTaskRequest, SubmitDdlTaskResponse};
|
||||
use common_grpc::channel_manager::ChannelManager;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use tokio::sync::RwLock;
|
||||
use tonic::transport::Channel;
|
||||
|
||||
use crate::client::heartbeat::Inner as HeartbeatInner;
|
||||
use crate::client::Id;
|
||||
use crate::error;
|
||||
use crate::error::Result;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
// TODO(weny): remove this in following PRs.
|
||||
#[allow(unused)]
|
||||
pub struct Client {
|
||||
inner: Arc<RwLock<Inner>>,
|
||||
}
|
||||
|
||||
// TODO(weny): remove this in following PRs.
|
||||
#[allow(dead_code)]
|
||||
impl Client {
|
||||
pub fn new(id: Id, role: Role, channel_manager: ChannelManager) -> Self {
|
||||
let inner = Arc::new(RwLock::new(Inner {
|
||||
id,
|
||||
role,
|
||||
channel_manager: channel_manager.clone(),
|
||||
heartbeat_inner: HeartbeatInner::new(id, role, channel_manager),
|
||||
}));
|
||||
|
||||
Self { inner }
|
||||
}
|
||||
|
||||
pub async fn start<U, A>(&mut self, urls: A) -> Result<()>
|
||||
where
|
||||
U: AsRef<str>,
|
||||
A: AsRef<[U]>,
|
||||
{
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.start(urls).await
|
||||
}
|
||||
|
||||
pub async fn is_started(&self) -> bool {
|
||||
let inner = self.inner.read().await;
|
||||
inner.is_started()
|
||||
}
|
||||
|
||||
pub async fn submit_ddl_task(
|
||||
&self,
|
||||
req: SubmitDdlTaskRequest,
|
||||
) -> Result<SubmitDdlTaskResponse> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.submit_ddl_task(req).await
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
// TODO(weny): remove this in following PRs.
|
||||
#[allow(unused)]
|
||||
struct Inner {
|
||||
id: Id,
|
||||
role: Role,
|
||||
channel_manager: ChannelManager,
|
||||
heartbeat_inner: HeartbeatInner,
|
||||
}
|
||||
|
||||
impl Inner {
|
||||
async fn start<U, A>(&mut self, urls: A) -> Result<()>
|
||||
where
|
||||
U: AsRef<str>,
|
||||
A: AsRef<[U]>,
|
||||
{
|
||||
ensure!(
|
||||
!self.is_started(),
|
||||
error::IllegalGrpcClientStateSnafu {
|
||||
err_msg: "Router client already started",
|
||||
}
|
||||
);
|
||||
|
||||
self.heartbeat_inner.start(urls).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn make_client(&self, addr: impl AsRef<str>) -> Result<DdlTaskClient<Channel>> {
|
||||
let channel = self
|
||||
.channel_manager
|
||||
.get(addr)
|
||||
.context(error::CreateChannelSnafu)?;
|
||||
|
||||
Ok(DdlTaskClient::new(channel))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_started(&self) -> bool {
|
||||
self.heartbeat_inner.is_started()
|
||||
}
|
||||
|
||||
pub async fn submit_ddl_task(
|
||||
&mut self,
|
||||
mut req: SubmitDdlTaskRequest,
|
||||
) -> Result<SubmitDdlTaskResponse> {
|
||||
req.set_header(self.id, self.role);
|
||||
|
||||
loop {
|
||||
if let Some(leader) = &self.heartbeat_inner.get_leader() {
|
||||
let mut client = self.make_client(leader)?;
|
||||
let res = client
|
||||
.submit_ddl_task(req.clone())
|
||||
.await
|
||||
.context(error::TonicStatusSnafu)?;
|
||||
|
||||
let res = res.into_inner();
|
||||
|
||||
if let Some(header) = res.header.as_ref() {
|
||||
if let Some(err) = header.error.as_ref() {
|
||||
if err.code == ErrorCode::NotLeader as i32 {
|
||||
self.heartbeat_inner.ask_leader().await?;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(res);
|
||||
} else if let Err(err) = self.heartbeat_inner.ask_leader().await {
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -133,7 +133,7 @@ impl Client {
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Inner {
|
||||
pub(crate) struct Inner {
|
||||
id: Id,
|
||||
role: Role,
|
||||
channel_manager: ChannelManager,
|
||||
@@ -142,7 +142,16 @@ struct Inner {
|
||||
}
|
||||
|
||||
impl Inner {
|
||||
async fn start<U, A>(&mut self, urls: A) -> Result<()>
|
||||
pub(crate) fn new(id: Id, role: Role, channel_manager: ChannelManager) -> Self {
|
||||
Self {
|
||||
id,
|
||||
role,
|
||||
channel_manager,
|
||||
peers: HashSet::new(),
|
||||
leader: None,
|
||||
}
|
||||
}
|
||||
pub(crate) async fn start<U, A>(&mut self, urls: A) -> Result<()>
|
||||
where
|
||||
U: AsRef<str>,
|
||||
A: AsRef<[U]>,
|
||||
@@ -163,7 +172,11 @@ impl Inner {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn ask_leader(&mut self) -> Result<()> {
|
||||
pub(crate) fn get_leader(&self) -> Option<String> {
|
||||
self.leader.clone()
|
||||
}
|
||||
|
||||
pub(crate) async fn ask_leader(&mut self) -> Result<()> {
|
||||
ensure!(
|
||||
self.is_started(),
|
||||
error::IllegalGrpcClientStateSnafu {
|
||||
@@ -242,7 +255,7 @@ impl Inner {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_started(&self) -> bool {
|
||||
pub(crate) fn is_started(&self) -> bool {
|
||||
!self.peers.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ api = { path = "../api" }
|
||||
async-stream.workspace = true
|
||||
async-trait = "0.1"
|
||||
catalog = { path = "../catalog" }
|
||||
client = { path = "../client" }
|
||||
common-base = { path = "../common/base" }
|
||||
common-catalog = { path = "../common/catalog" }
|
||||
common-error = { path = "../common/error" }
|
||||
|
||||
src/meta-srv/src/ddl.rs (new file, +110 lines)
@@ -0,0 +1,110 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use client::client_manager::DatanodeClients;
|
||||
use common_meta::rpc::ddl::CreateTableTask;
|
||||
use common_meta::rpc::router::TableRoute;
|
||||
use common_procedure::{watcher, ProcedureId, ProcedureManagerRef, ProcedureWithId};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::procedure::create_table::CreateTableProcedure;
|
||||
use crate::service::store::kv::KvStoreRef;
|
||||
|
||||
pub type DdlManagerRef = Arc<DdlManager>;
|
||||
|
||||
pub struct DdlManager {
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
kv_store: KvStoreRef,
|
||||
datanode_clients: Arc<DatanodeClients>,
|
||||
}
|
||||
|
||||
// TODO(weny): remove in following PRs.
|
||||
#[allow(unused)]
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct DdlContext {
|
||||
pub(crate) kv_store: KvStoreRef,
|
||||
pub(crate) datanode_clients: Arc<DatanodeClients>,
|
||||
}
|
||||
|
||||
impl DdlManager {
|
||||
pub(crate) fn new(
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
kv_store: KvStoreRef,
|
||||
datanode_clients: Arc<DatanodeClients>,
|
||||
) -> Self {
|
||||
Self {
|
||||
procedure_manager,
|
||||
kv_store,
|
||||
datanode_clients,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn create_context(&self) -> DdlContext {
|
||||
DdlContext {
|
||||
kv_store: self.kv_store.clone(),
|
||||
datanode_clients: self.datanode_clients.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn try_start(&self) -> Result<()> {
|
||||
let context = self.create_context();
|
||||
|
||||
self.procedure_manager
|
||||
.register_loader(
|
||||
CreateTableProcedure::TYPE_NAME,
|
||||
Box::new(move |json| {
|
||||
let context = context.clone();
|
||||
CreateTableProcedure::from_json(json, context).map(|p| Box::new(p) as _)
|
||||
}),
|
||||
)
|
||||
.context(error::RegisterProcedureLoaderSnafu {
|
||||
type_name: CreateTableProcedure::TYPE_NAME,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn submit_create_table_task(
|
||||
&self,
|
||||
cluster_id: u64,
|
||||
create_table_task: CreateTableTask,
|
||||
table_route: TableRoute,
|
||||
) -> Result<ProcedureId> {
|
||||
let context = self.create_context();
|
||||
|
||||
let procedure =
|
||||
CreateTableProcedure::new(cluster_id, create_table_task, table_route, context);
|
||||
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
self.submit_procedure(procedure_with_id).await
|
||||
}
|
||||
|
||||
async fn submit_procedure(&self, procedure_with_id: ProcedureWithId) -> Result<ProcedureId> {
|
||||
let procedure_id = procedure_with_id.id;
|
||||
|
||||
let mut watcher = self
|
||||
.procedure_manager
|
||||
.submit(procedure_with_id)
|
||||
.await
|
||||
.context(error::SubmitProcedureSnafu)?;
|
||||
|
||||
watcher::wait(&mut watcher)
|
||||
.await
|
||||
.context(error::WaitProcedureSnafu)?;
|
||||
|
||||
Ok(procedure_id)
|
||||
}
|
||||
}
|
||||
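Note: the loader registered in try_start above is what lets the procedure manager rebuild an in-flight CreateTableProcedure from its persisted JSON after a metasrv restart. A minimal, self-contained sketch of that registration pattern, using hypothetical Registry and Procedure types rather than the real common_procedure API:

use std::collections::HashMap;

// Hypothetical stand-ins for the procedure framework; not the common_procedure API.
trait Procedure {
    fn type_name(&self) -> &str;
}

type LoaderFn = Box<dyn Fn(&str) -> Result<Box<dyn Procedure>, String> + Send + Sync>;

#[derive(Default)]
struct Registry {
    loaders: HashMap<String, LoaderFn>,
}

impl Registry {
    // Each procedure type registers a closure that can rebuild it from persisted JSON.
    fn register_loader(&mut self, type_name: &str, loader: LoaderFn) {
        self.loaders.insert(type_name.to_string(), loader);
    }

    // On restart, the manager replays persisted state through the matching loader.
    fn load(&self, type_name: &str, json: &str) -> Result<Box<dyn Procedure>, String> {
        let loader = self
            .loaders
            .get(type_name)
            .ok_or_else(|| format!("no loader registered for {type_name}"))?;
        loader(json)
    }
}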
@@ -26,7 +26,7 @@ pub const LEASE_SECS: i64 = 5;
pub const KEEP_ALIVE_PERIOD_SECS: u64 = LEASE_SECS as u64 / 2;
pub const ELECTION_KEY: &str = "__meta_srv_election";

#[derive(Clone)]
#[derive(Debug, Clone)]
pub enum LeaderChangeMessage {
    Elected(Arc<LeaderKey>),
    StepDown(Arc<LeaderKey>),
@@ -13,14 +13,54 @@
|
||||
// limitations under the License.
|
||||
|
||||
use common_error::prelude::*;
|
||||
use common_meta::peer::Peer;
|
||||
use snafu::Location;
|
||||
use tokio::sync::mpsc::error::SendError;
|
||||
use tokio::sync::oneshot::error::TryRecvError;
|
||||
use tonic::codegen::http;
|
||||
use tonic::Code;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
pub enum Error {
|
||||
#[snafu(display("Failed to execute transaction: {}", msg))]
|
||||
Txn { location: Location, msg: String },
|
||||
|
||||
#[snafu(display(
|
||||
"Unexpected table_id changed, expected: {}, found: {}",
|
||||
expected,
|
||||
found,
|
||||
))]
|
||||
TableIdChanged {
|
||||
location: Location,
|
||||
expected: u64,
|
||||
found: u64,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to receive status, source: {}", source,))]
|
||||
TryReceiveStatus {
|
||||
location: Location,
|
||||
source: TryRecvError,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Failed to request Datanode, expected: {}, but only {} available",
|
||||
expected,
|
||||
available
|
||||
))]
|
||||
NoEnoughAvailableDatanode {
|
||||
location: Location,
|
||||
expected: usize,
|
||||
available: usize,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to request Datanode {}, source: {}", peer, source))]
|
||||
RequestDatanode {
|
||||
location: Location,
|
||||
peer: Peer,
|
||||
source: client::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to send shutdown signal"))]
|
||||
SendShutdownSignal { source: SendError<()> },
|
||||
|
||||
@@ -274,6 +314,18 @@ pub enum Error {
|
||||
source: common_procedure::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to recover procedure, source: {source}"))]
|
||||
WaitProcedure {
|
||||
location: Location,
|
||||
source: common_procedure::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to submit procedure, source: {source}"))]
|
||||
SubmitProcedure {
|
||||
location: Location,
|
||||
source: common_procedure::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Schema already exists, name: {schema_name}"))]
|
||||
SchemaAlreadyExists {
|
||||
schema_name: String,
|
||||
@@ -413,7 +465,9 @@ impl ErrorExt for Error {
            | Error::MailboxReceiver { .. }
            | Error::RetryLater { .. }
            | Error::StartGrpc { .. }
            | Error::Combine { .. } => StatusCode::Internal,
            | Error::Combine { .. }
            | Error::NoEnoughAvailableDatanode { .. }
            | Error::TryReceiveStatus { .. } => StatusCode::Internal,
            Error::EmptyKey { .. }
            | Error::MissingRequiredParameter { .. }
            | Error::MissingRequestHeader { .. }
@@ -437,10 +491,15 @@ impl ErrorExt for Error {
            | Error::InvalidUtf8Value { .. }
            | Error::UnexpectedInstructionReply { .. }
            | Error::EtcdTxnOpResponse { .. }
            | Error::Unexpected { .. } => StatusCode::Unexpected,
            | Error::Unexpected { .. }
            | Error::Txn { .. }
            | Error::TableIdChanged { .. } => StatusCode::Unexpected,
            Error::TableNotFound { .. } => StatusCode::TableNotFound,
            Error::RequestDatanode { source, .. } => source.status_code(),
            Error::InvalidCatalogValue { source, .. } => source.status_code(),
            Error::RecoverProcedure { source, .. } => source.status_code(),
            Error::RecoverProcedure { source, .. }
            | Error::SubmitProcedure { source, .. }
            | Error::WaitProcedure { source, .. } => source.status_code(),
            Error::ShutdownServer { source, .. } | Error::StartHttp { source, .. } => {
                source.status_code()
            }
@@ -37,6 +37,7 @@ impl HeartbeatHandler for OnLeaderStartHandler {
|
||||
if election.in_infancy() {
|
||||
ctx.is_infancy = true;
|
||||
ctx.reset_in_memory();
|
||||
ctx.reset_leader_cached_kv_store();
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
|
||||
@@ -146,6 +146,7 @@ mod tests {
|
||||
use crate::handler::{HeartbeatMailbox, Pushers};
|
||||
use crate::keys::StatKey;
|
||||
use crate::sequence::Sequence;
|
||||
use crate::service::store::cached_kv::LeaderCachedKvStore;
|
||||
use crate::service::store::ext::KvStoreExt;
|
||||
use crate::service::store::memory::MemStore;
|
||||
|
||||
@@ -153,6 +154,8 @@ mod tests {
|
||||
async fn test_handle_datanode_stats() {
|
||||
let in_memory = Arc::new(MemStore::new());
|
||||
let kv_store = Arc::new(MemStore::new());
|
||||
let leader_cached_kv_store =
|
||||
Arc::new(LeaderCachedKvStore::with_always_leader(kv_store.clone()));
|
||||
let seq = Sequence::new("test_seq", 0, 10, kv_store.clone());
|
||||
let mailbox = HeartbeatMailbox::create(Pushers::default(), seq);
|
||||
let meta_peer_client = MetaPeerClientBuilder::default()
|
||||
@@ -166,6 +169,7 @@ mod tests {
|
||||
server_addr: "127.0.0.1:0000".to_string(),
|
||||
in_memory,
|
||||
kv_store,
|
||||
leader_cached_kv_store,
|
||||
meta_peer_client,
|
||||
mailbox,
|
||||
election: None,
|
||||
|
||||
@@ -56,12 +56,15 @@ mod tests {
|
||||
use crate::cluster::MetaPeerClientBuilder;
|
||||
use crate::handler::{Context, HeartbeatMailbox, Pushers};
|
||||
use crate::sequence::Sequence;
|
||||
use crate::service::store::cached_kv::LeaderCachedKvStore;
|
||||
use crate::service::store::memory::MemStore;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_handle_heartbeat_resp_header() {
|
||||
let in_memory = Arc::new(MemStore::new());
|
||||
let kv_store = Arc::new(MemStore::new());
|
||||
let leader_cached_kv_store =
|
||||
Arc::new(LeaderCachedKvStore::with_always_leader(kv_store.clone()));
|
||||
let seq = Sequence::new("test_seq", 0, 10, kv_store.clone());
|
||||
let mailbox = HeartbeatMailbox::create(Pushers::default(), seq);
|
||||
let meta_peer_client = MetaPeerClientBuilder::default()
|
||||
@@ -75,6 +78,7 @@ mod tests {
|
||||
server_addr: "127.0.0.1:0000".to_string(),
|
||||
in_memory,
|
||||
kv_store,
|
||||
leader_cached_kv_store,
|
||||
meta_peer_client,
|
||||
mailbox,
|
||||
election: None,
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
|
||||
pub mod bootstrap;
|
||||
pub mod cluster;
|
||||
pub mod ddl;
|
||||
pub mod election;
|
||||
pub mod error;
|
||||
mod failure_detector;
|
||||
|
||||
@@ -28,6 +28,7 @@ use snafu::ResultExt;
|
||||
use tokio::sync::broadcast::error::RecvError;
|
||||
|
||||
use crate::cluster::MetaPeerClientRef;
|
||||
use crate::ddl::DdlManagerRef;
|
||||
use crate::election::{Election, LeaderChangeMessage};
|
||||
use crate::error::{RecoverProcedureSnafu, Result};
|
||||
use crate::handler::HeartbeatHandlerGroup;
|
||||
@@ -75,6 +76,7 @@ pub struct Context {
|
||||
pub server_addr: String,
|
||||
pub in_memory: ResettableKvStoreRef,
|
||||
pub kv_store: KvStoreRef,
|
||||
pub leader_cached_kv_store: ResettableKvStoreRef,
|
||||
pub meta_peer_client: MetaPeerClientRef,
|
||||
pub mailbox: MailboxRef,
|
||||
pub election: Option<ElectionRef>,
|
||||
@@ -94,6 +96,10 @@ impl Context {
|
||||
pub fn reset_in_memory(&self) {
|
||||
self.in_memory.reset();
|
||||
}
|
||||
|
||||
pub fn reset_leader_cached_kv_store(&self) {
|
||||
self.leader_cached_kv_store.reset();
|
||||
}
|
||||
}
|
||||
|
||||
pub struct LeaderValue(pub String);
|
||||
@@ -120,6 +126,7 @@ pub struct MetaSrv {
|
||||
// store some data that will not be persisted.
|
||||
in_memory: ResettableKvStoreRef,
|
||||
kv_store: KvStoreRef,
|
||||
leader_cached_kv_store: ResettableKvStoreRef,
|
||||
table_id_sequence: SequenceRef,
|
||||
meta_peer_client: MetaPeerClientRef,
|
||||
selector: SelectorRef,
|
||||
@@ -129,6 +136,7 @@ pub struct MetaSrv {
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
metadata_service: MetadataServiceRef,
|
||||
mailbox: MailboxRef,
|
||||
ddl_manager: DdlManagerRef,
|
||||
}
|
||||
|
||||
impl MetaSrv {
|
||||
@@ -146,20 +154,30 @@ impl MetaSrv {

        if let Some(election) = self.election() {
            let procedure_manager = self.procedure_manager.clone();
            let in_memory = self.in_memory.clone();
            let leader_cached_kv_store = self.leader_cached_kv_store.clone();
            let mut rx = election.subscribe_leader_change();
            let _handle = common_runtime::spawn_bg(async move {
                loop {
                    match rx.recv().await {
                        Ok(msg) => match msg {
                            LeaderChangeMessage::Elected(_) => {
                                if let Err(e) = procedure_manager.recover().await {
                                    error!("Failed to recover procedures, error: {e}");
                        Ok(msg) => {
                            in_memory.reset();
                            leader_cached_kv_store.reset();
                            info!(
                                "Leader's cache has bean cleared on leader change: {:?}",
                                msg
                            );
                            match msg {
                                LeaderChangeMessage::Elected(_) => {
                                    if let Err(e) = procedure_manager.recover().await {
                                        error!("Failed to recover procedures, error: {e}");
                                    }
                                }
                                LeaderChangeMessage::StepDown(leader) => {
                                    error!("Leader :{:?} step down", leader);
                                }
                            }
                            LeaderChangeMessage::StepDown(leader) => {
                                error!("Leader :{:?} step down", leader);
                            }
                        },
                        }
                        Err(RecvError::Closed) => {
                            error!("Not expected, is leader election loop still running?");
                            break;
@@ -219,6 +237,11 @@ impl MetaSrv {
|
||||
self.kv_store.clone()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn leader_cached_kv_store(&self) -> ResettableKvStoreRef {
|
||||
self.leader_cached_kv_store.clone()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn meta_peer_client(&self) -> MetaPeerClientRef {
|
||||
self.meta_peer_client.clone()
|
||||
@@ -254,6 +277,11 @@ impl MetaSrv {
|
||||
self.mailbox.clone()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn ddl_manager(&self) -> &DdlManagerRef {
|
||||
&self.ddl_manager
|
||||
}
|
||||
|
||||
pub fn procedure_manager(&self) -> &ProcedureManagerRef {
|
||||
&self.procedure_manager
|
||||
}
|
||||
@@ -263,6 +291,7 @@ impl MetaSrv {
|
||||
let server_addr = self.options().server_addr.clone();
|
||||
let in_memory = self.in_memory();
|
||||
let kv_store = self.kv_store();
|
||||
let leader_cached_kv_store = self.leader_cached_kv_store();
|
||||
let meta_peer_client = self.meta_peer_client();
|
||||
let mailbox = self.mailbox();
|
||||
let election = self.election();
|
||||
@@ -271,6 +300,7 @@ impl MetaSrv {
|
||||
server_addr,
|
||||
in_memory,
|
||||
kv_store,
|
||||
leader_cached_kv_store,
|
||||
meta_peer_client,
|
||||
mailbox,
|
||||
election,
|
||||
|
||||
@@ -15,9 +15,11 @@
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::Arc;
|
||||
|
||||
use client::client_manager::DatanodeClients;
|
||||
use common_procedure::local::{LocalManager, ManagerConfig};
|
||||
|
||||
use crate::cluster::{MetaPeerClientBuilder, MetaPeerClientRef};
|
||||
use crate::ddl::DdlManager;
|
||||
use crate::error::Result;
|
||||
use crate::handler::mailbox_handler::MailboxHandler;
|
||||
use crate::handler::region_lease_handler::RegionLeaseHandler;
|
||||
@@ -36,6 +38,7 @@ use crate::procedure::region_failover::RegionFailoverManager;
|
||||
use crate::procedure::state_store::MetaStateStore;
|
||||
use crate::selector::lease_based::LeaseBasedSelector;
|
||||
use crate::sequence::Sequence;
|
||||
use crate::service::store::cached_kv::{CheckLeader, LeaderCachedKvStore};
|
||||
use crate::service::store::kv::{KvStoreRef, ResettableKvStoreRef};
|
||||
use crate::service::store::memory::MemStore;
|
||||
|
||||
@@ -131,6 +134,10 @@ impl MetaSrvBuilder {
|
||||
|
||||
let kv_store = kv_store.unwrap_or_else(|| Arc::new(MemStore::default()));
|
||||
let in_memory = in_memory.unwrap_or_else(|| Arc::new(MemStore::default()));
|
||||
let leader_cached_kv_store = Arc::new(LeaderCachedKvStore::new(
|
||||
Arc::new(CheckLeaderByElection(election.clone())),
|
||||
kv_store.clone(),
|
||||
));
|
||||
let meta_peer_client = meta_peer_client.unwrap_or_else(|| {
|
||||
MetaPeerClientBuilder::default()
|
||||
.election(election.clone())
|
||||
@@ -146,6 +153,9 @@ impl MetaSrvBuilder {
|
||||
let mailbox = HeartbeatMailbox::create(pushers.clone(), mailbox_sequence);
|
||||
let state_store = Arc::new(MetaStateStore::new(kv_store.clone()));
|
||||
let procedure_manager = Arc::new(LocalManager::new(ManagerConfig::default(), state_store));
|
||||
let table_id_sequence = Arc::new(Sequence::new(TABLE_ID_SEQ, 1024, 10, kv_store.clone()));
|
||||
let metadata_service = metadata_service
|
||||
.unwrap_or_else(|| Arc::new(DefaultMetadataService::new(kv_store.clone())));
|
||||
let lock = lock.unwrap_or_else(|| Arc::new(MemLock::default()));
|
||||
|
||||
let handler_group = match handler_group {
|
||||
@@ -202,16 +212,21 @@ impl MetaSrvBuilder {
|
||||
}
|
||||
};
|
||||
|
||||
let table_id_sequence = Arc::new(Sequence::new(TABLE_ID_SEQ, 1024, 10, kv_store.clone()));
|
||||
// TODO(weny): considers to modify the default config of procedure manager
|
||||
let ddl_manager = Arc::new(DdlManager::new(
|
||||
procedure_manager.clone(),
|
||||
kv_store.clone(),
|
||||
Arc::new(DatanodeClients::default()),
|
||||
));
|
||||
|
||||
let metadata_service = metadata_service
|
||||
.unwrap_or_else(|| Arc::new(DefaultMetadataService::new(kv_store.clone())));
|
||||
let _ = ddl_manager.try_start();
|
||||
|
||||
Ok(MetaSrv {
|
||||
started,
|
||||
options,
|
||||
in_memory,
|
||||
kv_store,
|
||||
leader_cached_kv_store,
|
||||
meta_peer_client,
|
||||
table_id_sequence,
|
||||
selector,
|
||||
@@ -221,6 +236,7 @@ impl MetaSrvBuilder {
|
||||
procedure_manager,
|
||||
metadata_service,
|
||||
mailbox,
|
||||
ddl_manager,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -230,3 +246,13 @@ impl Default for MetaSrvBuilder {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
struct CheckLeaderByElection(Option<ElectionRef>);
|
||||
|
||||
impl CheckLeader for CheckLeaderByElection {
|
||||
fn check(&self) -> bool {
|
||||
self.0
|
||||
.as_ref()
|
||||
.map_or(false, |election| election.is_leader())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,5 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod create_table;
pub mod region_failover;
pub(crate) mod state_store;
330 src/meta-srv/src/procedure/create_table.rs Normal file
@@ -0,0 +1,330 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::meta::TableRouteValue;
|
||||
use async_trait::async_trait;
|
||||
use catalog::helper::TableGlobalKey;
|
||||
use client::Database;
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_meta::key::TableRouteKey;
|
||||
use common_meta::rpc::ddl::CreateTableTask;
|
||||
use common_meta::rpc::router::TableRoute;
|
||||
use common_meta::table_name::TableName;
|
||||
use common_procedure::error::{FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu};
|
||||
use common_procedure::{
|
||||
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure, Status,
|
||||
};
|
||||
use futures::future::join_all;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, ResultExt};
|
||||
use table::engine::TableReference;
|
||||
|
||||
use crate::ddl::DdlContext;
|
||||
use crate::error::{self, Result};
|
||||
use crate::service::router::create_table_global_value;
|
||||
use crate::service::store::txn::{Compare, CompareOp, Txn, TxnOp};
|
||||
use crate::table_routes::get_table_global_value;
|
||||
|
||||
// TODO(weny): removes in following PRs.
|
||||
#[allow(unused)]
|
||||
pub struct CreateTableProcedure {
|
||||
context: DdlContext,
|
||||
creator: TableCreator,
|
||||
}
|
||||
|
||||
// TODO(weny): removes in following PRs.
|
||||
#[allow(dead_code)]
|
||||
impl CreateTableProcedure {
|
||||
pub(crate) const TYPE_NAME: &'static str = "metasrv-procedure::CreateTable";
|
||||
|
||||
pub(crate) fn new(
|
||||
cluster_id: u64,
|
||||
task: CreateTableTask,
|
||||
table_route: TableRoute,
|
||||
context: DdlContext,
|
||||
) -> Self {
|
||||
Self {
|
||||
context,
|
||||
creator: TableCreator::new(cluster_id, task, table_route),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn from_json(json: &str, context: DdlContext) -> ProcedureResult<Self> {
|
||||
let data = serde_json::from_str(json).context(FromJsonSnafu)?;
|
||||
Ok(CreateTableProcedure {
|
||||
context,
|
||||
creator: TableCreator { data },
|
||||
})
|
||||
}
|
||||
|
||||
fn global_table_key(&self) -> TableGlobalKey {
|
||||
let table_ref = self.creator.data.table_ref();
|
||||
|
||||
TableGlobalKey {
|
||||
catalog_name: table_ref.catalog.to_string(),
|
||||
schema_name: table_ref.schema.to_string(),
|
||||
table_name: table_ref.table.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn table_name(&self) -> TableName {
|
||||
self.creator.data.task.table_name()
|
||||
}
|
||||
|
||||
/// Checks whether the table exists.
|
||||
async fn on_prepare(&mut self) -> Result<Status> {
|
||||
if (get_table_global_value(&self.context.kv_store, &self.global_table_key()).await?)
|
||||
.is_some()
|
||||
{
|
||||
ensure!(
|
||||
self.creator.data.task.create_table.create_if_not_exists,
|
||||
error::TableAlreadyExistsSnafu {
|
||||
table_name: self.creator.data.table_ref().to_string(),
|
||||
}
|
||||
);
|
||||
|
||||
return Ok(Status::Done);
|
||||
}
|
||||
|
||||
self.creator.data.state = CreateTableState::DatanodeCreateTable;
|
||||
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
/// registers the `TableRouteValue`,`TableGlobalValue`
|
||||
async fn register_metadata(&self) -> Result<()> {
|
||||
let table_name = self.table_name();
|
||||
|
||||
let table_id = self.creator.data.table_route.table.id;
|
||||
|
||||
let table_route_key = TableRouteKey::with_table_name(table_id, &table_name.clone().into())
|
||||
.key()
|
||||
.into_bytes();
|
||||
|
||||
let table_global_key = TableGlobalKey {
|
||||
catalog_name: table_name.catalog_name.clone(),
|
||||
schema_name: table_name.schema_name.clone(),
|
||||
table_name: table_name.table_name.clone(),
|
||||
}
|
||||
.to_string()
|
||||
.into_bytes();
|
||||
|
||||
let (peers, table_route) = self
|
||||
.creator
|
||||
.data
|
||||
.table_route
|
||||
.clone()
|
||||
.try_into_raw()
|
||||
.context(error::ConvertProtoDataSnafu)?;
|
||||
|
||||
let table_route_value = TableRouteValue {
|
||||
peers,
|
||||
table_route: Some(table_route),
|
||||
};
|
||||
|
||||
let table_global_value = create_table_global_value(
|
||||
&table_route_value,
|
||||
self.creator.data.task.table_info.clone(),
|
||||
)?
|
||||
.as_bytes()
|
||||
.context(error::InvalidCatalogValueSnafu)?;
|
||||
|
||||
let txn = Txn::new()
|
||||
.when(vec![
|
||||
Compare::with_not_exist_value(table_route_key.clone(), CompareOp::Equal),
|
||||
Compare::with_not_exist_value(table_global_key.clone(), CompareOp::Equal),
|
||||
])
|
||||
.and_then(vec![
|
||||
TxnOp::Put(table_route_key, table_route_value.into()),
|
||||
TxnOp::Put(table_global_key, table_global_value),
|
||||
]);
|
||||
|
||||
let resp = self.context.kv_store.txn(txn).await?;
|
||||
|
||||
ensure!(
|
||||
resp.succeeded,
|
||||
error::TxnSnafu {
|
||||
msg: "table_route_key or table_global_key exists"
|
||||
}
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn on_create_metadata(&mut self) -> Result<Status> {
|
||||
let kv_store = &self.context.kv_store;
|
||||
let key = &self.global_table_key();
|
||||
|
||||
match get_table_global_value(kv_store, key).await? {
|
||||
Some(table_global_value) => {
|
||||
// The metasrv crashed after metadata was created immediately.
|
||||
// Recovers table_route from kv.
|
||||
let table_id = table_global_value.table_id() as u64;
|
||||
|
||||
let expected = self.creator.data.table_route.table.id;
|
||||
// If there is something like:
|
||||
// Create table A, Create table A(from another Fe, Somehow, Failed), Renames table A to B, Create table A(Recovered).
|
||||
// We must ensure the table_id isn't changed.
|
||||
ensure!(
|
||||
table_id == expected,
|
||||
error::TableIdChangedSnafu {
|
||||
expected,
|
||||
found: table_id
|
||||
}
|
||||
);
|
||||
}
|
||||
None => {
|
||||
// registers metadata
|
||||
self.register_metadata().await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Status::Done)
|
||||
}
|
||||
|
||||
async fn on_datanode_create_table(&mut self) -> Result<Status> {
|
||||
let table_route = &self.creator.data.table_route;
|
||||
|
||||
let table_name = self.table_name();
|
||||
let clients = self.context.datanode_clients.clone();
|
||||
let leaders = table_route.find_leaders();
|
||||
let mut joins = Vec::with_capacity(leaders.len());
|
||||
|
||||
for datanode in leaders {
|
||||
let client = clients.get_client(&datanode).await;
|
||||
let client = Database::new(&table_name.catalog_name, &table_name.schema_name, client);
|
||||
|
||||
let regions = table_route.find_leader_regions(&datanode);
|
||||
let mut create_expr_for_region = self.creator.data.task.create_table.clone();
|
||||
create_expr_for_region.region_numbers = regions;
|
||||
|
||||
joins.push(common_runtime::spawn_bg(async move {
|
||||
if let Err(err) = client
|
||||
.create(create_expr_for_region)
|
||||
.await
|
||||
.context(error::RequestDatanodeSnafu { peer: datanode })
|
||||
{
|
||||
// TODO(weny): add tests for `TableAlreadyExists`
|
||||
if err.status_code() != StatusCode::TableAlreadyExists {
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}));
|
||||
}
|
||||
|
||||
let _ = join_all(joins)
|
||||
.await
|
||||
.into_iter()
|
||||
.map(|result| {
|
||||
result.map_err(|err| {
|
||||
error::RetryLaterSnafu {
|
||||
reason: format!(
|
||||
"Failed to execute create table on datanode, source: {}",
|
||||
err
|
||||
),
|
||||
}
|
||||
.build()
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
self.creator.data.state = CreateTableState::CreateMetadata;
|
||||
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Procedure for CreateTableProcedure {
|
||||
fn type_name(&self) -> &str {
|
||||
Self::TYPE_NAME
|
||||
}
|
||||
|
||||
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
|
||||
let error_handler = |e| {
|
||||
if matches!(e, error::Error::RetryLater { .. }) {
|
||||
ProcedureError::retry_later(e)
|
||||
} else {
|
||||
ProcedureError::external(e)
|
||||
}
|
||||
};
|
||||
match self.creator.data.state {
|
||||
CreateTableState::Prepare => self.on_prepare().await.map_err(error_handler),
|
||||
CreateTableState::DatanodeCreateTable => {
|
||||
self.on_datanode_create_table().await.map_err(error_handler)
|
||||
}
|
||||
CreateTableState::CreateMetadata => {
|
||||
self.on_create_metadata().await.map_err(error_handler)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn dump(&self) -> ProcedureResult<String> {
|
||||
serde_json::to_string(&self.creator.data).context(ToJsonSnafu)
|
||||
}
|
||||
|
||||
fn lock_key(&self) -> LockKey {
|
||||
let table_ref = &self.creator.data.table_ref();
|
||||
let key = common_catalog::format_full_table_name(
|
||||
table_ref.catalog,
|
||||
table_ref.schema,
|
||||
table_ref.table,
|
||||
);
|
||||
|
||||
LockKey::single(key)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TableCreator {
|
||||
data: CreateTableData,
|
||||
}
|
||||
|
||||
impl TableCreator {
|
||||
pub fn new(cluster_id: u64, task: CreateTableTask, table_route: TableRoute) -> Self {
|
||||
Self {
|
||||
data: CreateTableData {
|
||||
state: CreateTableState::Prepare,
|
||||
cluster_id,
|
||||
task,
|
||||
table_route,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
enum CreateTableState {
|
||||
/// Prepares to create the table
|
||||
Prepare,
|
||||
/// Datanode creates the table
|
||||
DatanodeCreateTable,
|
||||
/// Creates metadata
|
||||
CreateMetadata,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct CreateTableData {
|
||||
state: CreateTableState,
|
||||
task: CreateTableTask,
|
||||
table_route: TableRoute,
|
||||
cluster_id: u64,
|
||||
}
|
||||
|
||||
impl CreateTableData {
|
||||
fn table_ref(&self) -> TableReference<'_> {
|
||||
self.task.table_ref()
|
||||
}
|
||||
}
|
||||
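CreateTableProcedure above is a small persisted state machine: each execute call handles the current state, and dump/from_json let a restarted metasrv resume from the last persisted step. A stripped-down sketch of that shape with generic names (assumes the serde and serde_json crates; these are not the real common_procedure types):

use serde::{Deserialize, Serialize};

// Generic stand-in for the three-step flow above; the real step bodies are omitted.
#[derive(Debug, Serialize, Deserialize)]
enum Step {
    Prepare,
    DatanodeCreateTable,
    CreateMetadata,
    Done,
}

#[derive(Debug, Serialize, Deserialize)]
struct ProcedureData {
    step: Step,
    cluster_id: u64,
}

impl ProcedureData {
    // Advance one step; the caller persists dump() between steps so a restarted
    // node can resume from the last recorded state instead of starting over.
    fn execute_once(&mut self) {
        let next = match &self.step {
            Step::Prepare => Step::DatanodeCreateTable,
            Step::DatanodeCreateTable => Step::CreateMetadata,
            Step::CreateMetadata | Step::Done => Step::Done,
        };
        self.step = next;
    }

    fn dump(&self) -> serde_json::Result<String> {
        serde_json::to_string(self)
    }

    fn load(json: &str) -> serde_json::Result<Self> {
        serde_json::from_str(json)
    }
}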
@@ -19,6 +19,7 @@ use tonic::{Response, Status};
|
||||
|
||||
pub mod admin;
|
||||
pub mod cluster;
|
||||
pub mod ddl;
|
||||
mod heartbeat;
|
||||
pub mod lock;
|
||||
pub mod mailbox;
|
||||
|
||||
187 src/meta-srv/src/service/ddl.rs Normal file
@@ -0,0 +1,187 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::meta::{
|
||||
ddl_task_server, Partition, Region, RegionRoute, SubmitDdlTaskRequest, SubmitDdlTaskResponse,
|
||||
Table, TableRoute,
|
||||
};
|
||||
use api::v1::TableId;
|
||||
use common_meta::rpc::ddl::{CreateTableTask, DdlTask};
|
||||
use common_meta::rpc::router;
|
||||
use common_meta::table_name::TableName;
|
||||
use common_telemetry::{info, warn};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use table::metadata::RawTableInfo;
|
||||
use tonic::{Request, Response};
|
||||
|
||||
use super::GrpcResult;
|
||||
use crate::ddl::DdlManagerRef;
|
||||
use crate::error::{self, Result};
|
||||
use crate::metasrv::{MetaSrv, SelectorContext, SelectorRef};
|
||||
use crate::sequence::SequenceRef;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ddl_task_server::DdlTask for MetaSrv {
|
||||
async fn submit_ddl_task(
|
||||
&self,
|
||||
request: Request<SubmitDdlTaskRequest>,
|
||||
) -> GrpcResult<SubmitDdlTaskResponse> {
|
||||
let SubmitDdlTaskRequest { header, task, .. } = request.into_inner();
|
||||
|
||||
let header = header.context(error::MissingRequestHeaderSnafu)?;
|
||||
let task: DdlTask = task
|
||||
.context(error::MissingRequiredParameterSnafu { param: "task" })?
|
||||
.try_into()
|
||||
.context(error::ConvertProtoDataSnafu)?;
|
||||
|
||||
let ctx = SelectorContext {
|
||||
datanode_lease_secs: self.options().datanode_lease_secs,
|
||||
server_addr: self.options().server_addr.clone(),
|
||||
kv_store: self.kv_store(),
|
||||
meta_peer_client: self.meta_peer_client(),
|
||||
catalog: None,
|
||||
schema: None,
|
||||
table: None,
|
||||
};
|
||||
|
||||
let resp = match task {
|
||||
DdlTask::CreateTable(create_table_task) => {
|
||||
handle_create_table_task(
|
||||
header.cluster_id,
|
||||
create_table_task,
|
||||
ctx,
|
||||
self.selector().clone(),
|
||||
self.table_id_sequence().clone(),
|
||||
self.ddl_manager().clone(),
|
||||
)
|
||||
.await?
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Response::new(resp))
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_create_table_task(
|
||||
cluster_id: u64,
|
||||
mut create_table_task: CreateTableTask,
|
||||
ctx: SelectorContext,
|
||||
selector: SelectorRef,
|
||||
table_id_sequence: SequenceRef,
|
||||
ddl_manager: DdlManagerRef,
|
||||
) -> Result<SubmitDdlTaskResponse> {
|
||||
let table_name = create_table_task.table_name();
|
||||
|
||||
let ctx = SelectorContext {
|
||||
datanode_lease_secs: ctx.datanode_lease_secs,
|
||||
server_addr: ctx.server_addr,
|
||||
kv_store: ctx.kv_store,
|
||||
meta_peer_client: ctx.meta_peer_client,
|
||||
catalog: Some(table_name.catalog_name.clone()),
|
||||
schema: Some(table_name.schema_name.clone()),
|
||||
table: Some(table_name.table_name.clone()),
|
||||
};
|
||||
|
||||
let partitions = create_table_task
|
||||
.partitions
|
||||
.clone()
|
||||
.into_iter()
|
||||
.map(Into::into)
|
||||
.collect();
|
||||
|
||||
let table_route = handle_create_table_route(
|
||||
cluster_id,
|
||||
table_name,
|
||||
partitions,
|
||||
&mut create_table_task.table_info,
|
||||
ctx,
|
||||
selector,
|
||||
table_id_sequence,
|
||||
)
|
||||
.await?;
|
||||
let table_id = table_route.table.id;
|
||||
|
||||
let id = ddl_manager
|
||||
.submit_create_table_task(cluster_id, create_table_task, table_route)
|
||||
.await?;
|
||||
|
||||
info!("Table: {table_id} created via procedure_id {id:?}");
|
||||
|
||||
Ok(SubmitDdlTaskResponse {
|
||||
key: id.to_string().into(),
|
||||
table_id: Some(TableId {
|
||||
id: table_id as u32,
|
||||
}),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
/// pre-calculates create table task's metadata.
|
||||
async fn handle_create_table_route(
|
||||
cluster_id: u64,
|
||||
table_name: TableName,
|
||||
partitions: Vec<Partition>,
|
||||
table_info: &mut RawTableInfo,
|
||||
ctx: SelectorContext,
|
||||
selector: SelectorRef,
|
||||
table_id_sequence: SequenceRef,
|
||||
) -> Result<router::TableRoute> {
|
||||
let mut peers = selector.select(cluster_id, &ctx).await?;
|
||||
|
||||
if peers.len() < partitions.len() {
|
||||
warn!("Create table failed due to no enough available datanodes, table: {table_name:?}, partition number: {}, datanode number: {}", partitions.len(), peers.len());
|
||||
return error::NoEnoughAvailableDatanodeSnafu {
|
||||
expected: partitions.len(),
|
||||
available: peers.len(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
|
||||
// We don't need to keep all peers, just truncate it to the number of partitions.
|
||||
// If the peers are not enough, some peers will be used for multiple partitions.
|
||||
peers.truncate(partitions.len());
|
||||
|
||||
let id = table_id_sequence.next().await?;
|
||||
table_info.ident.table_id = id as u32;
|
||||
|
||||
let table = Table {
|
||||
id,
|
||||
table_name: Some(table_name.into()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let region_routes = partitions
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(i, partition)| {
|
||||
let region = Region {
|
||||
id: i as u64,
|
||||
partition: Some(partition),
|
||||
..Default::default()
|
||||
};
|
||||
RegionRoute {
|
||||
region: Some(region),
|
||||
leader_peer_index: (i % peers.len()) as u64,
|
||||
follower_peer_indexes: vec![], // follower_peers is not supported at the moment
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let table_route = TableRoute {
|
||||
table: Some(table),
|
||||
region_routes,
|
||||
};
|
||||
|
||||
router::TableRoute::try_from_raw(&peers, table_route).context(error::TableRouteConversionSnafu)
|
||||
}
|
||||
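A tiny worked example of the region-to-peer assignment in handle_create_table_route above, under the assumption that the selector returned at least as many peers as partitions (the guard above rejects the opposite case):

// Peers are truncated to the partition count, then partition i is led by
// peer i % peers.len(), which is a one-to-one mapping after truncation.
fn assign_leaders(partitions: usize, mut peers: Vec<&'static str>) -> Vec<(usize, &'static str)> {
    peers.truncate(partitions);
    (0..partitions).map(|i| (i, peers[i % peers.len()])).collect()
}

fn main() {
    // 3 partitions over 5 selected datanodes: only the first 3 peers are kept.
    let assignment = assign_leaders(3, vec!["dn-1", "dn-2", "dn-3", "dn-4", "dn-5"]);
    assert_eq!(assignment, vec![(0, "dn-1"), (1, "dn-2"), (2, "dn-3")]);
}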
@@ -258,7 +258,7 @@ async fn handle_create(
    })
}

fn create_table_global_value(
pub(crate) fn create_table_global_value(
    table_route_value: &TableRouteValue,
    table_info: RawTableInfo,
) -> Result<TableGlobalValue> {
@@ -349,7 +349,7 @@ async fn handle_delete(req: DeleteRequest, ctx: Context) -> Result<RouteResponse
    })
}

fn fill_table_routes(
pub(crate) fn fill_table_routes(
    tables: Vec<(TableGlobalValue, TableRouteValue)>,
) -> Result<(Vec<Peer>, Vec<TableRoute>)> {
    let mut peer_dict = PeerDict::default();
@@ -407,7 +407,7 @@ async fn fetch_tables(
    Ok(tables)
}

fn table_route_key(table_id: u64, t: &TableGlobalKey) -> TableRouteKey<'_> {
pub(crate) fn table_route_key(table_id: u64, t: &TableGlobalKey) -> TableRouteKey<'_> {
    TableRouteKey {
        table_id,
        catalog_name: &t.catalog_name,
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod cached_kv;
pub mod etcd;
pub(crate) mod etcd_util;
pub mod ext;
474 src/meta-srv/src/service/store/cached_kv.rs Normal file
@@ -0,0 +1,474 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::meta::{
|
||||
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
|
||||
BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
|
||||
DeleteRangeResponse, KeyValue, MoveValueRequest, MoveValueResponse, PutRequest, PutResponse,
|
||||
RangeRequest, RangeResponse,
|
||||
};
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::service::store::ext::KvStoreExt;
|
||||
use crate::service::store::kv::{KvStore, KvStoreRef, ResettableKvStore, ResettableKvStoreRef};
|
||||
use crate::service::store::memory::MemStore;
|
||||
use crate::service::store::txn::{Txn, TxnOp, TxnRequest, TxnResponse, TxnService};
|
||||
|
||||
pub type CheckLeaderRef = Arc<dyn CheckLeader>;
|
||||
|
||||
pub trait CheckLeader: Sync + Send {
|
||||
fn check(&self) -> bool;
|
||||
}
|
||||
|
||||
struct AlwaysLeader;
|
||||
|
||||
impl CheckLeader for AlwaysLeader {
|
||||
fn check(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// A cache dedicated to a Leader node, in order to cache some metadata.
|
||||
///
|
||||
/// To use this cache, the following constraints must be followed:
|
||||
/// 1. The leader node can create this metadata.
|
||||
/// 2. The follower node can create this metadata. The leader node can lazily retrieve
|
||||
/// the corresponding data through the caching loading mechanism.
|
||||
/// 3. Only the leader node can update this metadata, as the cache cannot detect
|
||||
/// modifications made to the data on the follower node.
|
||||
/// 4. Only the leader node can delete this metadata for the same reason mentioned above.
|
||||
pub struct LeaderCachedKvStore {
|
||||
check_leader: CheckLeaderRef,
|
||||
store: KvStoreRef,
|
||||
cache: ResettableKvStoreRef,
|
||||
version: AtomicUsize,
|
||||
}
|
||||
|
||||
impl LeaderCachedKvStore {
|
||||
pub fn new(check_leader: CheckLeaderRef, store: KvStoreRef) -> Self {
|
||||
Self {
|
||||
check_leader,
|
||||
store,
|
||||
cache: Arc::new(MemStore::new()),
|
||||
version: AtomicUsize::new(0),
|
||||
}
|
||||
}
|
||||
|
||||
/// With a leader checker which always returns true when checking,
|
||||
/// mainly used in test scenarios.
|
||||
pub fn with_always_leader(store: KvStoreRef) -> Self {
|
||||
Self::new(Arc::new(AlwaysLeader), store)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_leader(&self) -> bool {
|
||||
self.check_leader.check()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
async fn invalid_key(&self, key: Vec<u8>) -> Result<()> {
|
||||
let _ = self.cache.delete(key, false).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
async fn invalid_keys(&self, keys: Vec<Vec<u8>>) -> Result<()> {
|
||||
let txn = Txn::new().and_then(keys.into_iter().map(TxnOp::Delete).collect::<Vec<_>>());
|
||||
let _ = self.cache.txn(txn).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_version(&self) -> usize {
|
||||
self.version.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn create_new_version(&self) -> usize {
|
||||
self.version.fetch_add(1, Ordering::Relaxed) + 1
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn validate_version(&self, version: usize) -> bool {
|
||||
version == self.version.load(Ordering::Relaxed)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl KvStore for LeaderCachedKvStore {
|
||||
async fn range(&self, req: RangeRequest) -> Result<RangeResponse> {
|
||||
if !self.is_leader() {
|
||||
return self.store.range(req).await;
|
||||
}
|
||||
|
||||
// We can only cache for exact key queries (i.e. get requests)
|
||||
// because we cannot confirm if a range response is complete.
|
||||
if !req.range_end.is_empty() {
|
||||
return self.store.range(req).await;
|
||||
}
|
||||
|
||||
let res = self.cache.range(req.clone()).await?;
|
||||
if !res.kvs.is_empty() {
|
||||
return Ok(res);
|
||||
}
|
||||
|
||||
let ver = self.get_version();
|
||||
|
||||
let res = self.store.range(req.clone()).await?;
|
||||
if !res.kvs.is_empty() {
|
||||
let KeyValue { key, value } = res.kvs[0].clone();
|
||||
let put_req = PutRequest {
|
||||
key: key.clone(),
|
||||
value,
|
||||
..Default::default()
|
||||
};
|
||||
let _ = self.cache.put(put_req).await?;
|
||||
|
||||
if !self.validate_version(ver) {
|
||||
self.invalid_key(key).await?;
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(res);
|
||||
}
|
||||
|
||||
async fn put(&self, req: PutRequest) -> Result<PutResponse> {
|
||||
if !self.is_leader() {
|
||||
return self.store.put(req).await;
|
||||
}
|
||||
|
||||
let ver = self.create_new_version();
|
||||
|
||||
let res = self.store.put(req.clone()).await?;
|
||||
let _ = self.cache.put(req.clone()).await?;
|
||||
|
||||
if !self.validate_version(ver) {
|
||||
self.invalid_key(req.key).await?;
|
||||
}
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
|
||||
if !self.is_leader() {
|
||||
return self.store.batch_get(req).await;
|
||||
}
|
||||
|
||||
let cached_res = self.cache.batch_get(req.clone()).await?;
|
||||
// The cache hit all keys
|
||||
if cached_res.kvs.len() == req.keys.len() {
|
||||
return Ok(cached_res);
|
||||
}
|
||||
|
||||
let hit_keys = cached_res
|
||||
.kvs
|
||||
.iter()
|
||||
.map(|kv| kv.key.clone())
|
||||
.collect::<HashSet<_>>();
|
||||
let missed_keys = req
|
||||
.keys
|
||||
.iter()
|
||||
.filter(|key| !hit_keys.contains(*key))
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
let remote_req = BatchGetRequest {
|
||||
keys: missed_keys,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let ver = self.get_version();
|
||||
|
||||
let remote_res = self.store.batch_get(remote_req).await?;
|
||||
let put_req = BatchPutRequest {
|
||||
kvs: remote_res.kvs.clone(),
|
||||
..Default::default()
|
||||
};
|
||||
let _ = self.cache.batch_put(put_req).await?;
|
||||
|
||||
if !self.validate_version(ver) {
|
||||
let keys = remote_res
|
||||
.kvs
|
||||
.iter()
|
||||
.map(|kv| kv.key.clone())
|
||||
.collect::<Vec<_>>();
|
||||
self.invalid_keys(keys).await?;
|
||||
}
|
||||
|
||||
let mut merged_res = cached_res;
|
||||
merged_res.kvs.extend(remote_res.kvs);
|
||||
Ok(merged_res)
|
||||
}
|
||||
|
||||
async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
|
||||
if !self.is_leader() {
|
||||
return self.store.batch_put(req).await;
|
||||
}
|
||||
|
||||
let ver = self.create_new_version();
|
||||
|
||||
let res = self.store.batch_put(req.clone()).await?;
|
||||
let _ = self.cache.batch_put(req.clone()).await?;
|
||||
|
||||
if !self.validate_version(ver) {
|
||||
let keys = req.kvs.into_iter().map(|kv| kv.key).collect::<Vec<_>>();
|
||||
self.invalid_keys(keys).await?;
|
||||
}
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
async fn batch_delete(&self, req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
|
||||
if !self.is_leader() {
|
||||
return self.store.batch_delete(req).await;
|
||||
}
|
||||
|
||||
let _ = self.create_new_version();
|
||||
|
||||
let res = self.store.batch_delete(req.clone()).await?;
|
||||
let _ = self.cache.batch_delete(req).await?;
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
async fn compare_and_put(&self, req: CompareAndPutRequest) -> Result<CompareAndPutResponse> {
|
||||
if !self.is_leader() {
|
||||
return self.store.compare_and_put(req).await;
|
||||
}
|
||||
|
||||
let _ = self.create_new_version();
|
||||
|
||||
let key = req.key.clone();
|
||||
let res = self.store.compare_and_put(req).await?;
|
||||
// Delete key in the cache.
|
||||
//
|
||||
// Cache can not deal with the CAS operation, because it does
|
||||
// not contain full data, so we need to delete the key.
|
||||
self.invalid_key(key).await?;
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
|
||||
if !self.is_leader() {
|
||||
return self.store.delete_range(req).await;
|
||||
}
|
||||
|
||||
let _ = self.create_new_version();
|
||||
|
||||
let res = self.store.delete_range(req.clone()).await?;
|
||||
let _ = self.cache.delete_range(req).await?;
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
async fn move_value(&self, req: MoveValueRequest) -> Result<MoveValueResponse> {
|
||||
if !self.is_leader() {
|
||||
return self.store.move_value(req).await;
|
||||
}
|
||||
|
||||
let _ = self.create_new_version();
|
||||
|
||||
let res = self.store.move_value(req.clone()).await?;
|
||||
let MoveValueRequest {
|
||||
from_key, to_key, ..
|
||||
} = req;
|
||||
// Delete all keys in the cache.
|
||||
//
|
||||
// Cache can not deal with the move operation, because it does
|
||||
// not contain full data, so we need to delete both keys.
|
||||
self.invalid_keys(vec![from_key, to_key]).await?;
|
||||
Ok(res)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl TxnService for LeaderCachedKvStore {
|
||||
async fn txn(&self, txn: Txn) -> Result<TxnResponse> {
|
||||
if !self.is_leader() {
|
||||
return self.store.txn(txn).await;
|
||||
}
|
||||
|
||||
let _ = self.create_new_version();
|
||||
|
||||
let res = self.store.txn(txn.clone()).await?;
|
||||
let TxnRequest {
|
||||
success, failure, ..
|
||||
} = txn.into();
|
||||
let mut all = success;
|
||||
all.extend(failure);
|
||||
// Delete all keys in the cache.
|
||||
//
|
||||
// Cache can not deal with the txn operation, because it does
|
||||
// not contain full data, so we need to delete both keys.
|
||||
let mut keys = Vec::with_capacity(all.len());
|
||||
for txn_op in all {
|
||||
match txn_op {
|
||||
TxnOp::Put(key, _) => {
|
||||
keys.push(key);
|
||||
}
|
||||
TxnOp::Delete(key) => {
|
||||
keys.push(key);
|
||||
}
|
||||
TxnOp::Get(_) => {}
|
||||
}
|
||||
}
|
||||
self.invalid_keys(keys).await?;
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
}
|
||||
|
||||
impl ResettableKvStore for LeaderCachedKvStore {
|
||||
fn reset(&self) {
|
||||
self.cache.reset()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use api::v1::meta::KeyValue;
|
||||
|
||||
use super::*;
|
||||
use crate::service::store::memory::MemStore;
|
||||
|
||||
fn create_leader_cached_kv_store() -> LeaderCachedKvStore {
|
||||
let store = Arc::new(MemStore::new());
|
||||
LeaderCachedKvStore::with_always_leader(store)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_get_put_delete() {
|
||||
let cached_store = create_leader_cached_kv_store();
|
||||
let inner_store = cached_store.store.clone();
|
||||
let inner_cache = cached_store.cache.clone();
|
||||
|
||||
let key = "test_key".to_owned().into_bytes();
|
||||
let value = "value".to_owned().into_bytes();
|
||||
|
||||
let put_req = PutRequest {
|
||||
key: key.clone(),
|
||||
value: value.clone(),
|
||||
..Default::default()
|
||||
};
|
||||
let _ = inner_store.put(put_req).await.unwrap();
|
||||
|
||||
let cached_value = inner_cache.get(key.clone()).await.unwrap();
|
||||
assert!(cached_value.is_none());
|
||||
|
||||
let cached_value = cached_store.get(key.clone()).await.unwrap().unwrap();
|
||||
assert_eq!(cached_value.value, value);
|
||||
|
||||
let cached_value = inner_cache.get(key.clone()).await.unwrap().unwrap();
|
||||
assert_eq!(cached_value.value, value);
|
||||
|
||||
let res = cached_store
|
||||
.delete(key.clone(), true)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(res.value, value);
|
||||
|
||||
let cached_value = inner_cache.get(key.clone()).await.unwrap();
|
||||
assert!(cached_value.is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_batch_get_put_delete() {
|
||||
let cached_store = create_leader_cached_kv_store();
|
||||
let inner_store = cached_store.store.clone();
|
||||
let inner_cache = cached_store.cache.clone();
|
||||
|
||||
let kvs = (1..3)
|
||||
.map(|i| {
|
||||
let key = format!("test_key_{}", i).into_bytes();
|
||||
let value = format!("value_{}", i).into_bytes();
|
||||
KeyValue { key, value }
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let batch_put_req = BatchPutRequest {
|
||||
kvs: kvs.clone(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let _ = inner_store.batch_put(batch_put_req).await.unwrap();
|
||||
|
||||
let keys = (1..5)
|
||||
.map(|i| format!("test_key_{}", i).into_bytes())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let batch_get_req = BatchGetRequest {
|
||||
keys,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let cached_values = inner_cache.batch_get(batch_get_req.clone()).await.unwrap();
|
||||
assert!(cached_values.kvs.is_empty());
|
||||
|
||||
let cached_values = cached_store.batch_get(batch_get_req.clone()).await.unwrap();
|
||||
assert_eq!(cached_values.kvs.len(), 2);
|
||||
|
||||
let cached_values = inner_cache.batch_get(batch_get_req.clone()).await.unwrap();
|
||||
assert_eq!(cached_values.kvs.len(), 2);
|
||||
|
||||
cached_store.reset();
|
||||
|
||||
let cached_values = inner_cache.batch_get(batch_get_req).await.unwrap();
|
||||
assert!(cached_values.kvs.is_empty());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_txn() {
|
||||
let cached_store = create_leader_cached_kv_store();
|
||||
let inner_cache = cached_store.cache.clone();
|
||||
|
||||
let kvs = (1..5)
|
||||
.map(|i| {
|
||||
let key = format!("test_key_{}", i).into_bytes();
|
||||
let value = format!("value_{}", i).into_bytes();
|
||||
KeyValue { key, value }
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let batch_put_req = BatchPutRequest {
|
||||
kvs: kvs.clone(),
|
||||
..Default::default()
|
||||
};
|
||||
let _ = cached_store.batch_put(batch_put_req).await.unwrap();
|
||||
|
||||
let keys = (1..5)
|
||||
.map(|i| format!("test_key_{}", i).into_bytes())
|
||||
.collect::<Vec<_>>();
|
||||
let batch_get_req = BatchGetRequest {
|
||||
keys,
|
||||
..Default::default()
|
||||
};
|
||||
let cached_values = inner_cache.batch_get(batch_get_req.clone()).await.unwrap();
|
||||
assert_eq!(cached_values.kvs.len(), 4);
|
||||
|
||||
let put_ops = (1..5)
|
||||
.map(|i| {
|
||||
let key = format!("test_key_{}", i).into_bytes();
|
||||
let value = format!("value_{}", i).into_bytes();
|
||||
TxnOp::Put(key, value)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let txn = Txn::new().and_then(put_ops);
|
||||
let _ = cached_store.txn(txn).await.unwrap();
|
||||
|
||||
let cached_values = inner_cache.batch_get(batch_get_req).await.unwrap();
|
||||
assert!(cached_values.kvs.is_empty());
|
||||
}
|
||||
}
|
||||
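The version counter in LeaderCachedKvStore above is what keeps a racing write from leaving a stale entry in the cache: a reader snapshots the version before loading from the backing store, and drops its freshly cached entry if any write bumped the version in the meantime. A minimal single-process sketch of that read-through pattern using only std types (hypothetical VersionedCache, not the actual KvStore traits):

use std::collections::HashMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Mutex;

#[derive(Default)]
struct VersionedCache {
    // Backing store and cache; the real code talks to etcd and a MemStore instead.
    store: Mutex<HashMap<String, String>>,
    cache: Mutex<HashMap<String, String>>,
    version: AtomicUsize,
}

impl VersionedCache {
    fn get(&self, key: &str) -> Option<String> {
        if let Some(v) = self.cache.lock().unwrap().get(key) {
            return Some(v.clone());
        }
        // Snapshot the version before reading through to the backing store.
        let ver = self.version.load(Ordering::Relaxed);
        let value = self.store.lock().unwrap().get(key).cloned()?;
        self.cache.lock().unwrap().insert(key.to_string(), value.clone());
        // A concurrent write bumped the version: our cached copy may be stale, drop it.
        if ver != self.version.load(Ordering::Relaxed) {
            self.cache.lock().unwrap().remove(key);
        }
        Some(value)
    }

    fn put(&self, key: &str, value: &str) {
        // Writers bump the version first so in-flight readers can notice them.
        self.version.fetch_add(1, Ordering::Relaxed);
        self.store.lock().unwrap().insert(key.to_string(), value.to_string());
        self.cache.lock().unwrap().insert(key.to_string(), value.to_string());
    }
}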
@@ -33,8 +33,8 @@ use snafu::{ensure, OptionExt, ResultExt};
|
||||
use storage::manifest::manifest_compress_type;
|
||||
use store_api::storage::{
|
||||
CloseOptions, ColumnDescriptorBuilder, ColumnFamilyDescriptor, ColumnFamilyDescriptorBuilder,
|
||||
ColumnId, EngineContext as StorageEngineContext, OpenOptions, RegionNumber, RowKeyDescriptor,
|
||||
RowKeyDescriptorBuilder, StorageEngine,
|
||||
ColumnId, CompactionStrategy, EngineContext as StorageEngineContext, OpenOptions, RegionNumber,
|
||||
RowKeyDescriptor, RowKeyDescriptorBuilder, StorageEngine,
|
||||
};
|
||||
use table::engine::{
|
||||
region_name, table_dir, CloseTableResult, EngineContext, TableEngine, TableEngineProcedure,
|
||||
@@ -417,6 +417,7 @@ impl<S: StorageEngine> MitoEngineInner<S> {
|
||||
.await.map_err(BoxedError::new)
|
||||
.context(table_error::TableOperationSnafu)? else { return Ok(None) };
|
||||
|
||||
let compaction_strategy = CompactionStrategy::from(&table_info.meta.options.extra_options);
|
||||
let opts = OpenOptions {
|
||||
parent_dir: table_dir.to_string(),
|
||||
write_buffer_size: table_info
|
||||
@@ -425,6 +426,7 @@ impl<S: StorageEngine> MitoEngineInner<S> {
|
||||
.write_buffer_size
|
||||
.map(|s| s.0 as usize),
|
||||
ttl: table_info.meta.options.ttl,
|
||||
compaction_strategy,
|
||||
};
|
||||
|
||||
debug!(
|
||||
@@ -501,6 +503,7 @@ impl<S: StorageEngine> MitoEngineInner<S> {
|
||||
table: name,
|
||||
};
|
||||
|
||||
let compaction_strategy = CompactionStrategy::from(&table_info.meta.options.extra_options);
|
||||
let opts = OpenOptions {
|
||||
parent_dir: table_dir.to_string(),
|
||||
write_buffer_size: table_info
|
||||
@@ -509,6 +512,7 @@ impl<S: StorageEngine> MitoEngineInner<S> {
|
||||
.write_buffer_size
|
||||
.map(|s| s.0 as usize),
|
||||
ttl: table_info.meta.options.ttl,
|
||||
compaction_strategy,
|
||||
};
|
||||
|
||||
// TODO(weny): Returns an error earlier if the target region does not exist in the meta.
|
||||
|
||||
@@ -24,8 +24,8 @@ use datatypes::schema::{Schema, SchemaRef};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, ResultExt};
|
||||
use store_api::storage::{
|
||||
ColumnId, CreateOptions, EngineContext, OpenOptions, RegionDescriptorBuilder, RegionNumber,
|
||||
StorageEngine,
|
||||
ColumnId, CompactionStrategy, CreateOptions, EngineContext, OpenOptions,
|
||||
RegionDescriptorBuilder, RegionNumber, StorageEngine,
|
||||
};
|
||||
use table::engine::{region_id, table_dir};
|
||||
use table::metadata::{TableInfoBuilder, TableMetaBuilder, TableType};
|
||||
@@ -232,15 +232,18 @@ impl<S: StorageEngine> TableCreator<S> {
|
||||
let table_options = &self.data.request.table_options;
|
||||
let write_buffer_size = table_options.write_buffer_size.map(|size| size.0 as usize);
|
||||
let ttl = table_options.ttl;
|
||||
let compaction_strategy = CompactionStrategy::from(&table_options.extra_options);
|
||||
let open_opts = OpenOptions {
|
||||
parent_dir: table_dir.to_string(),
|
||||
write_buffer_size,
|
||||
ttl,
|
||||
compaction_strategy: compaction_strategy.clone(),
|
||||
};
|
||||
let create_opts = CreateOptions {
|
||||
parent_dir: table_dir.to_string(),
|
||||
write_buffer_size,
|
||||
ttl,
|
||||
compaction_strategy,
|
||||
};
|
||||
|
||||
let primary_key_indices = &self.data.request.primary_key_indices;
|
||||
|
||||
@@ -183,7 +183,6 @@ impl ErrorExt for Error {
|
||||
| BuildTableMeta { .. }
|
||||
| BuildTableInfo { .. }
|
||||
| BuildRegionDescriptor { .. }
|
||||
| TableExists { .. }
|
||||
| ProjectedColumnNotFound { .. }
|
||||
| InvalidPrimaryKey { .. }
|
||||
| MissingTimestampIndex { .. }
|
||||
@@ -191,6 +190,8 @@ impl ErrorExt for Error {
|
||||
| InvalidRawSchema { .. }
|
||||
| VersionChanged { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
TableExists { .. } => StatusCode::TableAlreadyExists,
|
||||
|
||||
ConvertRaw { .. } => StatusCode::Unexpected,
|
||||
|
||||
ScanTableManifest { .. } | UpdateTableManifest { .. } => StatusCode::StorageUnavailable,
|
||||
|
||||
7 src/mito2/Cargo.toml Normal file
@@ -0,0 +1,7 @@
|
||||
[package]
|
||||
name = "mito2"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
9 src/mito2/README.md Normal file
@@ -0,0 +1,9 @@
|
||||
# Mito
|
||||
|
||||
Mito is GreptimeDB's default region engine.
|
||||
|
||||
## About Mito
|
||||
The Alfa Romeo [MiTo](https://en.wikipedia.org/wiki/Alfa_Romeo_MiTo) is a front-wheel drive, three-door supermini designed by Centro Stile Alfa Romeo.
|
||||
|
||||
> "You can't be a true petrolhead until you've owned an Alfa Romeo."
|
||||
> <div align="right">-- by Jeremy Clarkson</div>
|
||||
17 src/mito2/src/engine.rs Normal file
@@ -0,0 +1,17 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/// Region engine implementation for timeseries data.
|
||||
#[derive(Clone)]
|
||||
pub struct MitoEngine {}
|
||||
15 src/mito2/src/lib.rs Normal file
@@ -0,0 +1,15 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod engine;
|
||||
@@ -5,9 +5,9 @@ edition.workspace = true
license.workspace = true

[features]
pprof = ["dep:common-pprof"]
mem-prof = ["dep:common-mem-prof"]
dashboard = []
mem-prof = ["dep:common-mem-prof"]
pprof = ["dep:common-pprof"]

[dependencies]
aide = { version = "0.9", features = ["axum"] }
@@ -48,7 +48,7 @@ influxdb_line_protocol = { git = "https://github.com/evenyag/influxdb_iox", bran
itertools.workspace = true
metrics.workspace = true
# metrics-process 1.0.10 depends on metrics-0.21 but opendal depends on metrics-0.20.1
metrics-process = "<1.0.10"
metrics-process = { version = "<1.0.10", optional = true }
mime_guess = "2.0"
num_cpus = "1.13"
once_cell = "1.16"
@@ -34,11 +34,12 @@ use aide::openapi::{Info, OpenApi, Server as OpenAPIServer};
use async_trait::async_trait;
use axum::body::BoxBody;
use axum::error_handling::HandleErrorLayer;
use axum::extract::MatchedPath;
use axum::extract::{DefaultBodyLimit, MatchedPath};
use axum::http::Request;
use axum::middleware::{self, Next};
use axum::response::{Html, IntoResponse, Json};
use axum::{routing, BoxError, Extension, Router};
use common_base::readable_size::ReadableSize;
use common_error::prelude::ErrorExt;
use common_error::status_code::StatusCode;
use common_query::Output;
@@ -104,6 +105,8 @@ pub(crate) async fn query_context_from_db(

pub const HTTP_API_VERSION: &str = "v1";
pub const HTTP_API_PREFIX: &str = "/v1/";
/// Default http body limit (64M).
const DEFAULT_BODY_LIMIT: ReadableSize = ReadableSize::mb(64);

// TODO(fys): This is a temporary workaround, it will be improved later
pub static PUBLIC_APIS: [&str; 2] = ["/v1/influxdb/ping", "/v1/influxdb/health"];
@@ -133,6 +136,8 @@ pub struct HttpOptions {

#[serde(skip)]
pub disable_dashboard: bool,

pub body_limit: ReadableSize,
}

impl Default for HttpOptions {
@@ -141,6 +146,7 @@ impl Default for HttpOptions {
addr: "127.0.0.1:4000".to_string(),
timeout: Duration::from_secs(30),
disable_dashboard: false,
body_limit: DEFAULT_BODY_LIMIT,
}
}
}
@@ -544,6 +550,13 @@ impl HttpServer {
.layer(HandleErrorLayer::new(handle_error))
.layer(TraceLayer::new_for_http())
.layer(TimeoutLayer::new(self.options.timeout))
.layer(DefaultBodyLimit::max(
self.options
.body_limit
.0
.try_into()
.unwrap_or_else(|_| DEFAULT_BODY_LIMIT.as_bytes() as usize),
))
// custom layer
.layer(AsyncRequireAuthorizationLayer::new(
HttpAuth::<BoxBody>::new(self.user_provider.clone()),
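The new `body_limit` option in the hunk above is converted from a `ReadableSize` into the `usize` that `DefaultBodyLimit::max` expects, falling back to the 64 MB default if the conversion fails. Below is a minimal standalone sketch of that conversion using plain integers instead of `ReadableSize`; the names are illustrative only, not GreptimeDB's API.

// Hypothetical sketch: turn a configured byte count into the usize limit a
// body-limit layer expects, falling back to a 64 MB default when the value
// does not fit into usize.
const DEFAULT_BODY_LIMIT_BYTES: u64 = 64 * 1024 * 1024;

fn body_limit_bytes(configured: u64) -> usize {
    configured
        .try_into()
        .unwrap_or(DEFAULT_BODY_LIMIT_BYTES as usize)
}

fn main() {
    assert_eq!(body_limit_bytes(1024), 1024);
    println!("default limit = {} bytes", body_limit_bytes(DEFAULT_BODY_LIMIT_BYTES));
}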
@@ -27,7 +27,7 @@ use serde::{Deserialize, Serialize};
use session::context::UserInfo;

use crate::http::{ApiState, JsonResponse};
use crate::metrics::{JEMALLOC_COLLECTOR, PROCESS_COLLECTOR};
use crate::metrics::JEMALLOC_COLLECTOR;
use crate::metrics_handler::MetricsHandler;

#[derive(Debug, Default, Serialize, Deserialize, JsonSchema)]
@@ -137,7 +137,9 @@ pub async fn metrics(
Query(_params): Query<HashMap<String, String>>,
) -> String {
// Collect process metrics.
PROCESS_COLLECTOR.collect();
#[cfg(feature = "metrics-process")]
crate::metrics::PROCESS_COLLECTOR.collect();

if let Some(c) = JEMALLOC_COLLECTOR.as_ref() {
if let Err(e) = c.update() {
error!(e; "Failed to update jemalloc metrics");
@@ -18,7 +18,6 @@ use std::time::Instant;
use common_telemetry::error;
use hyper::Body;
use metrics::gauge;
use metrics_process::Collector;
use once_cell::sync::Lazy;
use snafu::ResultExt;
use tikv_jemalloc_ctl::stats::{allocated_mib, resident_mib};
@@ -71,8 +70,9 @@ pub(crate) const METRIC_JEMALLOC_RESIDENT: &str = "sys.jemalloc.resident";
pub(crate) const METRIC_JEMALLOC_ALLOCATED: &str = "sys.jemalloc.allocated";

/// Prometheus style process metrics collector.
pub(crate) static PROCESS_COLLECTOR: Lazy<Collector> = Lazy::new(|| {
let collector = Collector::default();
#[cfg(feature = "metrics-process")]
pub(crate) static PROCESS_COLLECTOR: Lazy<metrics_process::Collector> = Lazy::new(|| {
let collector = metrics_process::Collector::default();
// Describe collector.
collector.describe();
collector
@@ -344,10 +344,16 @@ impl<W: AsyncWrite + Send + Sync + Unpin> AsyncMysqlShim<W> for MysqlInstanceShi

async fn on_init<'a>(&'a mut self, database: &'a str, w: InitWriter<'a, W>) -> Result<()> {
let (catalog, schema) = crate::parse_catalog_and_schema_from_client_database_name(database);
ensure!(
self.query_handler.is_valid_schema(catalog, schema).await?,
error::DatabaseNotFoundSnafu { catalog, schema }
);

if !self.query_handler.is_valid_schema(catalog, schema).await? {
return w
.error(
ErrorKind::ER_WRONG_DB_NAME,
format!("Unknown database '{}'", database).as_bytes(),
)
.await
.map_err(|e| e.into());
}

let user_info = &self.session.user_info();
@@ -17,16 +17,17 @@ use std::sync::Arc;
|
||||
use async_trait::async_trait;
|
||||
use common_query::logical_plan::Expr;
|
||||
use common_recordbatch::OrderOption;
|
||||
use common_telemetry::debug;
|
||||
use common_telemetry::logging;
|
||||
use common_time::range::TimestampRange;
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::{Chunk, ChunkReader, SchemaRef, SequenceNumber};
|
||||
use store_api::storage::{Chunk, ChunkReader, RegionId, SchemaRef, SequenceNumber};
|
||||
use table::predicate::{Predicate, TimeRangePredicateBuilder};
|
||||
|
||||
use crate::error::{self, Error, Result};
|
||||
use crate::memtable::{IterContext, MemtableRef};
|
||||
use crate::read::windowed::WindowedReader;
|
||||
use crate::read::{Batch, BoxedBatchReader, DedupReader, MergeReaderBuilder};
|
||||
use crate::read::{
|
||||
Batch, BoxedBatchReader, ChainReader, DedupReader, MergeReaderBuilder, WindowedReader,
|
||||
};
|
||||
use crate::schema::{ProjectedSchema, ProjectedSchemaRef, RegionSchemaRef};
|
||||
use crate::sst::{AccessLayerRef, FileHandle, LevelMetas, ReadOptions};
|
||||
use crate::window_infer::{PlainWindowInference, WindowInfer};
|
||||
@@ -90,6 +91,7 @@ impl ChunkReaderImpl {
|
||||
|
||||
/// Builder to create a new [ChunkReaderImpl] from scan request.
|
||||
pub struct ChunkReaderBuilder {
|
||||
region_id: RegionId,
|
||||
schema: RegionSchemaRef,
|
||||
projection: Option<Vec<usize>>,
|
||||
filters: Vec<Expr>,
|
||||
@@ -98,11 +100,13 @@ pub struct ChunkReaderBuilder {
|
||||
memtables: Vec<MemtableRef>,
|
||||
files_to_read: Vec<FileHandle>,
|
||||
output_ordering: Option<Vec<OrderOption>>,
|
||||
use_chain_reader: bool,
|
||||
}
|
||||
|
||||
impl ChunkReaderBuilder {
|
||||
pub fn new(schema: RegionSchemaRef, sst_layer: AccessLayerRef) -> Self {
|
||||
pub fn new(region_id: RegionId, schema: RegionSchemaRef, sst_layer: AccessLayerRef) -> Self {
|
||||
ChunkReaderBuilder {
|
||||
region_id,
|
||||
schema,
|
||||
projection: None,
|
||||
filters: vec![],
|
||||
@@ -111,6 +115,7 @@ impl ChunkReaderBuilder {
|
||||
memtables: Vec::new(),
|
||||
files_to_read: Vec::new(),
|
||||
output_ordering: None,
|
||||
use_chain_reader: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -150,6 +155,15 @@ impl ChunkReaderBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Partition files and memtables according to their time windows and scan time windows
|
||||
/// one by one.
|
||||
///
|
||||
/// Note that compaction should not enable this.
|
||||
pub fn use_chain_reader(mut self, use_chain_reader: bool) -> Self {
|
||||
self.use_chain_reader = use_chain_reader;
|
||||
self
|
||||
}
|
||||
|
||||
/// Picks all SSTs in all levels
|
||||
pub fn pick_all_ssts(mut self, ssts: &LevelMetas) -> Result<Self> {
|
||||
let files = ssts.levels().iter().flat_map(|level| level.files());
|
||||
@@ -183,7 +197,12 @@ impl ChunkReaderBuilder {
|
||||
if name != self.schema.timestamp_column_name() {
|
||||
return None;
|
||||
}
|
||||
let memtable_stats = self.memtables.iter().map(|m| m.stats()).collect::<Vec<_>>();
|
||||
let memtable_stats = self
|
||||
.memtables
|
||||
.iter()
|
||||
.filter(|m| m.num_rows() > 0) // Skip empty memtables.
|
||||
.map(|m| m.stats())
|
||||
.collect::<Vec<_>>();
|
||||
let files = self
|
||||
.files_to_read
|
||||
.iter()
|
||||
@@ -238,15 +257,32 @@ impl ChunkReaderBuilder {
|
||||
predicate,
|
||||
time_range: *time_range,
|
||||
};
|
||||
|
||||
let mut num_read_files = 0;
|
||||
for file in &self.files_to_read {
|
||||
if !Self::file_in_range(file, time_range) {
|
||||
debug!("Skip file {:?}, predicate: {:?}", file, time_range);
|
||||
logging::debug!(
|
||||
"Skip region {} file {:?}, predicate: {:?}",
|
||||
self.region_id,
|
||||
file,
|
||||
time_range
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
let reader = self.sst_layer.read_sst(file.clone(), &read_opts).await?;
|
||||
reader_builder = reader_builder.push_batch_reader(reader);
|
||||
num_read_files += 1;
|
||||
}
|
||||
|
||||
logging::debug!(
|
||||
"build reader done, region_id: {}, time_range: {:?}, total_files: {}, num_read_files: {}",
|
||||
self.region_id,
|
||||
time_range,
|
||||
self.files_to_read.len(),
|
||||
num_read_files,
|
||||
);
|
||||
|
||||
let reader = reader_builder.build();
|
||||
let reader = DedupReader::new(schema.clone(), reader);
|
||||
Ok(Box::new(reader) as Box<_>)
|
||||
@@ -266,6 +302,8 @@ impl ChunkReaderBuilder {
|
||||
output_ordering = Some(ordering.clone());
|
||||
self.build_windowed(&schema, &time_range_predicate, windows, ordering)
|
||||
.await?
|
||||
} else if self.use_chain_reader {
|
||||
self.build_chained(&schema, &time_range_predicate).await?
|
||||
} else {
|
||||
self.build_reader(&schema, &time_range_predicate).await?
|
||||
};
|
||||
@@ -273,8 +311,41 @@ impl ChunkReaderBuilder {
|
||||
Ok(ChunkReaderImpl::new(schema, reader, output_ordering))
|
||||
}
|
||||
|
||||
async fn build_chained(
|
||||
&self,
|
||||
schema: &ProjectedSchemaRef,
|
||||
time_range: &TimestampRange,
|
||||
) -> Result<BoxedBatchReader> {
|
||||
let windows = self.infer_window_for_chain_reader(time_range);
|
||||
|
||||
logging::debug!(
|
||||
"Infer window for chain reader, region_id: {}, memtables: {}, files: {}, num_windows: {}",
|
||||
self.region_id,
|
||||
self.memtables.len(),
|
||||
self.files_to_read.len(),
|
||||
windows.len(),
|
||||
);
|
||||
|
||||
let mut readers = Vec::with_capacity(windows.len());
|
||||
for window in &windows {
|
||||
let time_range = time_range.and(window);
|
||||
let reader = self.build_reader(schema, &time_range).await?;
|
||||
readers.push(reader);
|
||||
}
|
||||
|
||||
logging::debug!(
|
||||
"Build chain reader, region_id: {}, time_range: {:?}, num_readers: {}",
|
||||
self.region_id,
|
||||
time_range,
|
||||
readers.len(),
|
||||
);
|
||||
|
||||
let chain_reader = ChainReader::new(schema.clone(), readers);
|
||||
Ok(Box::new(chain_reader) as Box<_>)
|
||||
}
|
||||
|
||||
/// Build time range predicate from schema and filters.
|
||||
pub fn build_time_range_predicate(&self) -> TimestampRange {
|
||||
fn build_time_range_predicate(&self) -> TimestampRange {
|
||||
let Some(ts_col) = self.schema.user_schema().timestamp_column() else { return TimestampRange::min_to_max() };
|
||||
let unit = ts_col
|
||||
.data_type
|
||||
@@ -294,4 +365,87 @@ impl ChunkReaderBuilder {
|
||||
let file_ts_range = TimestampRange::new_inclusive(Some(start), Some(end));
|
||||
file_ts_range.intersects(predicate)
|
||||
}
|
||||
|
||||
/// Returns the time range of memtables to read.
|
||||
fn compute_memtable_range(&self) -> Option<TimestampRange> {
|
||||
let (min_timestamp, max_timestamp) = self
|
||||
.memtables
|
||||
.iter()
|
||||
.filter(|m| m.num_rows() > 0) // Skip empty memtables.
|
||||
.map(|m| {
|
||||
let stats = m.stats();
|
||||
(stats.min_timestamp, stats.max_timestamp)
|
||||
})
|
||||
.reduce(|acc, e| (acc.0.min(e.0), acc.1.max(e.1)))?;
|
||||
|
||||
logging::debug!(
|
||||
"Compute memtable range, region_id: {}, min: {:?}, max: {:?}",
|
||||
self.region_id,
|
||||
min_timestamp,
|
||||
max_timestamp,
|
||||
);
|
||||
|
||||
Some(TimestampRange::new_inclusive(
|
||||
Some(min_timestamp),
|
||||
Some(max_timestamp),
|
||||
))
|
||||
}
|
||||
|
||||
/// Infer time window for chain reader according to the time range of memtables and files.
|
||||
fn infer_window_for_chain_reader(&self, time_range: &TimestampRange) -> Vec<TimestampRange> {
|
||||
let mut memtable_range = self.compute_memtable_range();
|
||||
// file ranges: (start, end)
|
||||
let mut file_ranges = Vec::with_capacity(self.files_to_read.len());
|
||||
for file in &self.files_to_read {
|
||||
if !Self::file_in_range(file, time_range) || file.time_range().is_none() {
|
||||
continue;
|
||||
}
|
||||
// Safety: we have skip files whose range is `None`.
|
||||
let range = file.time_range().unwrap();
|
||||
|
||||
// Filter by memtable's time range.
|
||||
if let Some(mem_range) = &mut memtable_range {
|
||||
let file_range = TimestampRange::new_inclusive(Some(range.0), Some(range.1));
|
||||
if mem_range.intersects(&file_range) {
|
||||
// If the range of the SST intersects with the range of the
|
||||
// memtable, we merge it into the memtable's range.
|
||||
*mem_range = mem_range.or(&file_range);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
file_ranges.push((range.0, range.1));
|
||||
}
|
||||
|
||||
if file_ranges.is_empty() {
|
||||
return memtable_range.map(|range| vec![range]).unwrap_or_default();
|
||||
}
|
||||
|
||||
// Sort by start times.
|
||||
file_ranges.sort_unstable_by(|left, right| left.0.cmp(&right.0));
|
||||
|
||||
// Compute ranges for all SSTs.
|
||||
let mut time_ranges = Vec::with_capacity(file_ranges.len() + 1);
|
||||
// Safety: file_ranges is not empty.
|
||||
let mut prev =
|
||||
TimestampRange::new_inclusive(Some(file_ranges[0].0), Some(file_ranges[0].1));
|
||||
for file_range in &file_ranges[1..] {
|
||||
let current = TimestampRange::new_inclusive(Some(file_range.0), Some(file_range.1));
|
||||
if prev.intersects(&current) {
|
||||
prev = prev.or(&current);
|
||||
} else {
|
||||
time_ranges.push(prev);
|
||||
prev = current;
|
||||
}
|
||||
}
|
||||
time_ranges.push(prev);
|
||||
|
||||
if let Some(mem_range) = memtable_range {
|
||||
time_ranges.push(mem_range);
|
||||
// We have pushed the memtable range, resort the array.
|
||||
time_ranges.sort_unstable_by(|left, right| left.start().cmp(right.start()));
|
||||
}
|
||||
|
||||
time_ranges
|
||||
}
|
||||
}
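The window inference above effectively merges overlapping file (and memtable) time ranges into disjoint windows that the chain reader scans one by one. A minimal sketch of that merge step, assuming plain inclusive (start, end) second pairs instead of the crate's TimestampRange type:

// Sketch: sort inclusive ranges by start, merge overlapping ones, and return
// the disjoint windows a chained scan would walk through in order.
fn merge_ranges(mut ranges: Vec<(i64, i64)>) -> Vec<(i64, i64)> {
    if ranges.is_empty() {
        return Vec::new();
    }
    ranges.sort_unstable_by_key(|r| r.0);
    let mut merged = vec![ranges[0]];
    for &(start, end) in ranges.iter().skip(1) {
        let last_end = merged.last().unwrap().1;
        if start <= last_end {
            // Overlapping (inclusive) ranges collapse into one window.
            merged.last_mut().unwrap().1 = last_end.max(end);
        } else {
            merged.push((start, end));
        }
    }
    merged
}

fn main() {
    let windows = merge_ranges(vec![(0, 10), (5, 20), (30, 40)]);
    assert_eq!(windows, vec![(0, 20), (30, 40)]);
}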
|
||||
|
||||
@@ -15,17 +15,182 @@
|
||||
pub mod noop;
|
||||
mod picker;
|
||||
mod scheduler;
|
||||
mod strategy;
|
||||
mod task;
|
||||
mod twcs;
|
||||
mod writer;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
pub use picker::{Picker, PickerContext, SimplePicker};
|
||||
use common_telemetry::tracing::log::warn;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::Timestamp;
|
||||
pub use picker::{LeveledTimeWindowPicker, Picker, PickerContext};
|
||||
pub use scheduler::{CompactionHandler, CompactionRequestImpl};
|
||||
use store_api::logstore::LogStore;
|
||||
use store_api::storage::CompactionStrategy;
|
||||
pub use task::{CompactionTask, CompactionTaskImpl};
|
||||
pub use twcs::TwcsPicker;
|
||||
|
||||
use crate::scheduler::Scheduler;
|
||||
use crate::sst::FileHandle;
|
||||
|
||||
pub type CompactionPickerRef<S> =
|
||||
Arc<dyn Picker<Request = CompactionRequestImpl<S>, Task = CompactionTaskImpl<S>> + Send + Sync>;
|
||||
|
||||
pub type CompactionSchedulerRef<S> =
|
||||
Arc<dyn Scheduler<Request = CompactionRequestImpl<S>> + Send + Sync>;
|
||||
|
||||
/// Infers the suitable time bucket duration.
|
||||
/// Now it simply finds the max and min timestamps across all SSTs in the level and fits the time span
/// into a time bucket.
|
||||
pub(crate) fn infer_time_bucket<'a>(files: impl Iterator<Item = &'a FileHandle>) -> i64 {
|
||||
let mut max_ts = Timestamp::new(i64::MIN, TimeUnit::Second);
|
||||
let mut min_ts = Timestamp::new(i64::MAX, TimeUnit::Second);
|
||||
|
||||
for f in files {
|
||||
if let Some((start, end)) = f.time_range() {
|
||||
min_ts = min_ts.min(*start);
|
||||
max_ts = max_ts.max(*end);
|
||||
} else {
|
||||
// we don't expect an SST file without time range,
|
||||
// it's either a bug or data corruption.
|
||||
warn!("Found SST file without time range metadata: {f:?}");
|
||||
}
|
||||
}
|
||||
|
||||
// safety: Converting any timestamp into seconds will not cause overflow.
|
||||
let min_sec = min_ts.convert_to(TimeUnit::Second).unwrap().value();
|
||||
let max_sec = max_ts.convert_to(TimeUnit::Second).unwrap().value();
|
||||
|
||||
max_sec
|
||||
.checked_sub(min_sec)
|
||||
.map(|span| TIME_BUCKETS.fit_time_bucket(span)) // return the max bucket on subtraction overflow.
|
||||
.unwrap_or_else(|| TIME_BUCKETS.max()) // safety: TIME_BUCKETS cannot be empty.
|
||||
}
|
||||
|
||||
pub(crate) struct TimeBuckets([i64; 7]);
|
||||
|
||||
impl TimeBuckets {
|
||||
/// Fits a given time span into a time bucket by finding the minimum bucket that can cover the span.
|
||||
/// Returns the max bucket if no such bucket can be found.
|
||||
fn fit_time_bucket(&self, span_sec: i64) -> i64 {
|
||||
assert!(span_sec >= 0);
|
||||
match self.0.binary_search(&span_sec) {
|
||||
Ok(idx) => self.0[idx],
|
||||
Err(idx) => {
|
||||
if idx < self.0.len() {
|
||||
self.0[idx]
|
||||
} else {
|
||||
self.0.last().copied().unwrap()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn get(&self, idx: usize) -> i64 {
|
||||
self.0[idx]
|
||||
}
|
||||
|
||||
fn max(&self) -> i64 {
|
||||
self.0.last().copied().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
/// A set of predefined time buckets.
|
||||
pub(crate) const TIME_BUCKETS: TimeBuckets = TimeBuckets([
|
||||
60 * 60, // one hour
|
||||
2 * 60 * 60, // two hours
|
||||
12 * 60 * 60, // twelve hours
|
||||
24 * 60 * 60, // one day
|
||||
7 * 24 * 60 * 60, // one week
|
||||
365 * 24 * 60 * 60, // one year
|
||||
10 * 365 * 24 * 60 * 60, // ten years
|
||||
]);
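To illustrate the bucket-fitting rule above (pick the smallest predefined bucket that covers the span, otherwise the largest), here is a standalone sketch that mirrors the binary-search logic with the same bucket values; it is an illustration, not the crate's API:

// Sketch of the bucket-fitting rule: smallest bucket that covers the span,
// or the largest bucket if none does.
const BUCKETS: [i64; 7] = [
    60 * 60,
    2 * 60 * 60,
    12 * 60 * 60,
    24 * 60 * 60,
    7 * 24 * 60 * 60,
    365 * 24 * 60 * 60,
    10 * 365 * 24 * 60 * 60,
];

fn fit(span_sec: i64) -> i64 {
    match BUCKETS.binary_search(&span_sec) {
        Ok(i) => BUCKETS[i],
        Err(i) if i < BUCKETS.len() => BUCKETS[i],
        Err(_) => *BUCKETS.last().unwrap(),
    }
}

fn main() {
    assert_eq!(fit(1), 60 * 60);                // tiny span -> one hour
    assert_eq!(fit(3 * 60 * 60), 12 * 60 * 60); // three hours -> twelve-hour bucket
    assert_eq!(fit(i64::MAX), BUCKETS[6]);      // huge span -> largest bucket
}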
|
||||
|
||||
pub fn compaction_strategy_to_picker<S: LogStore>(
|
||||
strategy: &CompactionStrategy,
|
||||
) -> CompactionPickerRef<S> {
|
||||
match strategy {
|
||||
CompactionStrategy::LeveledTimeWindow => {
|
||||
Arc::new(LeveledTimeWindowPicker::default()) as Arc<_>
|
||||
}
|
||||
CompactionStrategy::Twcs(twcs_opts) => Arc::new(TwcsPicker::new(
|
||||
twcs_opts.max_active_window_files,
|
||||
twcs_opts.max_inactive_window_files,
|
||||
twcs_opts.time_window_seconds,
|
||||
)) as Arc<_>,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_time::Timestamp;
|
||||
|
||||
use super::*;
|
||||
use crate::file_purger::noop::new_noop_file_purger;
|
||||
use crate::sst::{FileHandle, FileId, FileMeta, Level};
|
||||
|
||||
/// Test util to create file handles.
|
||||
pub fn new_file_handle(
|
||||
file_id: FileId,
|
||||
start_ts_millis: i64,
|
||||
end_ts_millis: i64,
|
||||
level: Level,
|
||||
) -> FileHandle {
|
||||
let file_purger = new_noop_file_purger();
|
||||
let layer = Arc::new(crate::test_util::access_layer_util::MockAccessLayer {});
|
||||
FileHandle::new(
|
||||
FileMeta {
|
||||
region_id: 0,
|
||||
file_id,
|
||||
time_range: Some((
|
||||
Timestamp::new_millisecond(start_ts_millis),
|
||||
Timestamp::new_millisecond(end_ts_millis),
|
||||
)),
|
||||
level,
|
||||
file_size: 0,
|
||||
},
|
||||
layer,
|
||||
file_purger,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_time_bucket() {
|
||||
assert_eq!(TIME_BUCKETS.get(0), TIME_BUCKETS.fit_time_bucket(1));
|
||||
assert_eq!(TIME_BUCKETS.get(0), TIME_BUCKETS.fit_time_bucket(60 * 60));
|
||||
assert_eq!(
|
||||
TIME_BUCKETS.get(1),
|
||||
TIME_BUCKETS.fit_time_bucket(60 * 60 + 1)
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
TIME_BUCKETS.get(2),
|
||||
TIME_BUCKETS.fit_time_bucket(TIME_BUCKETS.get(2) - 1)
|
||||
);
|
||||
assert_eq!(
|
||||
TIME_BUCKETS.get(2),
|
||||
TIME_BUCKETS.fit_time_bucket(TIME_BUCKETS.get(2))
|
||||
);
|
||||
assert_eq!(
|
||||
TIME_BUCKETS.get(3),
|
||||
TIME_BUCKETS.fit_time_bucket(TIME_BUCKETS.get(3) - 1)
|
||||
);
|
||||
assert_eq!(TIME_BUCKETS.get(6), TIME_BUCKETS.fit_time_bucket(i64::MAX));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_infer_time_buckets() {
|
||||
assert_eq!(
|
||||
TIME_BUCKETS.get(0),
|
||||
infer_time_bucket(
|
||||
[
|
||||
new_file_handle(FileId::random(), 0, TIME_BUCKETS.get(0) * 1000 - 1, 0),
|
||||
new_file_handle(FileId::random(), 1, 10_000, 0)
|
||||
]
|
||||
.iter()
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,30 +12,49 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::marker::PhantomData;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_telemetry::tracing::log::warn;
|
||||
use common_telemetry::{debug, error, info};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::timestamp_millis::BucketAligned;
|
||||
use common_time::Timestamp;
|
||||
use snafu::ResultExt;
|
||||
use store_api::logstore::LogStore;
|
||||
|
||||
use crate::compaction::infer_time_bucket;
|
||||
use crate::compaction::scheduler::CompactionRequestImpl;
|
||||
use crate::compaction::strategy::{SimpleTimeWindowStrategy, StrategyRef};
|
||||
use crate::compaction::task::{CompactionTask, CompactionTaskImpl};
|
||||
use crate::error::TtlCalculationSnafu;
|
||||
use crate::compaction::task::{CompactionOutput, CompactionTask, CompactionTaskImpl};
|
||||
use crate::error::{Result, TtlCalculationSnafu};
|
||||
use crate::scheduler::Request;
|
||||
use crate::sst::{FileHandle, Level};
|
||||
use crate::version::LevelMetasRef;
|
||||
use crate::sst::{FileHandle, LevelMeta};
|
||||
|
||||
/// Picker picks input SST files and builds the compaction task.
|
||||
/// Different compaction strategy may implement different pickers.
|
||||
pub trait Picker: Send + 'static {
|
||||
pub trait Picker: Debug + Send + 'static {
|
||||
type Request: Request;
|
||||
type Task: CompactionTask;
|
||||
|
||||
fn pick(&self, req: &Self::Request) -> crate::error::Result<Option<Self::Task>>;
|
||||
fn pick(&self, req: &Self::Request) -> Result<Option<Self::Task>>;
|
||||
}
|
||||
|
||||
pub(crate) fn get_expired_ssts(
|
||||
levels: &[LevelMeta],
|
||||
ttl: Option<Duration>,
|
||||
now: Timestamp,
|
||||
) -> Result<Vec<FileHandle>> {
|
||||
let Some(ttl) = ttl else { return Ok(vec![]); };
|
||||
|
||||
let expire_time = now.sub_duration(ttl).context(TtlCalculationSnafu)?;
|
||||
|
||||
let expired_ssts = levels
|
||||
.iter()
|
||||
.flat_map(|l| l.get_expired_files(&expire_time).into_iter())
|
||||
.collect();
|
||||
Ok(expired_ssts)
|
||||
}
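The TTL rule used by get_expired_ssts can be summarized as: a file is expired when its max timestamp falls before now minus the TTL. A hedged sketch with plain second values and numeric file ids instead of the crate's Timestamp and FileHandle types:

// Sketch: filter files whose max timestamp is older than `now - ttl`.
use std::time::Duration;

fn expired_files(files: &[(u32, i64)], ttl: Option<Duration>, now_sec: i64) -> Vec<u32> {
    let Some(ttl) = ttl else { return Vec::new() };
    let expire_time = now_sec - ttl.as_secs() as i64;
    files
        .iter()
        .filter(|(_, max_ts)| *max_ts < expire_time)
        .map(|(id, _)| *id)
        .collect()
}

fn main() {
    // With a 1s TTL and now = 10s, only the file ending at 3s is expired.
    let expired = expired_files(&[(1, 9), (2, 11), (3, 3)], Some(Duration::from_secs(1)), 10);
    assert_eq!(expired, vec![3]);
}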
|
||||
|
||||
pub struct PickerContext {
|
||||
@@ -54,56 +73,40 @@ impl PickerContext {
|
||||
}
|
||||
}
|
||||
|
||||
/// L0 -> L1 compaction based on time windows.
|
||||
pub struct SimplePicker<S> {
|
||||
strategy: StrategyRef,
|
||||
/// `LeveledTimeWindowPicker` only handles level 0 to level 1 compaction in a time-window tiered
|
||||
/// manner. It picks all SSTs in level 0 and writes rows in these SSTs to a new file partitioned
|
||||
/// by an inferred time bucket in level 1.
|
||||
pub struct LeveledTimeWindowPicker<S> {
|
||||
_phantom_data: PhantomData<S>,
|
||||
}
|
||||
|
||||
impl<S> Default for SimplePicker<S> {
|
||||
fn default() -> Self {
|
||||
Self::new(Arc::new(SimpleTimeWindowStrategy {}))
|
||||
impl<S> Debug for LeveledTimeWindowPicker<S> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "LeveledTimeWindowPicker{{..}}")
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> SimplePicker<S> {
|
||||
pub fn new(strategy: StrategyRef) -> Self {
|
||||
impl<S> Default for LeveledTimeWindowPicker<S> {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> LeveledTimeWindowPicker<S> {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
strategy,
|
||||
_phantom_data: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_expired_ssts(
|
||||
&self,
|
||||
levels: &LevelMetasRef,
|
||||
ttl: Option<Duration>,
|
||||
) -> crate::error::Result<Vec<FileHandle>> {
|
||||
let Some(ttl) = ttl else { return Ok(vec![]); };
|
||||
|
||||
let expire_time = Timestamp::current_millis()
|
||||
.sub_duration(ttl)
|
||||
.context(TtlCalculationSnafu)?;
|
||||
|
||||
let mut expired_ssts = vec![];
|
||||
for level in 0..levels.level_num() {
|
||||
expired_ssts.extend(levels.level(level as Level).get_expired_files(&expire_time));
|
||||
}
|
||||
Ok(expired_ssts)
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: LogStore> Picker for SimplePicker<S> {
|
||||
impl<S: LogStore> Picker for LeveledTimeWindowPicker<S> {
|
||||
type Request = CompactionRequestImpl<S>;
|
||||
type Task = CompactionTaskImpl<S>;
|
||||
|
||||
fn pick(
|
||||
&self,
|
||||
req: &CompactionRequestImpl<S>,
|
||||
) -> crate::error::Result<Option<CompactionTaskImpl<S>>> {
|
||||
fn pick(&self, req: &CompactionRequestImpl<S>) -> Result<Option<CompactionTaskImpl<S>>> {
|
||||
let levels = &req.levels();
|
||||
let expired_ssts = self
|
||||
.get_expired_ssts(levels, req.ttl)
|
||||
let expired_ssts = get_expired_ssts(levels.levels(), req.ttl, Timestamp::current_millis())
|
||||
.map_err(|e| {
|
||||
error!(e;"Failed to get region expired SST files, region: {}, ttl: {:?}", req.region_id, req.ttl);
|
||||
e
|
||||
@@ -121,12 +124,16 @@ impl<S: LogStore> Picker for SimplePicker<S> {
|
||||
|
||||
let ctx = &PickerContext::with(req.compaction_time_window);
|
||||
|
||||
let mut outputs = vec![];
|
||||
for level_num in 0..levels.level_num() {
|
||||
let level = levels.level(level_num as u8);
|
||||
let (compaction_time_window, outputs) = self.strategy.pick(ctx, level);
|
||||
let compaction_time_window = Self::pick_level(ctx, level, &mut outputs);
|
||||
|
||||
if outputs.is_empty() {
|
||||
debug!("No SST file can be compacted at level {}", level_num);
|
||||
debug!(
|
||||
"No SST file can be compacted at level {}, path: {:?}",
|
||||
level_num, req.sst_layer
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -151,3 +158,272 @@ impl<S: LogStore> Picker for SimplePicker<S> {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> LeveledTimeWindowPicker<S> {
|
||||
fn pick_level(
|
||||
ctx: &PickerContext,
|
||||
level: &LevelMeta,
|
||||
results: &mut Vec<CompactionOutput>,
|
||||
) -> Option<i64> {
|
||||
// SimpleTimeWindowStrategy only handles level 0 to level 1 compaction.
|
||||
if level.level() != 0 {
|
||||
return None;
|
||||
}
|
||||
let files = find_compactable_files(level);
|
||||
debug!("Compactable files found: {:?}", files);
|
||||
if files.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let time_window = ctx.compaction_time_window().unwrap_or_else(|| {
|
||||
let inferred = infer_time_bucket(files.iter());
|
||||
debug!(
|
||||
"Compaction window is not present, inferring from files: {:?}",
|
||||
inferred
|
||||
);
|
||||
inferred
|
||||
});
|
||||
let buckets = calculate_time_buckets(time_window, &files);
|
||||
debug!("File bucket:{}, file groups: {:?}", time_window, buckets);
|
||||
|
||||
results.extend(buckets.into_iter().map(|(bound, files)| CompactionOutput {
|
||||
output_level: 1,
|
||||
time_window_bound: bound,
|
||||
time_window_sec: time_window,
|
||||
inputs: files,
|
||||
// strict window is used in simple time window strategy in that rows in one file
|
||||
// may get compacted to multiple destinations.
|
||||
strict_window: true,
|
||||
}));
|
||||
Some(time_window)
|
||||
}
|
||||
}
|
||||
|
||||
/// Finds files that can be compacted in the given level.
/// Currently these are the files that are not under compaction.
|
||||
#[inline]
|
||||
fn find_compactable_files(level: &LevelMeta) -> Vec<FileHandle> {
|
||||
level.files().filter(|f| !f.compacting()).cloned().collect()
|
||||
}
|
||||
|
||||
/// Calculates buckets for files. If file does not contain a time range in metadata, it will be
|
||||
/// assigned to a special bucket `i64::MAX` (normally no timestamp can be aligned to this bucket)
|
||||
/// so that all files without timestamp can be compacted together.
|
||||
fn calculate_time_buckets(bucket_sec: i64, files: &[FileHandle]) -> HashMap<i64, Vec<FileHandle>> {
|
||||
let mut buckets = HashMap::new();
|
||||
|
||||
for file in files {
|
||||
if let Some((start, end)) = file.time_range() {
|
||||
let bounds = file_time_bucket_span(
|
||||
start.convert_to(TimeUnit::Second).unwrap().value(),
|
||||
end.convert_to(TimeUnit::Second).unwrap().value(),
|
||||
bucket_sec,
|
||||
);
|
||||
for bound in bounds {
|
||||
buckets
|
||||
.entry(bound)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(file.clone());
|
||||
}
|
||||
} else {
|
||||
warn!("Found corrupted SST without timestamp bounds: {:?}", file);
|
||||
}
|
||||
}
|
||||
buckets
|
||||
}
|
||||
|
||||
/// Calculates timestamp span between start and end timestamp.
|
||||
fn file_time_bucket_span(start_sec: i64, end_sec: i64, bucket_sec: i64) -> Vec<i64> {
|
||||
assert!(start_sec <= end_sec);
|
||||
|
||||
// if timestamp is between `[i64::MIN, i64::MIN.align_by_bucket(bucket)]`, which cannot
|
||||
// be aligned to a valid i64 bound, simply return `i64::MIN` rather than just underflow.
|
||||
let mut start_aligned = start_sec.align_by_bucket(bucket_sec).unwrap_or(i64::MIN);
|
||||
let end_aligned = end_sec.align_by_bucket(bucket_sec).unwrap_or(i64::MIN);
|
||||
|
||||
let mut res = Vec::with_capacity(((end_aligned - start_aligned) / bucket_sec + 1) as usize);
|
||||
while start_aligned < end_aligned {
|
||||
res.push(start_aligned);
|
||||
start_aligned += bucket_sec;
|
||||
}
|
||||
res.push(end_aligned);
|
||||
res
|
||||
}
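A small sketch of the span computation above: align the start and end (in seconds) down to the bucket size and enumerate every bucket bound the range touches. It skips the i64::MIN underflow handling the real file_time_bucket_span needs:

// Sketch: floor-align both ends to the bucket and list all bucket bounds in between.
fn bucket_span(start_sec: i64, end_sec: i64, bucket_sec: i64) -> Vec<i64> {
    assert!(start_sec <= end_sec && bucket_sec > 0);
    let align = |v: i64| v.div_euclid(bucket_sec) * bucket_sec; // floor alignment
    let (mut cur, end) = (align(start_sec), align(end_sec));
    let mut out = Vec::new();
    while cur < end {
        out.push(cur);
        cur += bucket_sec;
    }
    out.push(end);
    out
}

fn main() {
    assert_eq!(bucket_span(1, 9, 10), vec![0]);
    assert_eq!(bucket_span(1, 10, 10), vec![0, 10]);
    assert_eq!(bucket_span(-10, -1, 10), vec![-10]);
}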
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::*;
|
||||
use crate::compaction::tests::new_file_handle;
|
||||
use crate::compaction::TIME_BUCKETS;
|
||||
use crate::file_purger::noop::new_noop_file_purger;
|
||||
use crate::sst::{FileId, Level, LevelMetas};
|
||||
|
||||
#[test]
|
||||
fn test_time_bucket_span() {
|
||||
assert_eq!(vec![0], file_time_bucket_span(1, 9, 10));
|
||||
|
||||
assert_eq!(vec![0, 10], file_time_bucket_span(1, 10, 10));
|
||||
|
||||
assert_eq!(vec![-10], file_time_bucket_span(-10, -1, 10));
|
||||
|
||||
assert_eq!(vec![-10, 0], file_time_bucket_span(-10, 0, 10));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_time_bucket_span_large() {
|
||||
assert_eq!(
|
||||
vec![
|
||||
(i64::MAX - 10).align_by_bucket(10).unwrap(),
|
||||
i64::MAX.align_by_bucket(10).unwrap(),
|
||||
],
|
||||
file_time_bucket_span(i64::MAX - 10, i64::MAX, 10)
|
||||
);
|
||||
|
||||
// magic hmmm?
|
||||
for bucket in 1..100 {
|
||||
assert_eq!(
|
||||
vec![
|
||||
i64::MIN,
|
||||
(i64::MIN + bucket).align_by_bucket(bucket).unwrap()
|
||||
],
|
||||
file_time_bucket_span(i64::MIN, i64::MIN + bucket, bucket)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn new_file_handles(input: &[(FileId, i64, i64)]) -> Vec<FileHandle> {
|
||||
input
|
||||
.iter()
|
||||
.map(|(file_id, start, end)| new_file_handle(*file_id, *start, *end, 0))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn check_bucket_calculation(
|
||||
bucket_sec: i64,
|
||||
files: Vec<FileHandle>,
|
||||
expected: &[(i64, &[FileId])],
|
||||
) {
|
||||
let res = calculate_time_buckets(bucket_sec, &files);
|
||||
|
||||
let expected = expected
|
||||
.iter()
|
||||
.map(|(bucket, file_ids)| (*bucket, file_ids.iter().copied().collect::<HashSet<_>>()))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
for (bucket, file_ids) in expected {
|
||||
let actual = res
|
||||
.get(&bucket)
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|f| f.file_id())
|
||||
.collect();
|
||||
assert_eq!(
|
||||
file_ids, actual,
|
||||
"bucket: {bucket}, expected: {file_ids:?}, actual: {actual:?}",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_calculate_time_buckets() {
|
||||
let file_id_a = FileId::random();
|
||||
let file_id_b = FileId::random();
|
||||
// simple case: files with disjoint time ranges
|
||||
check_bucket_calculation(
|
||||
10,
|
||||
new_file_handles(&[(file_id_a, 0, 9000), (file_id_b, 10000, 19000)]),
|
||||
&[(0, &[file_id_a]), (10, &[file_id_b])],
|
||||
);
|
||||
|
||||
// files across buckets
|
||||
check_bucket_calculation(
|
||||
10,
|
||||
new_file_handles(&[(file_id_a, 0, 10001), (file_id_b, 10000, 19000)]),
|
||||
&[(0, &[file_id_a]), (10, &[file_id_a, file_id_b])],
|
||||
);
|
||||
check_bucket_calculation(
|
||||
10,
|
||||
new_file_handles(&[(file_id_a, 0, 10000)]),
|
||||
&[(0, &[file_id_a]), (10, &[file_id_a])],
|
||||
);
|
||||
|
||||
// file with a large time range
|
||||
let file_id_array = &[file_id_a];
|
||||
let expected = (0..(TIME_BUCKETS.get(4) / TIME_BUCKETS.get(0)))
|
||||
.map(|b| (b * TIME_BUCKETS.get(0), file_id_array as _))
|
||||
.collect::<Vec<_>>();
|
||||
check_bucket_calculation(
|
||||
TIME_BUCKETS.get(0),
|
||||
new_file_handles(&[(file_id_a, 0, TIME_BUCKETS.get(4) * 1000)]),
|
||||
&expected,
|
||||
);
|
||||
}
|
||||
|
||||
struct TtlTester {
|
||||
files: Vec<(FileId, i64, i64, Level)>,
|
||||
ttl: Option<Duration>,
|
||||
expired: Vec<usize>,
|
||||
now: Timestamp,
|
||||
}
|
||||
|
||||
impl TtlTester {
|
||||
fn check(&self) {
|
||||
let expected_expired = self
|
||||
.expired
|
||||
.iter()
|
||||
.map(|idx| self.files[*idx].0)
|
||||
.collect::<HashSet<_>>();
|
||||
let file_purger = new_noop_file_purger();
|
||||
let layer = Arc::new(crate::test_util::access_layer_util::MockAccessLayer {});
|
||||
let file_handles = self
|
||||
.files
|
||||
.iter()
|
||||
.map(|(file_id, start_ts, end_ts, level)| {
|
||||
new_file_handle(*file_id, *start_ts, *end_ts, *level).meta()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let levels = LevelMetas::new(layer, file_purger).merge(
|
||||
file_handles.into_iter(),
|
||||
vec![].into_iter(),
|
||||
None,
|
||||
);
|
||||
let expired = get_expired_ssts(levels.levels(), self.ttl, self.now)
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.map(|f| f.file_id())
|
||||
.collect::<HashSet<_>>();
|
||||
assert_eq!(expected_expired, expired);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_expired_ssts() {
|
||||
TtlTester {
|
||||
files: vec![
|
||||
(FileId::random(), 8000, 9000, 0),
|
||||
(FileId::random(), 10000, 11000, 0),
|
||||
(FileId::random(), 8000, 11000, 1),
|
||||
(FileId::random(), 2000, 3000, 1),
|
||||
],
|
||||
ttl: Some(Duration::from_secs(1)),
|
||||
expired: vec![3],
|
||||
now: Timestamp::new_second(10),
|
||||
}
|
||||
.check();
|
||||
|
||||
TtlTester {
|
||||
files: vec![
|
||||
(FileId::random(), 8000, 8999, 0),
|
||||
(FileId::random(), 10000, 11000, 0),
|
||||
(FileId::random(), 8000, 11000, 1),
|
||||
(FileId::random(), 2000, 3000, 1),
|
||||
],
|
||||
ttl: Some(Duration::from_secs(1)),
|
||||
expired: vec![0, 3],
|
||||
now: Timestamp::new_second(10),
|
||||
}
|
||||
.check();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
@@ -22,8 +23,8 @@ use store_api::storage::RegionId;
|
||||
use tokio::sync::oneshot::Sender;
|
||||
use tokio::sync::Notify;
|
||||
|
||||
use crate::compaction::picker::Picker;
|
||||
use crate::compaction::task::CompactionTask;
|
||||
use crate::compaction::CompactionPickerRef;
|
||||
use crate::error::Result;
|
||||
use crate::manifest::region::RegionManifest;
|
||||
use crate::region::{RegionWriterRef, SharedDataRef};
|
||||
@@ -63,7 +64,7 @@ pub struct CompactionRequestImpl<S: LogStore> {
|
||||
pub compaction_time_window: Option<i64>,
|
||||
/// Compaction result sender.
|
||||
pub sender: Option<Sender<Result<()>>>,
|
||||
|
||||
pub picker: CompactionPickerRef<S>,
|
||||
pub sst_write_buffer_size: ReadableSize,
|
||||
}
|
||||
|
||||
@@ -79,18 +80,40 @@ impl<S: LogStore> CompactionRequestImpl<S> {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct CompactionHandler<P> {
|
||||
pub picker: P,
|
||||
pub struct CompactionHandler<S: LogStore> {
|
||||
_phantom_data: PhantomData<S>,
|
||||
#[cfg(test)]
|
||||
pub pending_tasks: Arc<tokio::sync::RwLock<Vec<tokio::task::JoinHandle<()>>>>,
|
||||
}
|
||||
|
||||
impl<S: LogStore> Default for CompactionHandler<S> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
_phantom_data: Default::default(),
|
||||
#[cfg(test)]
|
||||
pending_tasks: Arc::new(Default::default()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: LogStore> CompactionHandler<S> {
|
||||
#[cfg(test)]
|
||||
pub fn new_with_pending_tasks(
|
||||
tasks: Arc<tokio::sync::RwLock<Vec<tokio::task::JoinHandle<()>>>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
_phantom_data: Default::default(),
|
||||
pending_tasks: tasks,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<P> Handler for CompactionHandler<P>
|
||||
impl<S> Handler for CompactionHandler<S>
|
||||
where
|
||||
P: Picker + Send + Sync,
|
||||
S: LogStore,
|
||||
{
|
||||
type Request = P::Request;
|
||||
type Request = CompactionRequestImpl<S>;
|
||||
|
||||
async fn handle_request(
|
||||
&self,
|
||||
@@ -99,7 +122,7 @@ where
|
||||
finish_notifier: Arc<Notify>,
|
||||
) -> Result<()> {
|
||||
let region_id = req.key();
|
||||
let Some(task) = self.picker.pick(&req)? else {
|
||||
let Some(task) = req.picker.pick(&req)? else {
|
||||
info!("No file needs compaction in region: {:?}", region_id);
|
||||
req.complete(Ok(()));
|
||||
return Ok(());
|
||||
|
||||
@@ -1,327 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::{debug, warn};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::timestamp_millis::BucketAligned;
|
||||
use common_time::Timestamp;
|
||||
|
||||
use crate::compaction::picker::PickerContext;
|
||||
use crate::compaction::task::CompactionOutput;
|
||||
use crate::sst::{FileHandle, LevelMeta};
|
||||
|
||||
/// Compaction strategy that defines which SSTs need to be compacted at given level.
|
||||
pub trait Strategy {
|
||||
fn pick(&self, ctx: &PickerContext, level: &LevelMeta) -> (Option<i64>, Vec<CompactionOutput>);
|
||||
}
|
||||
|
||||
pub type StrategyRef = Arc<dyn Strategy + Send + Sync>;
|
||||
|
||||
/// SimpleTimeWindowStrategy only handles level 0 to level 1 compaction in a time-window tiered
|
||||
/// manner. It picks all SSTs in level 0 and writes rows in these SSTs to a new file partitioned
|
||||
/// by a inferred time bucket in level 1.
|
||||
pub struct SimpleTimeWindowStrategy {}
|
||||
|
||||
impl Strategy for SimpleTimeWindowStrategy {
|
||||
fn pick(&self, ctx: &PickerContext, level: &LevelMeta) -> (Option<i64>, Vec<CompactionOutput>) {
|
||||
// SimpleTimeWindowStrategy only handles level 0 to level 1 compaction.
|
||||
if level.level() != 0 {
|
||||
return (None, vec![]);
|
||||
}
|
||||
let files = find_compactable_files(level);
|
||||
debug!("Compactable files found: {:?}", files);
|
||||
if files.is_empty() {
|
||||
return (None, vec![]);
|
||||
}
|
||||
let time_window = ctx.compaction_time_window().unwrap_or_else(|| {
|
||||
let inferred = infer_time_bucket(&files);
|
||||
debug!(
|
||||
"Compaction window is not present, inferring from files: {:?}",
|
||||
inferred
|
||||
);
|
||||
inferred
|
||||
});
|
||||
let buckets = calculate_time_buckets(time_window, &files);
|
||||
debug!("File bucket:{}, file groups: {:?}", time_window, buckets);
|
||||
(
|
||||
Some(time_window),
|
||||
buckets
|
||||
.into_iter()
|
||||
.map(|(bound, files)| CompactionOutput {
|
||||
output_level: 1,
|
||||
bucket_bound: bound,
|
||||
bucket: time_window,
|
||||
inputs: files,
|
||||
})
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Finds files that can be compacted in given level.
|
||||
/// Currently they're files that is not currently under compaction.
|
||||
#[inline]
|
||||
fn find_compactable_files(level: &LevelMeta) -> Vec<FileHandle> {
|
||||
level.files().filter(|f| !f.compacting()).cloned().collect()
|
||||
}
|
||||
|
||||
/// Calculates buckets for files. If file does not contain a time range in metadata, it will be
|
||||
/// assigned to a special bucket `i64::MAX` (normally no timestamp can be aligned to this bucket)
|
||||
/// so that all files without timestamp can be compacted together.
|
||||
fn calculate_time_buckets(bucket_sec: i64, files: &[FileHandle]) -> HashMap<i64, Vec<FileHandle>> {
|
||||
let mut buckets = HashMap::new();
|
||||
|
||||
for file in files {
|
||||
if let Some((start, end)) = file.time_range() {
|
||||
let bounds = file_time_bucket_span(
|
||||
start.convert_to(TimeUnit::Second).unwrap().value(),
|
||||
end.convert_to(TimeUnit::Second).unwrap().value(),
|
||||
bucket_sec,
|
||||
);
|
||||
for bound in bounds {
|
||||
buckets
|
||||
.entry(bound)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(file.clone());
|
||||
}
|
||||
} else {
|
||||
warn!("Found corrupted SST without timestamp bounds: {:?}", file);
|
||||
}
|
||||
}
|
||||
buckets
|
||||
}
|
||||
|
||||
/// Calculates timestamp span between start and end timestamp.
|
||||
fn file_time_bucket_span(start_sec: i64, end_sec: i64, bucket_sec: i64) -> Vec<i64> {
|
||||
assert!(start_sec <= end_sec);
|
||||
|
||||
// if timestamp is between `[i64::MIN, i64::MIN.align_by_bucket(bucket)]`, which cannot
|
||||
// be aligned to a valid i64 bound, simply return `i64::MIN` rather than just underflow.
|
||||
let mut start_aligned = start_sec.align_by_bucket(bucket_sec).unwrap_or(i64::MIN);
|
||||
let end_aligned = end_sec.align_by_bucket(bucket_sec).unwrap_or(i64::MIN);
|
||||
|
||||
let mut res = Vec::with_capacity(((end_aligned - start_aligned) / bucket_sec + 1) as usize);
|
||||
while start_aligned < end_aligned {
|
||||
res.push(start_aligned);
|
||||
start_aligned += bucket_sec;
|
||||
}
|
||||
res.push(end_aligned);
|
||||
res
|
||||
}
|
||||
|
||||
/// Infers the suitable time bucket duration.
|
||||
/// Now it simply find the max and min timestamp across all SSTs in level and fit the time span
|
||||
/// into time bucket.
|
||||
fn infer_time_bucket(files: &[FileHandle]) -> i64 {
|
||||
let mut max_ts = &Timestamp::new(i64::MIN, TimeUnit::Second);
|
||||
let mut min_ts = &Timestamp::new(i64::MAX, TimeUnit::Second);
|
||||
|
||||
for f in files {
|
||||
if let Some((start, end)) = f.time_range() {
|
||||
min_ts = min_ts.min(start);
|
||||
max_ts = max_ts.max(end);
|
||||
} else {
|
||||
// we don't expect an SST file without time range,
|
||||
// it's either a bug or data corruption.
|
||||
warn!("Found SST file without time range metadata: {f:?}");
|
||||
}
|
||||
}
|
||||
|
||||
// safety: Convert whatever timestamp into seconds will not cause overflow.
|
||||
let min_sec = min_ts.convert_to(TimeUnit::Second).unwrap().value();
|
||||
let max_sec = max_ts.convert_to(TimeUnit::Second).unwrap().value();
|
||||
|
||||
max_sec
|
||||
.checked_sub(min_sec)
|
||||
.map(fit_time_bucket) // return the max bucket on subtraction overflow.
|
||||
.unwrap_or_else(|| *TIME_BUCKETS.last().unwrap()) // safety: TIME_BUCKETS cannot be empty.
|
||||
}
|
||||
|
||||
/// A set of predefined time buckets.
|
||||
const TIME_BUCKETS: [i64; 7] = [
|
||||
60 * 60, // one hour
|
||||
2 * 60 * 60, // two hours
|
||||
12 * 60 * 60, // twelve hours
|
||||
24 * 60 * 60, // one day
|
||||
7 * 24 * 60 * 60, // one week
|
||||
365 * 24 * 60 * 60, // one year
|
||||
10 * 365 * 24 * 60 * 60, // ten years
|
||||
];
|
||||
|
||||
/// Fits a given time span into time bucket by find the minimum bucket that can cover the span.
|
||||
/// Returns the max bucket if no such bucket can be found.
|
||||
fn fit_time_bucket(span_sec: i64) -> i64 {
|
||||
assert!(span_sec >= 0);
|
||||
for b in TIME_BUCKETS {
|
||||
if b >= span_sec {
|
||||
return b;
|
||||
}
|
||||
}
|
||||
*TIME_BUCKETS.last().unwrap()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use super::*;
|
||||
use crate::file_purger::noop::new_noop_file_purger;
|
||||
use crate::sst::{FileId, FileMeta};
|
||||
|
||||
#[test]
|
||||
fn test_time_bucket_span() {
|
||||
assert_eq!(vec![0], file_time_bucket_span(1, 9, 10));
|
||||
|
||||
assert_eq!(vec![0, 10], file_time_bucket_span(1, 10, 10));
|
||||
|
||||
assert_eq!(vec![-10], file_time_bucket_span(-10, -1, 10));
|
||||
|
||||
assert_eq!(vec![-10, 0], file_time_bucket_span(-10, 0, 10));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_time_bucket_span_large() {
|
||||
assert_eq!(
|
||||
vec![
|
||||
(i64::MAX - 10).align_by_bucket(10).unwrap(),
|
||||
i64::MAX.align_by_bucket(10).unwrap(),
|
||||
],
|
||||
file_time_bucket_span(i64::MAX - 10, i64::MAX, 10)
|
||||
);
|
||||
|
||||
// magic hmmm?
|
||||
for bucket in 1..100 {
|
||||
assert_eq!(
|
||||
vec![
|
||||
i64::MIN,
|
||||
(i64::MIN + bucket).align_by_bucket(bucket).unwrap()
|
||||
],
|
||||
file_time_bucket_span(i64::MIN, i64::MIN + bucket, bucket)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_time_bucket() {
|
||||
assert_eq!(TIME_BUCKETS[0], fit_time_bucket(1));
|
||||
assert_eq!(TIME_BUCKETS[0], fit_time_bucket(60 * 60));
|
||||
assert_eq!(TIME_BUCKETS[1], fit_time_bucket(60 * 60 + 1));
|
||||
|
||||
assert_eq!(TIME_BUCKETS[2], fit_time_bucket(TIME_BUCKETS[2] - 1));
|
||||
assert_eq!(TIME_BUCKETS[2], fit_time_bucket(TIME_BUCKETS[2]));
|
||||
assert_eq!(TIME_BUCKETS[3], fit_time_bucket(TIME_BUCKETS[3] - 1));
|
||||
assert_eq!(TIME_BUCKETS[6], fit_time_bucket(i64::MAX));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_infer_time_buckets() {
|
||||
assert_eq!(
|
||||
TIME_BUCKETS[0],
|
||||
infer_time_bucket(&[
|
||||
new_file_handle(FileId::random(), 0, TIME_BUCKETS[0] * 1000 - 1),
|
||||
new_file_handle(FileId::random(), 1, 10_000)
|
||||
])
|
||||
);
|
||||
}
|
||||
|
||||
fn new_file_handle(file_id: FileId, start_ts_millis: i64, end_ts_millis: i64) -> FileHandle {
|
||||
let file_purger = new_noop_file_purger();
|
||||
let layer = Arc::new(crate::test_util::access_layer_util::MockAccessLayer {});
|
||||
FileHandle::new(
|
||||
FileMeta {
|
||||
region_id: 0,
|
||||
file_id,
|
||||
time_range: Some((
|
||||
Timestamp::new_millisecond(start_ts_millis),
|
||||
Timestamp::new_millisecond(end_ts_millis),
|
||||
)),
|
||||
level: 0,
|
||||
file_size: 0,
|
||||
},
|
||||
layer,
|
||||
file_purger,
|
||||
)
|
||||
}
|
||||
|
||||
fn new_file_handles(input: &[(FileId, i64, i64)]) -> Vec<FileHandle> {
|
||||
input
|
||||
.iter()
|
||||
.map(|(file_id, start, end)| new_file_handle(*file_id, *start, *end))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn check_bucket_calculation(
|
||||
bucket_sec: i64,
|
||||
files: Vec<FileHandle>,
|
||||
expected: &[(i64, &[FileId])],
|
||||
) {
|
||||
let res = calculate_time_buckets(bucket_sec, &files);
|
||||
|
||||
let expected = expected
|
||||
.iter()
|
||||
.map(|(bucket, file_ids)| (*bucket, file_ids.iter().copied().collect::<HashSet<_>>()))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
for (bucket, file_ids) in expected {
|
||||
let actual = res
|
||||
.get(&bucket)
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|f| f.file_id())
|
||||
.collect();
|
||||
assert_eq!(
|
||||
file_ids, actual,
|
||||
"bucket: {bucket}, expected: {file_ids:?}, actual: {actual:?}",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_calculate_time_buckets() {
|
||||
let file_id_a = FileId::random();
|
||||
let file_id_b = FileId::random();
|
||||
// simple case, files with disjoint
|
||||
check_bucket_calculation(
|
||||
10,
|
||||
new_file_handles(&[(file_id_a, 0, 9000), (file_id_b, 10000, 19000)]),
|
||||
&[(0, &[file_id_a]), (10, &[file_id_b])],
|
||||
);
|
||||
|
||||
// files across buckets
|
||||
check_bucket_calculation(
|
||||
10,
|
||||
new_file_handles(&[(file_id_a, 0, 10001), (file_id_b, 10000, 19000)]),
|
||||
&[(0, &[file_id_a]), (10, &[file_id_a, file_id_b])],
|
||||
);
|
||||
check_bucket_calculation(
|
||||
10,
|
||||
new_file_handles(&[(file_id_a, 0, 10000)]),
|
||||
&[(0, &[file_id_a]), (10, &[file_id_a])],
|
||||
);
|
||||
|
||||
// file with an large time range
|
||||
let file_id_array = &[file_id_a];
|
||||
let expected = (0..(TIME_BUCKETS[4] / TIME_BUCKETS[0]))
|
||||
.map(|b| (b * TIME_BUCKETS[0], file_id_array as _))
|
||||
.collect::<Vec<_>>();
|
||||
check_bucket_calculation(
|
||||
TIME_BUCKETS[0],
|
||||
new_file_handles(&[(file_id_a, 0, TIME_BUCKETS[4] * 1000)]),
|
||||
&expected,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -169,13 +169,15 @@ impl<S: LogStore> CompactionTask for CompactionTaskImpl<S> {
|
||||
#[derive(Debug)]
|
||||
pub struct CompactionOutput {
|
||||
/// Compaction output file level.
|
||||
pub(crate) output_level: Level,
|
||||
/// The left bound of time bucket.
|
||||
pub(crate) bucket_bound: i64,
|
||||
/// Bucket duration in seconds.
|
||||
pub(crate) bucket: i64,
|
||||
pub output_level: Level,
|
||||
/// The left bound of time window.
|
||||
pub time_window_bound: i64,
|
||||
/// Time window size in seconds.
|
||||
pub time_window_sec: i64,
|
||||
/// Compaction input files.
|
||||
pub(crate) inputs: Vec<FileHandle>,
|
||||
pub inputs: Vec<FileHandle>,
|
||||
/// If the compaction output is strictly windowed.
|
||||
pub strict_window: bool,
|
||||
}
|
||||
|
||||
impl CompactionOutput {
|
||||
@@ -186,12 +188,21 @@ impl CompactionOutput {
|
||||
sst_layer: AccessLayerRef,
|
||||
sst_write_buffer_size: ReadableSize,
|
||||
) -> Result<Option<FileMeta>> {
|
||||
let time_range = if self.strict_window {
|
||||
(
|
||||
Some(self.time_window_bound),
|
||||
Some(self.time_window_bound + self.time_window_sec),
|
||||
)
|
||||
} else {
|
||||
(None, None)
|
||||
};
|
||||
|
||||
let reader = build_sst_reader(
|
||||
region_id,
|
||||
schema,
|
||||
sst_layer.clone(),
|
||||
&self.inputs,
|
||||
self.bucket_bound,
|
||||
self.bucket_bound + self.bucket,
|
||||
time_range,
|
||||
)
|
||||
.await?;
|
||||
|
||||
|
||||
398
src/storage/src/compaction/twcs.rs
Normal file
@@ -0,0 +1,398 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Time-window compaction strategy
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use common_telemetry::tracing::warn;
|
||||
use common_telemetry::{debug, info};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::timestamp_millis::BucketAligned;
|
||||
use common_time::Timestamp;
|
||||
use store_api::logstore::LogStore;
|
||||
|
||||
use crate::compaction::picker::get_expired_ssts;
|
||||
use crate::compaction::task::CompactionOutput;
|
||||
use crate::compaction::{infer_time_bucket, CompactionRequestImpl, CompactionTaskImpl, Picker};
|
||||
use crate::sst::{FileHandle, LevelMeta};
|
||||
|
||||
/// `TwcsPicker` picks files whose max timestamps fall in the same time window as compaction
/// candidates.
|
||||
pub struct TwcsPicker<S> {
|
||||
max_active_window_files: usize,
|
||||
max_inactive_window_files: usize,
|
||||
time_window_seconds: Option<i64>,
|
||||
_phantom_data: PhantomData<S>,
|
||||
}
|
||||
|
||||
impl<S> Debug for TwcsPicker<S> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("TwcsPicker")
|
||||
.field("max_active_window_files", &self.max_active_window_files)
|
||||
.field("max_inactive_window_files", &self.max_inactive_window_files)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> TwcsPicker<S> {
|
||||
pub fn new(
|
||||
max_active_window_files: usize,
|
||||
max_inactive_window_files: usize,
|
||||
time_window_seconds: Option<i64>,
|
||||
) -> Self {
|
||||
Self {
|
||||
max_inactive_window_files,
|
||||
max_active_window_files,
|
||||
_phantom_data: Default::default(),
|
||||
time_window_seconds,
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds compaction output from files.
|
||||
/// For active writing window, we allow for at most `max_active_window_files` files to alleviate
|
||||
/// fragmentation. For other windows, we allow at most 1 file in each window.
|
||||
fn build_output(
|
||||
&self,
|
||||
time_windows: &BTreeMap<i64, Vec<FileHandle>>,
|
||||
active_window: Option<i64>,
|
||||
window_size: i64,
|
||||
) -> Vec<CompactionOutput> {
|
||||
let mut output = vec![];
|
||||
for (window, files) in time_windows {
|
||||
if let Some(active_window) = active_window && *window == active_window {
|
||||
if files.len() > self.max_active_window_files {
|
||||
output.push(CompactionOutput {
|
||||
output_level: 1, // we only have two levels and always compact to l1
|
||||
time_window_bound: *window,
|
||||
time_window_sec: window_size,
|
||||
inputs: files.clone(),
|
||||
// Strict window is not needed since we always compact many files to one
|
||||
// single file in TWCS.
|
||||
strict_window: false,
|
||||
});
|
||||
} else {
|
||||
debug!("Active window not present or no enough files in active window {:?}", active_window);
|
||||
}
|
||||
} else {
|
||||
// not active writing window
|
||||
if files.len() > self.max_inactive_window_files {
|
||||
output.push(CompactionOutput {
|
||||
output_level: 1,
|
||||
time_window_bound: *window,
|
||||
time_window_sec: window_size,
|
||||
inputs: files.clone(),
|
||||
strict_window: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
output
|
||||
}
|
||||
}
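The selection rule implemented by build_output can be sketched independently of the storage types: a window is compacted only when its file count exceeds the per-window cap, and the active window gets a larger cap than inactive windows. The names below are hypothetical:

// Sketch: pick the window bounds whose file counts exceed their cap.
fn windows_to_compact(
    windows: &[(i64, usize)], // (window bound, number of files in the window)
    active_window: Option<i64>,
    max_active: usize,
    max_inactive: usize,
) -> Vec<i64> {
    windows
        .iter()
        .filter(|&&(bound, files)| {
            let cap = if Some(bound) == active_window {
                max_active
            } else {
                max_inactive
            };
            files > cap
        })
        .map(|&(bound, _)| bound)
        .collect()
}

fn main() {
    // Active window 30 tolerates up to 4 files; inactive windows tolerate 1.
    let picked = windows_to_compact(&[(10, 2), (20, 1), (30, 3)], Some(30), 4, 1);
    assert_eq!(picked, vec![10]);
}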
|
||||
|
||||
impl<S: LogStore> Picker for TwcsPicker<S> {
|
||||
type Request = CompactionRequestImpl<S>;
|
||||
type Task = CompactionTaskImpl<S>;
|
||||
|
||||
fn pick(&self, req: &Self::Request) -> crate::error::Result<Option<Self::Task>> {
|
||||
let levels = req.levels();
|
||||
let expired_ssts = get_expired_ssts(levels.levels(), req.ttl, Timestamp::current_millis())?;
|
||||
if !expired_ssts.is_empty() {
|
||||
info!(
|
||||
"Expired SSTs in region {}: {:?}",
|
||||
req.region_id, expired_ssts
|
||||
);
|
||||
// here we mark expired SSTs as compacting to avoid them being picked.
|
||||
expired_ssts.iter().for_each(|f| f.mark_compacting(true));
|
||||
}
|
||||
|
||||
let time_window_size = req
|
||||
.compaction_time_window
|
||||
.or(self.time_window_seconds)
|
||||
.unwrap_or_else(|| {
|
||||
let inferred = infer_time_bucket(req.levels().level(0).files());
|
||||
info!(
|
||||
"Compaction window for region {} is not present, inferring from files: {:?}",
|
||||
req.region_id, inferred
|
||||
);
|
||||
inferred
|
||||
});
|
||||
|
||||
// Find active window from files in level 0.
|
||||
let active_window =
|
||||
find_latest_window_in_seconds(levels.level(0).files(), time_window_size);
|
||||
|
||||
let windows = assign_to_windows(
|
||||
levels.levels().iter().flat_map(LevelMeta::files),
|
||||
time_window_size,
|
||||
);
|
||||
|
||||
let outputs = self.build_output(&windows, active_window, time_window_size);
|
||||
let task = CompactionTaskImpl {
|
||||
schema: req.schema(),
|
||||
sst_layer: req.sst_layer.clone(),
|
||||
outputs,
|
||||
writer: req.writer.clone(),
|
||||
shared_data: req.shared.clone(),
|
||||
wal: req.wal.clone(),
|
||||
manifest: req.manifest.clone(),
|
||||
expired_ssts,
|
||||
sst_write_buffer_size: req.sst_write_buffer_size,
|
||||
compaction_time_window: Some(time_window_size),
|
||||
};
|
||||
Ok(Some(task))
|
||||
}
|
||||
}
|
||||
|
||||
/// Assigns files to windows with predefined window size (in seconds) by their max timestamps.
|
||||
fn assign_to_windows<'a>(
|
||||
files: impl Iterator<Item = &'a FileHandle>,
|
||||
time_window_size: i64,
|
||||
) -> BTreeMap<i64, Vec<FileHandle>> {
|
||||
let mut windows: BTreeMap<i64, Vec<FileHandle>> = BTreeMap::new();
|
||||
// Iterates all files and assigns them to time windows according to their max timestamps
|
||||
for file in files {
|
||||
if let Some((_, end)) = file.time_range() {
|
||||
let time_window = end
|
||||
.convert_to(TimeUnit::Second)
|
||||
.unwrap()
|
||||
.value()
|
||||
.align_to_ceil_by_bucket(time_window_size)
|
||||
.unwrap_or(i64::MIN);
|
||||
windows.entry(time_window).or_default().push(file.clone());
|
||||
} else {
|
||||
warn!("Unexpected file w/o timestamp: {:?}", file.file_id());
|
||||
}
|
||||
}
|
||||
windows
|
||||
}
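The assignment above hinges on ceil-aligning a file's max timestamp (converted to seconds) to the window size. The sketch below is a hand-rolled stand-in for that alignment rule, not the crate's actual `align_to_ceil_by_bucket` (whose rounding and overflow handling may differ); it only illustrates how max timestamps map to window upper bounds.

fn align_to_ceil_by_bucket_sketch(v: i64, bucket: i64) -> Option<i64> {
    // Smallest multiple of `bucket` that is >= `v`; `None` if that multiple overflows i64.
    if v.rem_euclid(bucket) == 0 {
        Some(v)
    } else {
        v.div_euclid(bucket).checked_add(1)?.checked_mul(bucket)
    }
}

fn main() {
    assert_eq!(Some(0), align_to_ceil_by_bucket_sketch(-1, 3)); // negative seconds round up to 0
    assert_eq!(Some(3), align_to_ceil_by_bucket_sketch(2, 3)); // 2 s falls into the (0, 3] window
    assert_eq!(Some(12), align_to_ceil_by_bucket_sketch(10, 3)); // 10 s falls into the (9, 12] window
    assert_eq!(None, align_to_ceil_by_bucket_sketch(i64::MAX, 10)); // overflow is reported, not wrapped
}

These values happen to match the window keys (0, 3, 12) asserted in `test_assign_to_windows` below.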
|
||||
|
||||
/// Finds the latest active writing window among all files.
|
||||
/// Returns `None` when there are no files or all files are corrupted.
|
||||
fn find_latest_window_in_seconds<'a>(
|
||||
files: impl Iterator<Item = &'a FileHandle>,
|
||||
time_window_size: i64,
|
||||
) -> Option<i64> {
|
||||
let mut latest_timestamp = None;
|
||||
for f in files {
|
||||
if let Some((_, end)) = f.time_range() {
|
||||
if let Some(latest) = latest_timestamp && end > latest {
|
||||
latest_timestamp = Some(end);
|
||||
} else {
|
||||
latest_timestamp = Some(end);
|
||||
}
|
||||
} else {
|
||||
warn!("Cannot find timestamp range of file: {}", f.file_id());
|
||||
}
|
||||
}
|
||||
latest_timestamp
|
||||
.and_then(|ts| ts.convert_to_ceil(TimeUnit::Second))
|
||||
.and_then(|ts| ts.value().align_to_ceil_by_bucket(time_window_size))
|
||||
}
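For the extreme values exercised in the tests below, the arithmetic can be checked in isolation. This is a small standalone check (assuming plain Rust integer division for the millisecond-to-second ceiling conversion and for the bucket alignment), not code from the crate:

fn main() {
    // i64::MAX as a max timestamp in milliseconds; ceil-convert it to seconds.
    let max_ms = i64::MAX; // 9_223_372_036_854_775_807, not a multiple of 1000
    let max_sec_ceil = max_ms / 1000 + 1;
    // Ceil-align the seconds value to a 10_000-second bucket
    // (the "+ 1 then multiply" form is valid here because the value is not an exact multiple).
    let aligned = (max_sec_ceil / 10_000 + 1) * 10_000;
    // This matches the expected value used in `test_get_latest_window_in_seconds`.
    assert_eq!(aligned, (i64::MAX / 10_000_000 + 1) * 10_000);
}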
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashSet;
|
||||
|
||||
use log_store::NoopLogStore;
|
||||
|
||||
use super::*;
|
||||
use crate::compaction::tests::new_file_handle;
|
||||
use crate::sst::{FileId, Level};
|
||||
|
||||
#[test]
|
||||
fn test_get_latest_window_in_seconds() {
|
||||
assert_eq!(
|
||||
Some(1),
|
||||
find_latest_window_in_seconds([new_file_handle(FileId::random(), 0, 999, 0)].iter(), 1)
|
||||
);
|
||||
assert_eq!(
|
||||
Some(1),
|
||||
find_latest_window_in_seconds(
|
||||
[new_file_handle(FileId::random(), 0, 1000, 0)].iter(),
|
||||
1
|
||||
)
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
Some(-9223372036854000),
|
||||
find_latest_window_in_seconds(
|
||||
[new_file_handle(FileId::random(), i64::MIN, i64::MIN + 1, 0)].iter(),
|
||||
3600,
|
||||
)
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
(i64::MAX / 10000000 + 1) * 10000,
|
||||
find_latest_window_in_seconds(
|
||||
[new_file_handle(FileId::random(), i64::MIN, i64::MAX, 0)].iter(),
|
||||
10000,
|
||||
)
|
||||
.unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_assign_to_windows() {
|
||||
let windows = assign_to_windows(
|
||||
[
|
||||
new_file_handle(FileId::random(), 0, 999, 0),
|
||||
new_file_handle(FileId::random(), 0, 999, 0),
|
||||
new_file_handle(FileId::random(), 0, 999, 0),
|
||||
new_file_handle(FileId::random(), 0, 999, 0),
|
||||
new_file_handle(FileId::random(), 0, 999, 0),
|
||||
]
|
||||
.iter(),
|
||||
3,
|
||||
);
|
||||
assert_eq!(5, windows.get(&0).unwrap().len());
|
||||
|
||||
let files = [FileId::random(); 3];
|
||||
let windows = assign_to_windows(
|
||||
[
|
||||
new_file_handle(files[0], -2000, -3, 0),
|
||||
new_file_handle(files[1], 0, 2999, 0),
|
||||
new_file_handle(files[2], 50, 10001, 0),
|
||||
]
|
||||
.iter(),
|
||||
3,
|
||||
);
|
||||
assert_eq!(files[0], windows.get(&0).unwrap().get(0).unwrap().file_id());
|
||||
assert_eq!(files[1], windows.get(&3).unwrap().get(0).unwrap().file_id());
|
||||
assert_eq!(
|
||||
files[2],
|
||||
windows.get(&12).unwrap().get(0).unwrap().file_id()
|
||||
);
|
||||
}
|
||||
|
||||
struct CompactionPickerTestCase {
|
||||
window_size: i64,
|
||||
input_files: Vec<FileHandle>,
|
||||
expected_outputs: Vec<ExpectedOutput>,
|
||||
}
|
||||
|
||||
impl CompactionPickerTestCase {
|
||||
fn check(&self) {
|
||||
let windows = assign_to_windows(self.input_files.iter(), self.window_size);
|
||||
let active_window =
|
||||
find_latest_window_in_seconds(self.input_files.iter(), self.window_size);
|
||||
let output = TwcsPicker::<NoopLogStore>::new(4, 1, None).build_output(
|
||||
&windows,
|
||||
active_window,
|
||||
self.window_size,
|
||||
);
|
||||
|
||||
let output = output
|
||||
.iter()
|
||||
.map(|o| {
|
||||
let input_file_ids =
|
||||
o.inputs.iter().map(|f| f.file_id()).collect::<HashSet<_>>();
|
||||
(
|
||||
input_file_ids,
|
||||
o.output_level,
|
||||
o.time_window_sec,
|
||||
o.time_window_bound,
|
||||
o.strict_window,
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let expected = self
|
||||
.expected_outputs
|
||||
.iter()
|
||||
.map(|o| {
|
||||
let input_file_ids = o
|
||||
.input_files
|
||||
.iter()
|
||||
.map(|idx| self.input_files[*idx].file_id())
|
||||
.collect::<HashSet<_>>();
|
||||
(
|
||||
input_file_ids,
|
||||
o.output_level,
|
||||
o.time_window_sec,
|
||||
o.time_window_bound,
|
||||
o.strict_window,
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
assert_eq!(expected, output);
|
||||
}
|
||||
}
|
||||
|
||||
struct ExpectedOutput {
|
||||
input_files: Vec<usize>,
|
||||
output_level: Level,
|
||||
time_window_sec: i64,
|
||||
time_window_bound: i64,
|
||||
strict_window: bool,
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_twcs_output() {
|
||||
let file_ids = (0..4).map(|_| FileId::random()).collect::<Vec<_>>();
|
||||
|
||||
CompactionPickerTestCase {
|
||||
window_size: 3,
|
||||
input_files: [
|
||||
new_file_handle(file_ids[0], -2000, -3, 0),
|
||||
new_file_handle(file_ids[1], -3000, -100, 0),
|
||||
new_file_handle(file_ids[2], 0, 2999, 0), // active window
|
||||
new_file_handle(file_ids[3], 50, 2998, 0), // active window
|
||||
]
|
||||
.to_vec(),
|
||||
expected_outputs: vec![ExpectedOutput {
|
||||
input_files: vec![0, 1],
|
||||
output_level: 1,
|
||||
time_window_sec: 3,
|
||||
time_window_bound: 0,
|
||||
strict_window: false,
|
||||
}],
|
||||
}
|
||||
.check();
|
||||
|
||||
let file_ids = (0..6).map(|_| FileId::random()).collect::<Vec<_>>();
|
||||
CompactionPickerTestCase {
|
||||
window_size: 3,
|
||||
input_files: [
|
||||
new_file_handle(file_ids[0], -2000, -3, 0),
|
||||
new_file_handle(file_ids[1], -3000, -100, 0),
|
||||
new_file_handle(file_ids[2], 0, 2999, 0),
|
||||
new_file_handle(file_ids[3], 50, 2998, 0),
|
||||
new_file_handle(file_ids[4], 11, 2990, 0),
|
||||
new_file_handle(file_ids[5], 50, 4998, 0),
|
||||
]
|
||||
.to_vec(),
|
||||
expected_outputs: vec![
|
||||
ExpectedOutput {
|
||||
input_files: vec![0, 1],
|
||||
output_level: 1,
|
||||
time_window_sec: 3,
|
||||
time_window_bound: 0,
|
||||
strict_window: false,
|
||||
},
|
||||
ExpectedOutput {
|
||||
input_files: vec![2, 3, 4],
|
||||
output_level: 1,
|
||||
time_window_sec: 3,
|
||||
time_window_bound: 3,
|
||||
strict_window: false,
|
||||
},
|
||||
],
|
||||
}
|
||||
.check();
|
||||
}
|
||||
}
|
||||
@@ -16,6 +16,7 @@ use common_query::logical_plan::{DfExpr, Expr};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use datafusion_expr::Operator;
|
||||
use datatypes::value::timestamp_to_scalar_value;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::chunk::{ChunkReaderBuilder, ChunkReaderImpl};
|
||||
use crate::error;
|
||||
@@ -24,11 +25,11 @@ use crate::sst::{AccessLayerRef, FileHandle};
|
||||
|
||||
/// Builds an SST reader that only reads rows within the given time range.
|
||||
pub(crate) async fn build_sst_reader(
|
||||
region_id: RegionId,
|
||||
schema: RegionSchemaRef,
|
||||
sst_layer: AccessLayerRef,
|
||||
files: &[FileHandle],
|
||||
lower_sec_inclusive: i64,
|
||||
upper_sec_exclusive: i64,
|
||||
time_range: (Option<i64>, Option<i64>),
|
||||
) -> error::Result<ChunkReaderImpl> {
|
||||
// TODO(hl): Schemas in different SSTs may differ, thus we should infer
|
||||
// the timestamp column name from the Parquet metadata.
|
||||
@@ -38,17 +39,12 @@ pub(crate) async fn build_sst_reader(
|
||||
let ts_col_unit = ts_col.data_type.as_timestamp().unwrap().unit();
|
||||
let ts_col_name = ts_col.name.clone();
|
||||
|
||||
ChunkReaderBuilder::new(schema, sst_layer)
|
||||
ChunkReaderBuilder::new(region_id, schema, sst_layer)
|
||||
.pick_ssts(files)
|
||||
.filters(
|
||||
build_time_range_filter(
|
||||
lower_sec_inclusive,
|
||||
upper_sec_exclusive,
|
||||
&ts_col_name,
|
||||
ts_col_unit,
|
||||
)
|
||||
.into_iter()
|
||||
.collect(),
|
||||
build_time_range_filter(time_range, &ts_col_name, ts_col_unit)
|
||||
.into_iter()
|
||||
.collect(),
|
||||
)
|
||||
.build()
|
||||
.await
|
||||
@@ -57,21 +53,22 @@ pub(crate) async fn build_sst_reader(
|
||||
/// Builds a time range filter expr from the lower (inclusive) and upper (exclusive) bounds.
|
||||
/// Returns `None` if time range overflows.
|
||||
fn build_time_range_filter(
|
||||
low_sec: i64,
|
||||
high_sec: i64,
|
||||
time_range: (Option<i64>, Option<i64>),
|
||||
ts_col_name: &str,
|
||||
ts_col_unit: TimeUnit,
|
||||
) -> Option<Expr> {
|
||||
debug_assert!(low_sec <= high_sec);
|
||||
let (low_ts_inclusive, high_ts_exclusive) = time_range;
|
||||
let ts_col = DfExpr::Column(datafusion_common::Column::from_name(ts_col_name));
|
||||
|
||||
// Converting seconds to any other unit won't lose precision.
// Here we only handle overflow.
|
||||
let low_ts = common_time::Timestamp::new_second(low_sec)
|
||||
.convert_to(ts_col_unit)
|
||||
let low_ts = low_ts_inclusive
|
||||
.map(common_time::Timestamp::new_second)
|
||||
.and_then(|ts| ts.convert_to(ts_col_unit))
|
||||
.map(|ts| ts.value());
|
||||
let high_ts = common_time::Timestamp::new_second(high_sec)
|
||||
.convert_to(ts_col_unit)
|
||||
let high_ts = high_ts_exclusive
|
||||
.map(common_time::Timestamp::new_second)
|
||||
.and_then(|ts| ts.convert_to(ts_col_unit))
|
||||
.map(|ts| ts.value());
|
||||
|
||||
let expr = match (low_ts, high_ts) {
|
||||
@@ -139,6 +136,8 @@ mod tests {
|
||||
use crate::sst::{self, FileId, FileMeta, FsAccessLayer, Source, SstInfo, WriteOptions};
|
||||
use crate::test_util::descriptor_util::RegionDescBuilder;
|
||||
|
||||
const REGION_ID: RegionId = 1;
|
||||
|
||||
fn schema_for_test() -> RegionSchemaRef {
|
||||
// Just build a region desc and use its columns metadata.
|
||||
let desc = RegionDescBuilder::new("test")
|
||||
@@ -277,7 +276,9 @@ mod tests {
|
||||
handle
|
||||
}
|
||||
|
||||
// The region id is only used to build the reader; we don't check its content.
|
||||
async fn check_reads(
|
||||
region_id: RegionId,
|
||||
schema: RegionSchemaRef,
|
||||
sst_layer: AccessLayerRef,
|
||||
files: &[FileHandle],
|
||||
@@ -286,11 +287,11 @@ mod tests {
|
||||
expect: &[i64],
|
||||
) {
|
||||
let mut reader = build_sst_reader(
|
||||
region_id,
|
||||
schema,
|
||||
sst_layer,
|
||||
files,
|
||||
lower_sec_inclusive,
|
||||
upper_sec_exclusive,
|
||||
(Some(lower_sec_inclusive), Some(upper_sec_exclusive)),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -352,6 +353,7 @@ mod tests {
|
||||
let files = vec![file1, file2];
|
||||
// Read from two SST files with a time range filter.
|
||||
check_reads(
|
||||
REGION_ID,
|
||||
schema.clone(),
|
||||
sst_layer.clone(),
|
||||
&files,
|
||||
@@ -361,7 +363,7 @@ mod tests {
|
||||
)
|
||||
.await;
|
||||
|
||||
check_reads(schema, sst_layer, &files, 1, 2, &[1000]).await;
|
||||
check_reads(REGION_ID, schema, sst_layer, &files, 1, 2, &[1000]).await;
|
||||
}
|
||||
|
||||
async fn read_file(
|
||||
@@ -370,9 +372,15 @@ mod tests {
|
||||
sst_layer: AccessLayerRef,
|
||||
) -> Vec<i64> {
|
||||
let mut timestamps = vec![];
|
||||
let mut reader = build_sst_reader(schema, sst_layer, files, i64::MIN, i64::MAX)
|
||||
.await
|
||||
.unwrap();
|
||||
let mut reader = build_sst_reader(
|
||||
REGION_ID,
|
||||
schema,
|
||||
sst_layer,
|
||||
files,
|
||||
(Some(i64::MIN), Some(i64::MAX)),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
while let Some(chunk) = reader.next_chunk().await.unwrap() {
|
||||
let ts = chunk.columns[0]
|
||||
.as_any()
|
||||
@@ -434,15 +442,33 @@ mod tests {
|
||||
let sst_layer = Arc::new(FsAccessLayer::new("./", object_store.clone()));
|
||||
let input_files = vec![file2, file1];
|
||||
|
||||
let reader1 = build_sst_reader(schema.clone(), sst_layer.clone(), &input_files, 0, 3)
|
||||
.await
|
||||
.unwrap();
|
||||
let reader2 = build_sst_reader(schema.clone(), sst_layer.clone(), &input_files, 3, 6)
|
||||
.await
|
||||
.unwrap();
|
||||
let reader3 = build_sst_reader(schema.clone(), sst_layer.clone(), &input_files, 6, 10)
|
||||
.await
|
||||
.unwrap();
|
||||
let reader1 = build_sst_reader(
|
||||
REGION_ID,
|
||||
schema.clone(),
|
||||
sst_layer.clone(),
|
||||
&input_files,
|
||||
(Some(0), Some(3)),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let reader2 = build_sst_reader(
|
||||
REGION_ID,
|
||||
schema.clone(),
|
||||
sst_layer.clone(),
|
||||
&input_files,
|
||||
(Some(3), Some(6)),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let reader3 = build_sst_reader(
|
||||
REGION_ID,
|
||||
schema.clone(),
|
||||
sst_layer.clone(),
|
||||
&input_files,
|
||||
(Some(6), Some(10)),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let opts = WriteOptions {
|
||||
sst_write_buffer_size: ReadableSize::mb(8),
|
||||
@@ -525,7 +551,12 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_build_time_range_filter() {
|
||||
assert!(build_time_range_filter(i64::MIN, i64::MAX, "ts", TimeUnit::Nanosecond).is_none());
|
||||
assert!(build_time_range_filter(
|
||||
(Some(i64::MIN), Some(i64::MAX)),
|
||||
"ts",
|
||||
TimeUnit::Nanosecond
|
||||
)
|
||||
.is_none());
|
||||
|
||||
assert_eq!(
|
||||
Expr::from(datafusion_expr::binary_expr(
|
||||
@@ -533,10 +564,10 @@ mod tests {
|
||||
Operator::Lt,
|
||||
datafusion_expr::lit(timestamp_to_scalar_value(
|
||||
TimeUnit::Nanosecond,
|
||||
Some(TimeUnit::Second.factor() as i64 / TimeUnit::Nanosecond.factor() as i64)
|
||||
))
|
||||
Some(TimeUnit::Second.factor() as i64 / TimeUnit::Nanosecond.factor() as i64),
|
||||
)),
|
||||
)),
|
||||
build_time_range_filter(i64::MIN, 1, "ts", TimeUnit::Nanosecond).unwrap()
|
||||
build_time_range_filter((Some(i64::MIN), Some(1)), "ts", TimeUnit::Nanosecond).unwrap()
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
@@ -547,10 +578,10 @@ mod tests {
|
||||
TimeUnit::Nanosecond,
|
||||
Some(
|
||||
2 * TimeUnit::Second.factor() as i64 / TimeUnit::Nanosecond.factor() as i64
|
||||
)
|
||||
))
|
||||
),
|
||||
)),
|
||||
)),
|
||||
build_time_range_filter(2, i64::MAX, "ts", TimeUnit::Nanosecond).unwrap()
|
||||
build_time_range_filter((Some(2), Some(i64::MAX)), "ts", TimeUnit::Nanosecond).unwrap()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,8 +23,8 @@ use snafu::ResultExt;
|
||||
use store_api::logstore::LogStore;
|
||||
use store_api::manifest::Manifest;
|
||||
use store_api::storage::{
|
||||
CloseContext, CloseOptions, CreateOptions, EngineContext, OpenOptions, Region,
|
||||
RegionDescriptor, StorageEngine,
|
||||
CloseContext, CloseOptions, CompactionStrategy, CreateOptions, EngineContext, OpenOptions,
|
||||
Region, RegionDescriptor, StorageEngine,
|
||||
};
|
||||
|
||||
use crate::compaction::CompactionSchedulerRef;
|
||||
@@ -395,6 +395,7 @@ impl<S: LogStore> EngineInner<S> {
|
||||
name,
|
||||
&self.config,
|
||||
opts.ttl,
|
||||
opts.compaction_strategy.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -440,6 +441,7 @@ impl<S: LogStore> EngineInner<S> {
|
||||
®ion_name,
|
||||
&self.config,
|
||||
opts.ttl,
|
||||
opts.compaction_strategy.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -471,6 +473,7 @@ impl<S: LogStore> EngineInner<S> {
|
||||
region_name: &str,
|
||||
config: &EngineConfig,
|
||||
region_ttl: Option<Duration>,
|
||||
compaction_strategy: CompactionStrategy,
|
||||
) -> Result<StoreConfig<S>> {
|
||||
let parent_dir = util::normalize_dir(parent_dir);
|
||||
|
||||
@@ -503,6 +506,7 @@ impl<S: LogStore> EngineInner<S> {
|
||||
ttl,
|
||||
write_buffer_size: write_buffer_size
|
||||
.unwrap_or(self.config.region_write_buffer_size.as_bytes() as usize),
|
||||
compaction_strategy,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ use store_api::storage::{RegionId, SequenceNumber};
|
||||
use tokio::sync::oneshot::{Receiver, Sender};
|
||||
use tokio::sync::{oneshot, Notify};
|
||||
|
||||
use crate::compaction::{CompactionRequestImpl, CompactionSchedulerRef};
|
||||
use crate::compaction::{CompactionPickerRef, CompactionRequestImpl, CompactionSchedulerRef};
|
||||
use crate::config::EngineConfig;
|
||||
use crate::engine::RegionMap;
|
||||
use crate::error::{
|
||||
@@ -109,6 +109,7 @@ pub struct FlushRegionRequest<S: LogStore> {
|
||||
pub ttl: Option<Duration>,
|
||||
/// Time window for compaction.
|
||||
pub compaction_time_window: Option<i64>,
|
||||
pub compaction_picker: CompactionPickerRef<S>,
|
||||
}
|
||||
|
||||
impl<S: LogStore> FlushRegionRequest<S> {
|
||||
@@ -146,6 +147,7 @@ impl<S: LogStore> From<&FlushRegionRequest<S>> for CompactionRequestImpl<S> {
|
||||
ttl: req.ttl,
|
||||
compaction_time_window: req.compaction_time_window,
|
||||
sender: None,
|
||||
picker: req.compaction_picker.clone(),
|
||||
sst_write_buffer_size: req.engine_config.sst_write_buffer_size,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,9 +75,7 @@ pub trait Memtable: Send + Sync + fmt::Debug {
|
||||
/// Iterates the memtable.
|
||||
fn iter(&self, ctx: IterContext) -> Result<BoxedBatchIterator>;
|
||||
|
||||
/// Returns the estimated bytes allocated by this memtable from heap. Result
|
||||
/// of this method may be larger than the estimated based on [`num_rows`] because
|
||||
/// of the implementor's pre-alloc behavior.
|
||||
/// Returns the number of rows in the memtable.
|
||||
fn num_rows(&self) -> usize;
|
||||
|
||||
/// Returns stats of this memtable.
|
||||
|
||||
@@ -14,9 +14,10 @@
|
||||
|
||||
//! Common structs and utilities for read.
|
||||
|
||||
mod chain;
|
||||
mod dedup;
|
||||
mod merge;
|
||||
pub(crate) mod windowed;
|
||||
mod windowed;
|
||||
|
||||
use std::cmp::Ordering;
|
||||
|
||||
@@ -25,11 +26,13 @@ use common_base::BitVec;
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::vectors::{BooleanVector, MutableVector, VectorRef};
|
||||
pub use dedup::DedupReader;
|
||||
pub use merge::{MergeReader, MergeReaderBuilder};
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::error::{self, Result};
|
||||
pub use crate::read::chain::ChainReader;
|
||||
pub use crate::read::dedup::DedupReader;
|
||||
pub use crate::read::merge::{MergeReader, MergeReaderBuilder};
|
||||
pub use crate::read::windowed::WindowedReader;
|
||||
|
||||
/// Storage internal representation of a batch of rows.
|
||||
// Now the structure of `Batch` is still unstable, all pub fields may be changed.
|
||||
|
||||
src/storage/src/read/chain.rs (new file, 124 lines)
@@ -0,0 +1,124 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::read::{Batch, BatchReader};
|
||||
use crate::schema::ProjectedSchemaRef;
|
||||
|
||||
/// A reader that simply chains the outputs of the input readers.
|
||||
pub struct ChainReader<R> {
|
||||
/// Schema to read
|
||||
pub schema: ProjectedSchemaRef,
|
||||
/// Each reader reads the data of one time window
|
||||
pub readers: Vec<R>,
|
||||
}
|
||||
|
||||
impl<R> ChainReader<R> {
|
||||
/// Returns a new [ChainReader] with the given input `readers`.
|
||||
pub fn new(schema: ProjectedSchemaRef, mut readers: Vec<R>) -> Self {
|
||||
// Reverse the readers since we iterate over them from the back.
|
||||
readers.reverse();
|
||||
Self { schema, readers }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<R> BatchReader for ChainReader<R>
|
||||
where
|
||||
R: BatchReader,
|
||||
{
|
||||
async fn next_batch(&mut self) -> Result<Option<Batch>> {
|
||||
while let Some(reader) = self.readers.last_mut() {
|
||||
if let Some(batch) = reader.next_batch().await? {
|
||||
return Ok(Some(batch));
|
||||
} else {
|
||||
// Remove the exhausted reader.
|
||||
self.readers.pop();
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::test_util::read_util::{self, Batches, VecBatchReader};
|
||||
|
||||
fn build_chain_reader(sources: &[Batches]) -> ChainReader<VecBatchReader> {
|
||||
let schema = read_util::new_projected_schema();
|
||||
let readers = sources
|
||||
.iter()
|
||||
.map(|source| read_util::build_vec_reader(source))
|
||||
.collect();
|
||||
|
||||
ChainReader::new(schema, readers)
|
||||
}
|
||||
|
||||
async fn check_chain_reader_result(
|
||||
mut reader: ChainReader<VecBatchReader>,
|
||||
input: &[Batches<'_>],
|
||||
) {
|
||||
let expect: Vec<_> = input
|
||||
.iter()
|
||||
.flat_map(|v| v.iter())
|
||||
.flat_map(|v| v.iter().copied())
|
||||
.collect();
|
||||
|
||||
let result = read_util::collect_kv_batch(&mut reader).await;
|
||||
assert_eq!(expect, result);
|
||||
|
||||
// Calling next_batch() again is allowed.
|
||||
assert!(reader.next_batch().await.unwrap().is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_chain_empty() {
|
||||
let mut reader = build_chain_reader(&[]);
|
||||
|
||||
assert!(reader.next_batch().await.unwrap().is_none());
|
||||
// Calling next_batch() again is allowed.
|
||||
assert!(reader.next_batch().await.unwrap().is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_chain_one() {
|
||||
let input: &[Batches] = &[&[
|
||||
&[(1, Some(1)), (2, Some(2))],
|
||||
&[(3, Some(3)), (4, Some(4))],
|
||||
&[(5, Some(5))],
|
||||
]];
|
||||
|
||||
let reader = build_chain_reader(input);
|
||||
|
||||
check_chain_reader_result(reader, input).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_chain_multi() {
|
||||
let input: &[Batches] = &[
|
||||
&[
|
||||
&[(1, Some(1)), (2, Some(2))],
|
||||
&[(3, Some(3)), (4, Some(4))],
|
||||
&[(5, Some(5))],
|
||||
],
|
||||
&[&[(6, Some(3)), (7, Some(4)), (8, Some(8))], &[(9, Some(9))]],
|
||||
&[&[(10, Some(10)), (11, Some(11))], &[(12, Some(12))]],
|
||||
];
|
||||
|
||||
let reader = build_chain_reader(input);
|
||||
|
||||
check_chain_reader_result(reader, input).await;
|
||||
}
|
||||
}
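The reader above reverses its inputs once and then always works on the last element, popping readers as they are exhausted, so the chained output preserves the original source order without ever shifting elements out of the front of a `Vec`. A minimal synchronous sketch of the same pattern, using toy types rather than the crate's async `BatchReader`, looks like this:

struct ToyChain {
    // Iterators stored in reverse order; the logical "first" source sits at the back.
    readers: Vec<std::vec::IntoIter<i64>>,
}

impl ToyChain {
    fn new(mut sources: Vec<Vec<i64>>) -> Self {
        sources.reverse();
        Self {
            readers: sources.into_iter().map(|v| v.into_iter()).collect(),
        }
    }

    fn next(&mut self) -> Option<i64> {
        while let Some(reader) = self.readers.last_mut() {
            if let Some(v) = reader.next() {
                return Some(v);
            }
            // The current reader is exhausted; drop it and move on to the next one.
            self.readers.pop();
        }
        None
    }
}

fn main() {
    let mut chain = ToyChain::new(vec![vec![1, 2], vec![], vec![3]]);
    let mut out = Vec::new();
    while let Some(v) = chain.next() {
        out.push(v);
    }
    assert_eq!(vec![1, 2, 3], out); // sources are drained in their original order
}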
|
||||
@@ -608,7 +608,7 @@ mod tests {
|
||||
use datatypes::vectors::{Int64Vector, TimestampMillisecondVector};
|
||||
|
||||
use super::*;
|
||||
use crate::test_util::read_util;
|
||||
use crate::test_util::read_util::{self, Batches};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_merge_reader_empty() {
|
||||
@@ -653,8 +653,6 @@ mod tests {
|
||||
assert!(output.contains("pos: 1"));
|
||||
}
|
||||
|
||||
type Batches<'a> = &'a [&'a [(i64, Option<i64>)]];
|
||||
|
||||
fn build_merge_reader(sources: &[Batches], num_iter: usize, batch_size: usize) -> MergeReader {
|
||||
let schema = read_util::new_projected_schema();
|
||||
let mut builder =
|
||||
|
||||
@@ -32,11 +32,13 @@ use store_api::manifest::{
|
||||
self, Manifest, ManifestLogStorage, ManifestVersion, MetaActionIterator,
|
||||
};
|
||||
use store_api::storage::{
|
||||
AlterRequest, CloseContext, FlushContext, FlushReason, OpenOptions, ReadContext, Region,
|
||||
RegionId, SequenceNumber, WriteContext, WriteResponse,
|
||||
AlterRequest, CloseContext, CompactionStrategy, FlushContext, FlushReason, OpenOptions,
|
||||
ReadContext, Region, RegionId, SequenceNumber, WriteContext, WriteResponse,
|
||||
};
|
||||
|
||||
use crate::compaction::CompactionSchedulerRef;
|
||||
use crate::compaction::{
|
||||
compaction_strategy_to_picker, CompactionPickerRef, CompactionSchedulerRef,
|
||||
};
|
||||
use crate::config::EngineConfig;
|
||||
use crate::error::{self, Error, Result};
|
||||
use crate::file_purger::FilePurgerRef;
|
||||
@@ -164,6 +166,7 @@ pub struct StoreConfig<S: LogStore> {
|
||||
pub file_purger: FilePurgerRef,
|
||||
pub ttl: Option<Duration>,
|
||||
pub write_buffer_size: usize,
|
||||
pub compaction_strategy: CompactionStrategy,
|
||||
}
|
||||
|
||||
pub type RecoveredMetadata = (SequenceNumber, (ManifestVersion, RawRegionMetadata));
|
||||
@@ -252,6 +255,7 @@ impl<S: LogStore> RegionImpl<S> {
|
||||
flush_strategy: store_config.flush_strategy,
|
||||
flush_scheduler: store_config.flush_scheduler,
|
||||
compaction_scheduler: store_config.compaction_scheduler,
|
||||
compaction_picker: compaction_strategy_to_picker(&store_config.compaction_strategy),
|
||||
sst_layer: store_config.sst_layer,
|
||||
manifest: store_config.manifest,
|
||||
});
|
||||
@@ -336,6 +340,8 @@ impl<S: LogStore> RegionImpl<S> {
|
||||
store_config.ttl,
|
||||
store_config.write_buffer_size,
|
||||
));
|
||||
|
||||
let compaction_picker = compaction_strategy_to_picker(&store_config.compaction_strategy);
|
||||
let writer_ctx = WriterContext {
|
||||
shared: &shared,
|
||||
flush_strategy: &store_config.flush_strategy,
|
||||
@@ -345,6 +351,7 @@ impl<S: LogStore> RegionImpl<S> {
|
||||
wal: &wal,
|
||||
writer: &writer,
|
||||
manifest: &store_config.manifest,
|
||||
compaction_picker: compaction_picker.clone(),
|
||||
};
|
||||
// Replay all unflushed data.
|
||||
writer
|
||||
@@ -364,6 +371,7 @@ impl<S: LogStore> RegionImpl<S> {
|
||||
flush_strategy: store_config.flush_strategy,
|
||||
flush_scheduler: store_config.flush_scheduler,
|
||||
compaction_scheduler: store_config.compaction_scheduler,
|
||||
compaction_picker,
|
||||
sst_layer: store_config.sst_layer,
|
||||
manifest: store_config.manifest,
|
||||
});
|
||||
@@ -586,6 +594,7 @@ impl<S: LogStore> RegionImpl<S> {
|
||||
wal: &inner.wal,
|
||||
writer: &inner.writer,
|
||||
manifest: &inner.manifest,
|
||||
compaction_picker: inner.compaction_picker.clone(),
|
||||
};
|
||||
|
||||
inner.writer.replay(recovered_metadata, writer_ctx).await
|
||||
@@ -642,6 +651,7 @@ struct RegionInner<S: LogStore> {
|
||||
flush_strategy: FlushStrategyRef,
|
||||
flush_scheduler: FlushSchedulerRef<S>,
|
||||
compaction_scheduler: CompactionSchedulerRef<S>,
|
||||
compaction_picker: CompactionPickerRef<S>,
|
||||
sst_layer: AccessLayerRef,
|
||||
manifest: RegionManifest,
|
||||
}
|
||||
@@ -685,6 +695,7 @@ impl<S: LogStore> RegionInner<S> {
|
||||
wal: &self.wal,
|
||||
writer: &self.writer,
|
||||
manifest: &self.manifest,
|
||||
compaction_picker: self.compaction_picker.clone(),
|
||||
};
|
||||
// The writer would also try to compat the schema of the write batch if it finds that the
// schema version of the request is less than the current schema version.
|
||||
@@ -746,6 +757,7 @@ impl<S: LogStore> RegionInner<S> {
|
||||
wal: &self.wal,
|
||||
writer: &self.writer,
|
||||
manifest: &self.manifest,
|
||||
compaction_picker: self.compaction_picker.clone(),
|
||||
};
|
||||
self.writer.flush(writer_ctx, ctx).await
|
||||
}
|
||||
@@ -761,6 +773,7 @@ impl<S: LogStore> RegionInner<S> {
|
||||
wal: &self.wal,
|
||||
writer: &self.writer,
|
||||
manifest: &self.manifest,
|
||||
compaction_picker: self.compaction_picker.clone(),
|
||||
};
|
||||
self.writer.compact(writer_ctx, ctx).await
|
||||
}
|
||||
|
||||
@@ -559,6 +559,7 @@ async fn create_store_config(region_name: &str, root: &str) -> StoreConfig<NoopL
|
||||
file_purger,
|
||||
ttl: None,
|
||||
write_buffer_size: ReadableSize::mb(32).0 as usize,
|
||||
compaction_strategy: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ use object_store::ObjectStore;
|
||||
use store_api::storage::{FlushContext, FlushReason, OpenOptions, Region};
|
||||
use tokio::sync::{Notify, RwLock};
|
||||
|
||||
use crate::compaction::{CompactionHandler, SimplePicker};
|
||||
use crate::compaction::CompactionHandler;
|
||||
use crate::config::EngineConfig;
|
||||
use crate::error::Result;
|
||||
use crate::file_purger::{FilePurgeHandler, FilePurgeRequest};
|
||||
@@ -93,13 +93,8 @@ async fn create_region_for_compaction<
|
||||
store_config.engine_config = Arc::new(engine_config);
|
||||
store_config.flush_strategy = flush_strategy;
|
||||
|
||||
let picker = SimplePicker::default();
|
||||
let pending_compaction_tasks = Arc::new(RwLock::new(vec![]));
|
||||
let handler = CompactionHandler {
|
||||
picker,
|
||||
#[cfg(test)]
|
||||
pending_tasks: pending_compaction_tasks.clone(),
|
||||
};
|
||||
let handler = CompactionHandler::new_with_pending_tasks(pending_compaction_tasks.clone());
|
||||
let config = SchedulerConfig::default();
|
||||
// Overwrite test compaction scheduler and file purger.
|
||||
store_config.compaction_scheduler = Arc::new(LocalScheduler::new(config, handler));
|
||||
@@ -262,12 +257,7 @@ impl CompactionTester {
|
||||
store_config.engine_config = Arc::new(self.engine_config.clone());
|
||||
store_config.flush_strategy = self.flush_strategy.clone();
|
||||
|
||||
let picker = SimplePicker::default();
|
||||
let handler = CompactionHandler {
|
||||
picker,
|
||||
#[cfg(test)]
|
||||
pending_tasks: Arc::new(Default::default()),
|
||||
};
|
||||
let handler = CompactionHandler::new_with_pending_tasks(Arc::new(Default::default()));
|
||||
let config = SchedulerConfig::default();
|
||||
// Overwrite test compaction scheduler and file purger.
|
||||
store_config.compaction_scheduler = Arc::new(LocalScheduler::new(config, handler));
|
||||
|
||||
@@ -252,7 +252,7 @@ async fn test_flush_empty() {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_read_after_flush() {
|
||||
async fn test_read_after_flush_across_window() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let dir = create_temp_dir("read-flush");
|
||||
@@ -289,6 +289,44 @@ async fn test_read_after_flush() {
|
||||
assert_eq!(expect, output);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_read_after_flush_same_window() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let dir = create_temp_dir("read-flush");
|
||||
let store_dir = dir.path().to_str().unwrap();
|
||||
|
||||
let flush_switch = Arc::new(FlushSwitch::default());
|
||||
let tester = FlushTester::new(store_dir, flush_switch.clone()).await;
|
||||
|
||||
// Put elements so we have content to flush.
|
||||
tester.put(&[(1000, Some(100))]).await;
|
||||
tester.put(&[(2000, Some(200))]).await;
|
||||
|
||||
// Flush.
|
||||
tester.flush(None).await;
|
||||
|
||||
// Put element again.
|
||||
tester.put(&[(1003, Some(300))]).await;
|
||||
|
||||
let expect = vec![
|
||||
(1000, Some(100.to_string())),
|
||||
(1003, Some(300.to_string())),
|
||||
(2000, Some(200.to_string())),
|
||||
];
|
||||
|
||||
let output = tester.full_scan().await;
|
||||
assert_eq!(expect, output);
|
||||
|
||||
// Reopen
|
||||
let mut tester = tester;
|
||||
tester.reopen().await;
|
||||
|
||||
// Scan after reopen.
|
||||
let output = tester.full_scan().await;
|
||||
assert_eq!(expect, output);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_merge_read_after_flush() {
|
||||
let dir = create_temp_dir("merge-read-flush");
|
||||
|
||||
@@ -27,7 +27,7 @@ use store_api::storage::{
|
||||
};
|
||||
use tokio::sync::{oneshot, Mutex};
|
||||
|
||||
use crate::compaction::{CompactionRequestImpl, CompactionSchedulerRef};
|
||||
use crate::compaction::{CompactionPickerRef, CompactionRequestImpl, CompactionSchedulerRef};
|
||||
use crate::config::EngineConfig;
|
||||
use crate::error::{self, Result};
|
||||
use crate::flush::{
|
||||
@@ -412,6 +412,7 @@ pub struct WriterContext<'a, S: LogStore> {
|
||||
pub wal: &'a Wal<S>,
|
||||
pub writer: &'a RegionWriterRef,
|
||||
pub manifest: &'a RegionManifest,
|
||||
pub compaction_picker: CompactionPickerRef<S>,
|
||||
}
|
||||
|
||||
impl<'a, S: LogStore> WriterContext<'a, S> {
|
||||
@@ -779,6 +780,7 @@ impl WriterInner {
|
||||
engine_config: self.engine_config.clone(),
|
||||
ttl: self.ttl,
|
||||
compaction_time_window: current_version.ssts().compaction_time_window(),
|
||||
compaction_picker: ctx.compaction_picker.clone(),
|
||||
};
|
||||
|
||||
let flush_handle = ctx
|
||||
@@ -816,6 +818,7 @@ impl WriterInner {
|
||||
ttl: self.ttl,
|
||||
compaction_time_window,
|
||||
sender: None,
|
||||
picker: writer_ctx.compaction_picker.clone(),
|
||||
sst_write_buffer_size,
|
||||
};
|
||||
|
||||
|
||||
@@ -53,15 +53,19 @@ impl Snapshot for SnapshotImpl {
|
||||
let mutables = memtable_version.mutable_memtable();
|
||||
let immutables = memtable_version.immutable_memtables();
|
||||
|
||||
let mut builder =
|
||||
ChunkReaderBuilder::new(self.version.schema().clone(), self.sst_layer.clone())
|
||||
.reserve_num_memtables(memtable_version.num_memtables())
|
||||
.projection(request.projection)
|
||||
.filters(request.filters)
|
||||
.batch_size(ctx.batch_size)
|
||||
.output_ordering(request.output_ordering)
|
||||
.visible_sequence(visible_sequence)
|
||||
.pick_memtables(mutables.clone());
|
||||
let mut builder = ChunkReaderBuilder::new(
|
||||
self.version.metadata().id(),
|
||||
self.version.schema().clone(),
|
||||
self.sst_layer.clone(),
|
||||
)
|
||||
.reserve_num_memtables(memtable_version.num_memtables())
|
||||
.projection(request.projection)
|
||||
.filters(request.filters)
|
||||
.batch_size(ctx.batch_size)
|
||||
.output_ordering(request.output_ordering)
|
||||
.visible_sequence(visible_sequence)
|
||||
.pick_memtables(mutables.clone())
|
||||
.use_chain_reader(true);
|
||||
|
||||
for memtable in immutables {
|
||||
builder = builder.pick_memtables(memtable.clone());
|
||||
|
||||
@@ -125,6 +125,7 @@ pub async fn new_store_config_with_object_store(
|
||||
file_purger,
|
||||
ttl: None,
|
||||
write_buffer_size: DEFAULT_REGION_WRITE_BUFFER_SIZE.as_bytes() as usize,
|
||||
compaction_strategy: Default::default(),
|
||||
},
|
||||
regions,
|
||||
)
|
||||
|
||||
@@ -92,6 +92,8 @@ pub async fn collect_kv_batch(reader: &mut dyn BatchReader) -> Vec<(i64, Option<
|
||||
result
|
||||
}
|
||||
|
||||
pub type Batches<'a> = &'a [&'a [(i64, Option<i64>)]];
|
||||
|
||||
/// A reader for test that pop batch from Vec.
|
||||
pub struct VecBatchReader {
|
||||
schema: ProjectedSchemaRef,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
#![feature(let_chains)]
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
|
||||
@@ -32,7 +32,10 @@ pub use datatypes::schema::{
|
||||
|
||||
pub use self::chunk::{Chunk, ChunkReader};
|
||||
pub use self::descriptors::*;
|
||||
pub use self::engine::{CloseOptions, CreateOptions, EngineContext, OpenOptions, StorageEngine};
|
||||
pub use self::engine::{
|
||||
CloseOptions, CompactionStrategy, CreateOptions, EngineContext, OpenOptions, StorageEngine,
|
||||
TwcsOptions,
|
||||
};
|
||||
pub use self::metadata::RegionMeta;
|
||||
pub use self::region::{CloseContext, FlushContext, FlushReason, Region, RegionStat, WriteContext};
|
||||
pub use self::requests::{
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
//! a [`StorageEngine`] instance manages a bunch of storage units called [`Region`]s, which hold
//! chunks of rows and support operations like PUT/DELETE/SCAN.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
@@ -26,6 +27,13 @@ use common_error::ext::ErrorExt;
|
||||
use crate::storage::descriptors::RegionDescriptor;
|
||||
use crate::storage::region::Region;
|
||||
|
||||
const COMPACTION_STRATEGY_KEY: &str = "compaction";
|
||||
const COMPACTION_STRATEGY_LEVELED_TIME_WINDOW_VALUE: &str = "LTW";
|
||||
const COMPACTION_STRATEGY_TWCS_VALUE: &str = "TWCS";
|
||||
const TWCS_MAX_ACTIVE_WINDOW_FILES_KEY: &str = "compaction.twcs.max_active_window_files";
|
||||
const TWCS_TIME_WINDOW_SECONDS_KEY: &str = "compaction.twcs.time_window_seconds";
|
||||
const TWCS_MAX_INACTIVE_WINDOW_FILES_KEY: &str = "compaction.twcs.max_inactive_window_files";
|
||||
|
||||
/// Storage engine provides primitive operations to store and access data.
|
||||
#[async_trait]
|
||||
pub trait StorageEngine: Send + Sync + Clone + 'static {
|
||||
@@ -92,6 +100,8 @@ pub struct CreateOptions {
|
||||
pub write_buffer_size: Option<usize>,
|
||||
/// Region SST files TTL
|
||||
pub ttl: Option<Duration>,
|
||||
/// Compaction strategy
|
||||
pub compaction_strategy: CompactionStrategy,
|
||||
}
|
||||
|
||||
/// Options to open a region.
|
||||
@@ -103,6 +113,8 @@ pub struct OpenOptions {
|
||||
pub write_buffer_size: Option<usize>,
|
||||
/// Region SST files TTL
|
||||
pub ttl: Option<Duration>,
|
||||
/// Compaction strategy
|
||||
pub compaction_strategy: CompactionStrategy,
|
||||
}
|
||||
|
||||
/// Options to close a region.
|
||||
@@ -111,3 +123,70 @@ pub struct CloseOptions {
|
||||
/// Flush region
|
||||
pub flush: bool,
|
||||
}
|
||||
|
||||
/// Options for compaction
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub enum CompactionStrategy {
|
||||
/// Leveled time window compaction strategy
|
||||
#[default]
|
||||
LeveledTimeWindow,
|
||||
/// Time-window compaction strategy (TWCS)
|
||||
Twcs(TwcsOptions),
|
||||
}
|
||||
|
||||
/// TWCS compaction options.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TwcsOptions {
|
||||
/// Max number of files that can be kept in the active writing time window.
|
||||
pub max_active_window_files: usize,
|
||||
/// Max number of files that can be kept in an inactive time window.
|
||||
pub max_inactive_window_files: usize,
|
||||
/// Compaction time window defined when creating tables.
|
||||
pub time_window_seconds: Option<i64>,
|
||||
}
|
||||
|
||||
impl Default for TwcsOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
max_active_window_files: 4,
|
||||
max_inactive_window_files: 1,
|
||||
time_window_seconds: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&HashMap<String, String>> for CompactionStrategy {
|
||||
fn from(opts: &HashMap<String, String>) -> Self {
|
||||
let Some(strategy_name) = opts.get(COMPACTION_STRATEGY_KEY) else { return CompactionStrategy::default() };
|
||||
if strategy_name.eq_ignore_ascii_case(COMPACTION_STRATEGY_LEVELED_TIME_WINDOW_VALUE) {
|
||||
CompactionStrategy::LeveledTimeWindow
|
||||
} else if strategy_name.eq_ignore_ascii_case(COMPACTION_STRATEGY_TWCS_VALUE) {
|
||||
let mut twcs_opts = TwcsOptions::default();
|
||||
if let Some(max_active_window_files) = opts
|
||||
.get(TWCS_MAX_ACTIVE_WINDOW_FILES_KEY)
|
||||
.and_then(|num| num.parse::<usize>().ok())
|
||||
{
|
||||
twcs_opts.max_active_window_files = max_active_window_files;
|
||||
}
|
||||
|
||||
if let Some(max_inactive_window_files) = opts
|
||||
.get(TWCS_MAX_INACTIVE_WINDOW_FILES_KEY)
|
||||
.and_then(|num| num.parse::<usize>().ok())
|
||||
{
|
||||
twcs_opts.max_inactive_window_files = max_inactive_window_files;
|
||||
}
|
||||
|
||||
if let Some(time_window) = opts
|
||||
.get(TWCS_TIME_WINDOW_SECONDS_KEY)
|
||||
.and_then(|num| num.parse::<i64>().ok()) && time_window > 0
|
||||
{
|
||||
twcs_opts.time_window_seconds = Some(time_window);
|
||||
}
|
||||
|
||||
CompactionStrategy::Twcs(twcs_opts)
|
||||
} else {
|
||||
// Unrecognized compaction strategy; fall back to the default.
|
||||
CompactionStrategy::default()
|
||||
}
|
||||
}
|
||||
}
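A quick illustration of the parsing above: the option keys are the string constants defined earlier in this file, so a caller that wants TWCS only needs to put them into the option map. The following is a standalone sketch of such a map (the values 8 and 3600 are arbitrary examples, not defaults), showing what the `From<&HashMap<String, String>>` impl would turn it into:

use std::collections::HashMap;

fn main() {
    let mut opts = HashMap::new();
    opts.insert("compaction".to_string(), "TWCS".to_string());
    opts.insert(
        "compaction.twcs.max_active_window_files".to_string(),
        "8".to_string(),
    );
    opts.insert(
        "compaction.twcs.time_window_seconds".to_string(),
        "3600".to_string(),
    );
    // With the impl above, `CompactionStrategy::from(&opts)` would yield
    // `CompactionStrategy::Twcs` with max_active_window_files = 8, the default
    // max_inactive_window_files, and a one-hour time window.
    println!("{:?}", opts);
}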
|
||||
|
||||
@@ -28,7 +28,7 @@ use table::engine::{EngineContext, TableEngineProcedureRef, TableEngineRef, Tabl
|
||||
use table::requests::{CreateTableRequest, OpenTableRequest};
|
||||
|
||||
use crate::error::{
|
||||
AccessCatalogSnafu, DeserializeProcedureSnafu, SchemaNotFoundSnafu, SerializeProcedureSnafu,
|
||||
AccessCatalogSnafu, DeserializeProcedureSnafu, SerializeProcedureSnafu, TableExistsSnafu,
|
||||
};
|
||||
|
||||
/// Procedure to create a table.
|
||||
@@ -132,23 +132,24 @@ impl CreateTableProcedure {
|
||||
}
|
||||
|
||||
async fn on_prepare(&mut self) -> Result<Status> {
|
||||
if !self
|
||||
let table_exists = self
|
||||
.catalog_manager
|
||||
.schema_exist(
|
||||
.table_exist(
|
||||
&self.data.request.catalog_name,
|
||||
&self.data.request.schema_name,
|
||||
&self.data.request.table_name,
|
||||
)
|
||||
.await
|
||||
.context(AccessCatalogSnafu)?
|
||||
{
|
||||
logging::error!(
|
||||
"Failed to create table {}, schema not found",
|
||||
self.data.table_ref(),
|
||||
);
|
||||
return SchemaNotFoundSnafu {
|
||||
name: &self.data.request.schema_name,
|
||||
}
|
||||
.fail()?;
|
||||
.context(AccessCatalogSnafu)?;
|
||||
if table_exists {
|
||||
return if self.data.request.create_if_not_exists {
|
||||
Ok(Status::Done)
|
||||
} else {
|
||||
TableExistsSnafu {
|
||||
name: &self.data.request.table_name,
|
||||
}
|
||||
.fail()?
|
||||
};
|
||||
}
|
||||
|
||||
self.data.state = CreateTableState::EngineCreateTable;
|
||||
@@ -168,8 +169,9 @@ impl CreateTableProcedure {
|
||||
// Do this check as we might not have submitted the subprocedure yet when the manager
// recovers this procedure from the procedure store.
|
||||
logging::info!(
|
||||
"On engine create table {}, subprocedure not found, sub_id: {}",
|
||||
"On engine create table {}, table_id: {}, subprocedure not found, sub_id: {}",
|
||||
self.data.request.table_name,
|
||||
self.data.request.id,
|
||||
sub_id
|
||||
);
|
||||
|
||||
@@ -195,8 +197,9 @@ impl CreateTableProcedure {
|
||||
}),
|
||||
ProcedureState::Done => {
|
||||
logging::info!(
|
||||
"On engine create table {}, done, sub_id: {}",
|
||||
"On engine create table {}, table_id: {}, done, sub_id: {}",
|
||||
self.data.request.table_name,
|
||||
self.data.request.id,
|
||||
sub_id
|
||||
);
|
||||
// The sub procedure is done, we can execute next step.
|
||||
|
||||
@@ -23,7 +23,7 @@ use common_procedure::{
|
||||
};
|
||||
use common_telemetry::logging;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use snafu::{ensure, ResultExt};
|
||||
use table::engine::{EngineContext, TableEngineProcedureRef, TableReference};
|
||||
use table::requests::DropTableRequest;
|
||||
|
||||
@@ -122,18 +122,21 @@ impl DropTableProcedure {
|
||||
async fn on_prepare(&mut self) -> Result<Status> {
|
||||
let request = &self.data.request;
|
||||
// Ensure the table exists.
|
||||
let _ = self
|
||||
let table_exists = self
|
||||
.catalog_manager
|
||||
.table(
|
||||
.table_exist(
|
||||
&request.catalog_name,
|
||||
&request.schema_name,
|
||||
&request.table_name,
|
||||
)
|
||||
.await
|
||||
.context(AccessCatalogSnafu)?
|
||||
.context(TableNotFoundSnafu {
|
||||
.context(AccessCatalogSnafu)?;
|
||||
ensure!(
|
||||
table_exists,
|
||||
TableNotFoundSnafu {
|
||||
name: &request.table_name,
|
||||
})?;
|
||||
}
|
||||
);
|
||||
|
||||
self.data.state = DropTableState::RemoveFromCatalog;
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff.