mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-25 23:49:58 +00:00
Compare commits
50 Commits
v0.7.2
...
tests/cuck
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ee67ce10c9 | ||
|
|
2ba721cc82 | ||
|
|
de468ee595 | ||
|
|
bb9bdf74ec | ||
|
|
be5574fdb3 | ||
|
|
f9afc5dbbf | ||
|
|
c7400a4182 | ||
|
|
bf07dd275a | ||
|
|
7e1eed4b18 | ||
|
|
d12379106e | ||
|
|
64941d848e | ||
|
|
96a40e0300 | ||
|
|
d2e081c1f9 | ||
|
|
cdbdb04d93 | ||
|
|
5af87baeb0 | ||
|
|
d5a948a0a6 | ||
|
|
bbea651d08 | ||
|
|
8060c81e1d | ||
|
|
e6507aaf34 | ||
|
|
87795248dd | ||
|
|
7a04bfe50a | ||
|
|
2f4726f7b5 | ||
|
|
75d85f9915 | ||
|
|
db329f6c80 | ||
|
|
544c4a70f8 | ||
|
|
02f806fba9 | ||
|
|
9459ace33e | ||
|
|
c1e005b148 | ||
|
|
c00c1d95ee | ||
|
|
5d739932c0 | ||
|
|
aab7367804 | ||
|
|
34f935df66 | ||
|
|
fda1523ced | ||
|
|
2c0c7759ee | ||
|
|
2398918adf | ||
|
|
50bea2f107 | ||
|
|
1629435888 | ||
|
|
b3c94a303b | ||
|
|
883b7fce96 | ||
|
|
ea9367f371 | ||
|
|
2896e1f868 | ||
|
|
183fccbbd6 | ||
|
|
b51089fa61 | ||
|
|
682b04cbe4 | ||
|
|
e1d2f9a596 | ||
|
|
2fca45b048 | ||
|
|
3e1a125732 | ||
|
|
34b1427a82 | ||
|
|
28fd0dc276 | ||
|
|
32b9639d7c |
27
.github/CODEOWNERS
vendored
Normal file
27
.github/CODEOWNERS
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
# GreptimeDB CODEOWNERS
|
||||
|
||||
# These owners will be the default owners for everything in the repo.
|
||||
|
||||
* @GreptimeTeam/db-approver
|
||||
|
||||
## [Module] Databse Engine
|
||||
/src/index @zhongzc
|
||||
/src/mito2 @evenyag @v0y4g3r @waynexia
|
||||
/src/query @evenyag
|
||||
|
||||
## [Module] Distributed
|
||||
/src/common/meta @MichaelScofield
|
||||
/src/common/procedure @MichaelScofield
|
||||
/src/meta-client @MichaelScofield
|
||||
/src/meta-srv @MichaelScofield
|
||||
|
||||
## [Module] Write Ahead Log
|
||||
/src/log-store @v0y4g3r
|
||||
/src/store-api @v0y4g3r
|
||||
|
||||
## [Module] Metrics Engine
|
||||
/src/metric-engine @waynexia
|
||||
/src/promql @waynexia
|
||||
|
||||
## [Module] Flow
|
||||
/src/flow @zhongzc @waynexia
|
||||
2
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
2
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -39,7 +39,7 @@ body:
|
||||
- Query Engine
|
||||
- Table Engine
|
||||
- Write Protocols
|
||||
- MetaSrv
|
||||
- Metasrv
|
||||
- Frontend
|
||||
- Datanode
|
||||
- Other
|
||||
|
||||
@@ -26,6 +26,8 @@ runs:
|
||||
using: composite
|
||||
steps:
|
||||
- uses: arduino/setup-protoc@v3
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Install rust toolchain
|
||||
uses: dtolnay/rust-toolchain@master
|
||||
|
||||
15
.github/workflows/develop.yml
vendored
15
.github/workflows/develop.yml
vendored
@@ -33,12 +33,17 @@ env:
|
||||
RUST_TOOLCHAIN: nightly-2023-12-19
|
||||
|
||||
jobs:
|
||||
typos:
|
||||
name: Spell Check with Typos
|
||||
check-typos-and-docs:
|
||||
name: Check typos and docs
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: crate-ci/typos@v1.13.10
|
||||
- name: Check the config docs
|
||||
run: |
|
||||
make config-docs && \
|
||||
git diff --name-only --exit-code ./config/config.md \
|
||||
|| (echo "'config/config.md' is not up-to-date, please run 'make config-docs'." && exit 1)
|
||||
|
||||
check:
|
||||
name: Check
|
||||
@@ -93,6 +98,8 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: arduino/setup-protoc@v3
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- uses: dtolnay/rust-toolchain@master
|
||||
with:
|
||||
toolchain: ${{ env.RUST_TOOLCHAIN }}
|
||||
@@ -123,10 +130,12 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
target: [ "fuzz_create_table", "fuzz_alter_table" ]
|
||||
target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database" ]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: arduino/setup-protoc@v3
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- uses: dtolnay/rust-toolchain@master
|
||||
with:
|
||||
toolchain: ${{ env.RUST_TOOLCHAIN }}
|
||||
|
||||
259
Cargo.lock
generated
259
Cargo.lock
generated
@@ -793,24 +793,6 @@ dependencies = [
|
||||
"syn 2.0.43",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "axum-test-helper"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "298f62fa902c2515c169ab0bfb56c593229f33faa01131215d58e3d4898e3aa9"
|
||||
dependencies = [
|
||||
"axum",
|
||||
"bytes",
|
||||
"http",
|
||||
"http-body",
|
||||
"hyper",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"tokio",
|
||||
"tower",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backon"
|
||||
version = "0.4.1"
|
||||
@@ -1092,6 +1074,12 @@ dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bufstream"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40e38929add23cdf8a366df9b0e088953150724bcbe5fc330b0d8eb3b328eec8"
|
||||
|
||||
[[package]]
|
||||
name = "build-data"
|
||||
version = "0.1.5"
|
||||
@@ -1650,6 +1638,7 @@ dependencies = [
|
||||
"substrait 0.7.2",
|
||||
"table",
|
||||
"temp-env",
|
||||
"tempfile",
|
||||
"tikv-jemallocator",
|
||||
"tokio",
|
||||
"toml 0.8.8",
|
||||
@@ -2303,9 +2292,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "crc32fast"
|
||||
version = "1.3.2"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
|
||||
checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
]
|
||||
@@ -2974,6 +2963,17 @@ dependencies = [
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_utils"
|
||||
version = "0.14.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "61bb5a1014ce6dfc2a378578509abe775a5aa06bff584a547555d9efdb81b926"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.43",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "diff"
|
||||
version = "0.1.13"
|
||||
@@ -3406,21 +3406,32 @@ name = "flow"
|
||||
version = "0.7.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"catalog",
|
||||
"common-catalog",
|
||||
"common-decimal",
|
||||
"common-error",
|
||||
"common-macro",
|
||||
"common-telemetry",
|
||||
"common-time",
|
||||
"datafusion-common",
|
||||
"datafusion-expr",
|
||||
"datafusion-substrait",
|
||||
"datatypes",
|
||||
"enum_dispatch",
|
||||
"hydroflow",
|
||||
"itertools 0.10.5",
|
||||
"num-traits",
|
||||
"prost 0.12.3",
|
||||
"query",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"servers",
|
||||
"session",
|
||||
"smallvec",
|
||||
"snafu",
|
||||
"strum 0.25.0",
|
||||
"substrait 0.7.2",
|
||||
"table",
|
||||
"tokio",
|
||||
"tonic 0.10.2",
|
||||
]
|
||||
@@ -3431,6 +3442,21 @@ version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "foreign-types"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
|
||||
dependencies = [
|
||||
"foreign-types-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "foreign-types-shared"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
|
||||
|
||||
[[package]]
|
||||
name = "form_urlencoded"
|
||||
version = "1.2.1"
|
||||
@@ -3781,7 +3807,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
[[package]]
|
||||
name = "greptime-proto"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=1bd2398b686e5ac6c1eef6daf615867ce27f75c1#1bd2398b686e5ac6c1eef6daf615867ce27f75c1"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=04d78b6e025ceb518040fdd10858c2a9d9345820#04d78b6e025ceb518040fdd10858c2a9d9345820"
|
||||
dependencies = [
|
||||
"prost 0.12.3",
|
||||
"serde",
|
||||
@@ -3794,9 +3820,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "h2"
|
||||
version = "0.3.24"
|
||||
version = "0.3.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9"
|
||||
checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fnv",
|
||||
@@ -4059,9 +4085,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "hydroflow"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a5129724896b4c3cf12f8e5f5af2f1d94b4c5933ae911189747025c6a5ff1346"
|
||||
version = "0.6.0"
|
||||
source = "git+https://github.com/GreptimeTeam/hydroflow.git?rev=ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94#ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"byteorder",
|
||||
@@ -4092,9 +4117,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "hydroflow_datalog"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41813c88b02f3bfa8f5962e125495aa47c8d382cf5d135b02da40af4342bc6fb"
|
||||
version = "0.6.0"
|
||||
source = "git+https://github.com/GreptimeTeam/hydroflow.git?rev=ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94#ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94"
|
||||
dependencies = [
|
||||
"hydroflow_datalog_core",
|
||||
"proc-macro-crate 1.3.1",
|
||||
@@ -4105,9 +4129,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "hydroflow_datalog_core"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ea77a3b2f09bba3d461f9ce0dee28798d3b07dafe77fc46de4675155f5925e53"
|
||||
version = "0.6.0"
|
||||
source = "git+https://github.com/GreptimeTeam/hydroflow.git?rev=ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94#ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94"
|
||||
dependencies = [
|
||||
"hydroflow_lang",
|
||||
"proc-macro-crate 1.3.1",
|
||||
@@ -4121,9 +4144,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "hydroflow_lang"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f3191eee8ef49b4a814e4c33a0ce0d7470b733dc6118ea744f7f15168c38803f"
|
||||
version = "0.6.0"
|
||||
source = "git+https://github.com/GreptimeTeam/hydroflow.git?rev=ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94#ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94"
|
||||
dependencies = [
|
||||
"auto_impl",
|
||||
"clap 4.4.11",
|
||||
@@ -4142,9 +4164,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "hydroflow_macro"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9be25d2a927fe4e6afe3e204786e968e983f53f313cc561950ff1cd09ecd92fc"
|
||||
version = "0.6.0"
|
||||
source = "git+https://github.com/GreptimeTeam/hydroflow.git?rev=ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94#ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94"
|
||||
dependencies = [
|
||||
"hydroflow_lang",
|
||||
"itertools 0.10.5",
|
||||
@@ -4425,6 +4446,15 @@ version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8573b2b1fb643a372c73b23f4da5f888677feef3305146d68a539250a9bccc7"
|
||||
|
||||
[[package]]
|
||||
name = "io-enum"
|
||||
version = "1.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "53b53d712d99a73eec59ee5e4fe6057f8052142d38eeafbbffcb06b36d738a6e"
|
||||
dependencies = [
|
||||
"derive_utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "io-lifetimes"
|
||||
version = "1.0.11"
|
||||
@@ -4646,9 +4676,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lattices"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4f3bff82353a971b61106a49369cfc1bd8398661107eadcb5387fcd21c43cac9"
|
||||
version = "0.5.3"
|
||||
source = "git+https://github.com/GreptimeTeam/hydroflow.git?rev=ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94#ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94"
|
||||
dependencies = [
|
||||
"cc-traits",
|
||||
"sealed",
|
||||
@@ -5329,6 +5358,7 @@ dependencies = [
|
||||
"common-test-util",
|
||||
"common-time",
|
||||
"common-wal",
|
||||
"crc32fast",
|
||||
"criterion",
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
@@ -5438,6 +5468,32 @@ version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "97af489e1e21b68de4c390ecca6703318bc1aa16e9733bcb62c089b73c6fbb1b"
|
||||
|
||||
[[package]]
|
||||
name = "mysql"
|
||||
version = "25.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4cc09a8118051e4617886c9c6e693c61444c2eeb5f9a792dc5d631501706565"
|
||||
dependencies = [
|
||||
"bufstream",
|
||||
"bytes",
|
||||
"crossbeam",
|
||||
"flate2",
|
||||
"io-enum",
|
||||
"libc",
|
||||
"lru",
|
||||
"mysql_common 0.32.0",
|
||||
"named_pipe",
|
||||
"native-tls",
|
||||
"once_cell",
|
||||
"pem",
|
||||
"percent-encoding",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"socket2 0.5.5",
|
||||
"twox-hash",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mysql-common-derive"
|
||||
version = "0.30.2"
|
||||
@@ -5619,6 +5675,33 @@ dependencies = [
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "named_pipe"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ad9c443cce91fc3e12f017290db75dde490d685cdaaf508d7159d7cf41f0eb2b"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "native-tls"
|
||||
version = "0.2.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"log",
|
||||
"openssl",
|
||||
"openssl-probe",
|
||||
"openssl-sys",
|
||||
"schannel",
|
||||
"security-framework",
|
||||
"security-framework-sys",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ndk-context"
|
||||
version = "0.1.1"
|
||||
@@ -6023,12 +6106,50 @@ dependencies = [
|
||||
"tokio-rustls 0.25.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openssl"
|
||||
version = "0.10.64"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"cfg-if 1.0.0",
|
||||
"foreign-types",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"openssl-macros",
|
||||
"openssl-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openssl-macros"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.43",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openssl-probe"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
|
||||
|
||||
[[package]]
|
||||
name = "openssl-sys"
|
||||
version = "0.9.102"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry"
|
||||
version = "0.21.0"
|
||||
@@ -7294,9 +7415,8 @@ checksum = "3b7e158a385023d209d6d5f2585c4b468f6dcb3dd5aca9b75c4f1678c05bb375"
|
||||
|
||||
[[package]]
|
||||
name = "pusherator"
|
||||
version = "0.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bd486cb5153e0d8fa91d3daebae48917ae299b2569cc79901922f3923dc312ef"
|
||||
version = "0.0.5"
|
||||
source = "git+https://github.com/GreptimeTeam/hydroflow.git?rev=ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94#ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94"
|
||||
dependencies = [
|
||||
"either",
|
||||
"variadics",
|
||||
@@ -9036,7 +9156,6 @@ dependencies = [
|
||||
"auth",
|
||||
"axum",
|
||||
"axum-macros",
|
||||
"axum-test-helper",
|
||||
"base64 0.21.5",
|
||||
"bytes",
|
||||
"catalog",
|
||||
@@ -9067,6 +9186,7 @@ dependencies = [
|
||||
"hashbrown 0.14.3",
|
||||
"headers",
|
||||
"hostname",
|
||||
"http",
|
||||
"http-body",
|
||||
"humantime-serde",
|
||||
"hyper",
|
||||
@@ -10073,6 +10193,32 @@ version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
|
||||
|
||||
[[package]]
|
||||
name = "tests-chaos"
|
||||
version = "0.7.2"
|
||||
dependencies = [
|
||||
"axum",
|
||||
"axum-macros",
|
||||
"common-error",
|
||||
"common-macro",
|
||||
"common-telemetry",
|
||||
"common-time",
|
||||
"lazy_static",
|
||||
"mysql",
|
||||
"nix 0.26.4",
|
||||
"prometheus",
|
||||
"rand",
|
||||
"rand_chacha",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"snafu",
|
||||
"sqlx",
|
||||
"tests-fuzz",
|
||||
"tinytemplate",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tests-fuzz"
|
||||
version = "0.7.2"
|
||||
@@ -10090,6 +10236,7 @@ dependencies = [
|
||||
"dotenv",
|
||||
"lazy_static",
|
||||
"libfuzzer-sys",
|
||||
"mysql",
|
||||
"partition",
|
||||
"rand",
|
||||
"rand_chacha",
|
||||
@@ -10111,7 +10258,6 @@ dependencies = [
|
||||
"async-trait",
|
||||
"auth",
|
||||
"axum",
|
||||
"axum-test-helper",
|
||||
"catalog",
|
||||
"chrono",
|
||||
"client",
|
||||
@@ -11368,9 +11514,11 @@ checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
|
||||
|
||||
[[package]]
|
||||
name = "variadics"
|
||||
version = "0.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4500f518837578bf2d62d9c12f47ecb5b5279da689574793b7bace8138b4784"
|
||||
version = "0.0.4"
|
||||
source = "git+https://github.com/GreptimeTeam/hydroflow.git?rev=ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94#ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94"
|
||||
dependencies = [
|
||||
"sealed",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
@@ -11447,6 +11595,12 @@ version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "wasite"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b"
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen"
|
||||
version = "0.2.89"
|
||||
@@ -11602,11 +11756,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "whoami"
|
||||
version = "1.4.1"
|
||||
version = "1.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "22fc3756b8a9133049b26c7f61ab35416c130e8c09b660f5b3958b446f52cc50"
|
||||
checksum = "a44ab49fad634e88f55bf8f9bb3abd2f27d7204172a112c7c9987e01c1c94ea9"
|
||||
dependencies = [
|
||||
"wasm-bindgen",
|
||||
"redox_syscall 0.4.1",
|
||||
"wasite",
|
||||
"web-sys",
|
||||
]
|
||||
|
||||
|
||||
@@ -55,6 +55,7 @@ members = [
|
||||
"src/store-api",
|
||||
"src/table",
|
||||
"src/index",
|
||||
"tests-chaos",
|
||||
"tests-fuzz",
|
||||
"tests-integration",
|
||||
"tests/runner",
|
||||
@@ -104,7 +105,7 @@ etcd-client = "0.12"
|
||||
fst = "0.4.7"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "1bd2398b686e5ac6c1eef6daf615867ce27f75c1" }
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "04d78b6e025ceb518040fdd10858c2a9d9345820" }
|
||||
humantime = "2.1"
|
||||
humantime-serde = "1.1"
|
||||
itertools = "0.10"
|
||||
@@ -133,6 +134,7 @@ reqwest = { version = "0.11", default-features = false, features = [
|
||||
"json",
|
||||
"rustls-tls-native-roots",
|
||||
"stream",
|
||||
"multipart",
|
||||
] }
|
||||
rskafka = "0.5"
|
||||
rust_decimal = "1.33"
|
||||
@@ -211,6 +213,7 @@ sql = { path = "src/sql" }
|
||||
store-api = { path = "src/store-api" }
|
||||
substrait = { path = "src/common/substrait" }
|
||||
table = { path = "src/table" }
|
||||
tests-fuzz = { path = "tests-fuzz" }
|
||||
|
||||
[workspace.dependencies.meter-macros]
|
||||
git = "https://github.com/GreptimeTeam/greptime-meter.git"
|
||||
|
||||
14
Makefile
14
Makefile
@@ -169,6 +169,10 @@ check: ## Cargo check all the targets.
|
||||
clippy: ## Check clippy rules.
|
||||
cargo clippy --workspace --all-targets --all-features -- -D warnings
|
||||
|
||||
.PHONY: fix-clippy
|
||||
fix-clippy: ## Fix clippy violations.
|
||||
cargo clippy --workspace --all-targets --all-features --fix
|
||||
|
||||
.PHONY: fmt-check
|
||||
fmt-check: ## Check code format.
|
||||
cargo fmt --all -- --check
|
||||
@@ -188,6 +192,16 @@ run-it-in-container: start-etcd ## Run integration tests in dev-builder.
|
||||
-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-${BASE_IMAGE}:latest \
|
||||
make test sqlness-test BUILD_JOBS=${BUILD_JOBS}
|
||||
|
||||
##@ Docs
|
||||
config-docs: ## Generate configuration documentation from toml files.
|
||||
docker run --rm \
|
||||
-v ${PWD}:/greptimedb \
|
||||
-w /greptimedb/config \
|
||||
toml2docs/toml2docs:latest \
|
||||
-p '##' \
|
||||
-t ./config-docs-template.md \
|
||||
-o ./config.md
|
||||
|
||||
##@ General
|
||||
|
||||
# The help target prints out all targets with their descriptions organized
|
||||
|
||||
@@ -143,7 +143,7 @@ cargo run -- standalone start
|
||||
- [GreptimeDB C++ Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-cpp)
|
||||
- [GreptimeDB Erlang Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-erl)
|
||||
- [GreptimeDB Rust Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-rust)
|
||||
- [GreptimeDB JavaScript Ingester](https://github.com/GreptimeTeam/greptime-ingester-js)
|
||||
- [GreptimeDB JavaScript Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-js)
|
||||
|
||||
### Grafana Dashboard
|
||||
|
||||
|
||||
14
cliff.toml
14
cliff.toml
@@ -53,7 +53,7 @@ Release date: {{ timestamp | date(format="%B %d, %Y") }}
|
||||
## New Contributors
|
||||
{% endif -%}
|
||||
{% for contributor in github.contributors | filter(attribute="is_first_time", value=true) %}
|
||||
* @{{ contributor.username }} made their first contribution
|
||||
* [@{{ contributor.username }}](https://github.com/{{ contributor.username }}) made their first contribution
|
||||
{%- if contributor.pr_number %} in \
|
||||
[#{{ contributor.pr_number }}]({{ self::remote_url() }}/pull/{{ contributor.pr_number }}) \
|
||||
{%- endif %}
|
||||
@@ -65,7 +65,17 @@ Release date: {{ timestamp | date(format="%B %d, %Y") }}
|
||||
|
||||
We would like to thank the following contributors from the GreptimeDB community:
|
||||
|
||||
{{ github.contributors | map(attribute="username") | join(sep=", ") }}
|
||||
{%- set contributors = github.contributors | sort(attribute="username") | map(attribute="username") -%}
|
||||
{%- set bots = ['dependabot[bot]'] %}
|
||||
|
||||
{% for contributor in contributors %}
|
||||
{%- if bots is containing(contributor) -%}{% continue %}{%- endif -%}
|
||||
{%- if loop.first -%}
|
||||
[@{{ contributor }}](https://github.com/{{ contributor }})
|
||||
{%- else -%}
|
||||
, [@{{ contributor }}](https://github.com/{{ contributor }})
|
||||
{%- endif -%}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{% raw %}\n{% endraw %}
|
||||
|
||||
|
||||
19
config/config-docs-template.md
Normal file
19
config/config-docs-template.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# Configurations
|
||||
|
||||
## Standalone Mode
|
||||
|
||||
{{ toml2docs "./standalone.example.toml" }}
|
||||
|
||||
## Cluster Mode
|
||||
|
||||
### Frontend
|
||||
|
||||
{{ toml2docs "./frontend.example.toml" }}
|
||||
|
||||
### Metasrv
|
||||
|
||||
{{ toml2docs "./metasrv.example.toml" }}
|
||||
|
||||
### Datanode
|
||||
|
||||
{{ toml2docs "./datanode.example.toml" }}
|
||||
376
config/config.md
Normal file
376
config/config.md
Normal file
@@ -0,0 +1,376 @@
|
||||
# Configurations
|
||||
|
||||
## Standalone Mode
|
||||
|
||||
| Key | Type | Default | Descriptions |
|
||||
| --- | -----| ------- | ----------- |
|
||||
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
|
||||
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
|
||||
| `default_timezone` | String | `None` | The default timezone of the server. |
|
||||
| `http` | -- | -- | The HTTP server options. |
|
||||
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
|
||||
| `http.timeout` | String | `30s` | HTTP request timeout. |
|
||||
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>Support the following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`. |
|
||||
| `grpc` | -- | -- | The gRPC server options. |
|
||||
| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
|
||||
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
|
||||
| `mysql` | -- | -- | MySQL server options. |
|
||||
| `mysql.enable` | Bool | `true` | Whether to enable. |
|
||||
| `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
|
||||
| `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
|
||||
| `mysql.tls` | -- | -- | -- |
|
||||
| `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
|
||||
| `mysql.tls.cert_path` | String | `None` | Certificate file path. |
|
||||
| `mysql.tls.key_path` | String | `None` | Private key file path. |
|
||||
| `mysql.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
|
||||
| `postgres` | -- | -- | PostgresSQL server options. |
|
||||
| `postgres.enable` | Bool | `true` | Whether to enable |
|
||||
| `postgres.addr` | String | `127.0.0.1:4003` | The addr to bind the PostgresSQL server. |
|
||||
| `postgres.runtime_size` | Integer | `2` | The number of server worker threads. |
|
||||
| `postgres.tls` | -- | -- | PostgresSQL server TLS options, see `mysql_options.tls` section. |
|
||||
| `postgres.tls.mode` | String | `disable` | TLS mode. |
|
||||
| `postgres.tls.cert_path` | String | `None` | Certificate file path. |
|
||||
| `postgres.tls.key_path` | String | `None` | Private key file path. |
|
||||
| `postgres.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
|
||||
| `opentsdb` | -- | -- | OpenTSDB protocol options. |
|
||||
| `opentsdb.enable` | Bool | `true` | Whether to enable |
|
||||
| `opentsdb.addr` | String | `127.0.0.1:4242` | OpenTSDB telnet API server address. |
|
||||
| `opentsdb.runtime_size` | Integer | `2` | The number of server worker threads. |
|
||||
| `influxdb` | -- | -- | InfluxDB protocol options. |
|
||||
| `influxdb.enable` | Bool | `true` | Whether to enable InfluxDB protocol in HTTP API. |
|
||||
| `prom_store` | -- | -- | Prometheus remote storage options |
|
||||
| `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. |
|
||||
| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
|
||||
| `wal` | -- | -- | The WAL options. |
|
||||
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
|
||||
| `wal.dir` | String | `None` | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.file_size` | String | `256MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.max_batch_size` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.linger` | String | `200ms` | The linger duration of a kafka batch producer.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_init` | String | `500ms` | The initial backoff delay.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_max` | String | `10s` | The maximum backoff delay.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `metadata_store` | -- | -- | Metadata storage options. |
|
||||
| `metadata_store.file_size` | String | `256MB` | Kv file size in bytes. |
|
||||
| `metadata_store.purge_threshold` | String | `4GB` | Kv purge threshold. |
|
||||
| `procedure` | -- | -- | Procedure storage options. |
|
||||
| `procedure.max_retry_times` | Integer | `3` | Procedure max retry time. |
|
||||
| `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially |
|
||||
| `storage` | -- | -- | The data storage options. |
|
||||
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
|
||||
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
|
||||
| `storage.cache_path` | String | `None` | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
|
||||
| `storage.cache_capacity` | String | `None` | The local file cache capacity in bytes. |
|
||||
| `storage.bucket` | String | `None` | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
|
||||
| `storage.root` | String | `None` | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
|
||||
| `storage.access_key_id` | String | `None` | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
|
||||
| `storage.secret_access_key` | String | `None` | The secret access key of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3`**. |
|
||||
| `storage.access_key_secret` | String | `None` | The secret access key of the aliyun account.<br/>**It's only used when the storage type is `Oss`**. |
|
||||
| `storage.account_name` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.account_key` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.scope` | String | `None` | The scope of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
|
||||
| `storage.credential_path` | String | `None` | The credential path of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
|
||||
| `storage.container` | String | `None` | The container of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.sas_token` | String | `None` | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.endpoint` | String | `None` | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
|
||||
| `storage.region` | String | `None` | The region of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
|
||||
| `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. |
|
||||
| `region_engine.mito` | -- | -- | The Mito engine options. |
|
||||
| `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. |
|
||||
| `region_engine.mito.worker_channel_size` | Integer | `128` | Request channel size of each worker. |
|
||||
| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
|
||||
| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. |
|
||||
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
|
||||
| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
|
||||
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
|
||||
| `region_engine.mito.global_write_buffer_size` | String | `1GB` | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
|
||||
| `region_engine.mito.global_write_buffer_reject_size` | String | `2GB` | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` |
|
||||
| `region_engine.mito.sst_meta_cache_size` | String | `128MB` | Cache size for SST metadata. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. |
|
||||
| `region_engine.mito.vector_cache_size` | String | `512MB` | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
|
||||
| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
|
||||
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
|
||||
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
|
||||
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
|
||||
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
|
||||
| `region_engine.mito.inverted_index` | -- | -- | The options for inverted index in Mito engine. |
|
||||
| `region_engine.mito.inverted_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically<br/>- `disable`: never |
|
||||
| `region_engine.mito.inverted_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically<br/>- `disable`: never |
|
||||
| `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query<br/>- `auto`: automatically<br/>- `disable`: never |
|
||||
| `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `64M` | Memory threshold for performing an external sort during index creation.<br/>Setting to empty will disable external sorting, forcing all sorting operations to happen in memory. |
|
||||
| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). |
|
||||
| `region_engine.mito.memtable` | -- | -- | -- |
|
||||
| `region_engine.mito.memtable.type` | String | `time_series` | Memtable type.<br/>- `time_series`: time-series memtable<br/>- `partition_tree`: partition tree memtable (experimental) |
|
||||
| `region_engine.mito.memtable.index_max_keys_per_shard` | Integer | `8192` | The max number of keys in one shard.<br/>Only available for `partition_tree` memtable. |
|
||||
| `region_engine.mito.memtable.data_freeze_threshold` | Integer | `32768` | The max rows of data inside the actively writing buffer in one shard.<br/>Only available for `partition_tree` memtable. |
|
||||
| `region_engine.mito.memtable.fork_dictionary_bytes` | String | `1GiB` | Max dictionary bytes.<br/>Only available for `partition_tree` memtable. |
|
||||
| `logging` | -- | -- | The logging options. |
|
||||
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
|
||||
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
|
||||
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
|
||||
| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
|
||||
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
|
||||
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
|
||||
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommend to collect metrics generated by itself |
|
||||
| `export_metrics.self_import.db` | String | `None` | -- |
|
||||
| `export_metrics.remote_write` | -- | -- | -- |
|
||||
| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. |
|
||||
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
|
||||
|
||||
|
||||
## Cluster Mode
|
||||
|
||||
### Frontend
|
||||
|
||||
| Key | Type | Default | Descriptions |
|
||||
| --- | -----| ------- | ----------- |
|
||||
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
|
||||
| `default_timezone` | String | `None` | The default timezone of the server. |
|
||||
| `heartbeat` | -- | -- | The heartbeat options. |
|
||||
| `heartbeat.interval` | String | `18s` | Interval for sending heartbeat messages to the metasrv. |
|
||||
| `heartbeat.retry_interval` | String | `3s` | Interval for retrying to send heartbeat messages to the metasrv. |
|
||||
| `http` | -- | -- | The HTTP server options. |
|
||||
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
|
||||
| `http.timeout` | String | `30s` | HTTP request timeout. |
|
||||
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>Support the following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`. |
|
||||
| `grpc` | -- | -- | The gRPC server options. |
|
||||
| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
|
||||
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
|
||||
| `mysql` | -- | -- | MySQL server options. |
|
||||
| `mysql.enable` | Bool | `true` | Whether to enable. |
|
||||
| `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
|
||||
| `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
|
||||
| `mysql.tls` | -- | -- | -- |
|
||||
| `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
|
||||
| `mysql.tls.cert_path` | String | `None` | Certificate file path. |
|
||||
| `mysql.tls.key_path` | String | `None` | Private key file path. |
|
||||
| `mysql.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
|
||||
| `postgres` | -- | -- | PostgresSQL server options. |
|
||||
| `postgres.enable` | Bool | `true` | Whether to enable |
|
||||
| `postgres.addr` | String | `127.0.0.1:4003` | The addr to bind the PostgresSQL server. |
|
||||
| `postgres.runtime_size` | Integer | `2` | The number of server worker threads. |
|
||||
| `postgres.tls` | -- | -- | PostgresSQL server TLS options, see `mysql_options.tls` section. |
|
||||
| `postgres.tls.mode` | String | `disable` | TLS mode. |
|
||||
| `postgres.tls.cert_path` | String | `None` | Certificate file path. |
|
||||
| `postgres.tls.key_path` | String | `None` | Private key file path. |
|
||||
| `postgres.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
|
||||
| `opentsdb` | -- | -- | OpenTSDB protocol options. |
|
||||
| `opentsdb.enable` | Bool | `true` | Whether to enable |
|
||||
| `opentsdb.addr` | String | `127.0.0.1:4242` | OpenTSDB telnet API server address. |
|
||||
| `opentsdb.runtime_size` | Integer | `2` | The number of server worker threads. |
|
||||
| `influxdb` | -- | -- | InfluxDB protocol options. |
|
||||
| `influxdb.enable` | Bool | `true` | Whether to enable InfluxDB protocol in HTTP API. |
|
||||
| `prom_store` | -- | -- | Prometheus remote storage options |
|
||||
| `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. |
|
||||
| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
|
||||
| `meta_client` | -- | -- | The metasrv client options. |
|
||||
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
|
||||
| `meta_client.timeout` | String | `3s` | Operation timeout. |
|
||||
| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
|
||||
| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
|
||||
| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
|
||||
| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |
|
||||
| `meta_client.metadata_cache_max_capacity` | Integer | `100000` | The configuration about the cache of the metadata. |
|
||||
| `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. |
|
||||
| `meta_client.metadata_cache_tti` | String | `5m` | -- |
|
||||
| `datanode` | -- | -- | Datanode options. |
|
||||
| `datanode.client` | -- | -- | Datanode client options. |
|
||||
| `datanode.client.timeout` | String | `10s` | -- |
|
||||
| `datanode.client.connect_timeout` | String | `10s` | -- |
|
||||
| `datanode.client.tcp_nodelay` | Bool | `true` | -- |
|
||||
| `logging` | -- | -- | The logging options. |
|
||||
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
|
||||
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
|
||||
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
|
||||
| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
|
||||
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
|
||||
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
|
||||
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommend to collect metrics generated by itself |
|
||||
| `export_metrics.self_import.db` | String | `None` | -- |
|
||||
| `export_metrics.remote_write` | -- | -- | -- |
|
||||
| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. |
|
||||
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
|
||||
|
||||
|
||||
### Metasrv
|
||||
|
||||
| Key | Type | Default | Descriptions |
|
||||
| --- | -----| ------- | ----------- |
|
||||
| `data_home` | String | `/tmp/metasrv/` | The working home directory. |
|
||||
| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
|
||||
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. |
|
||||
| `store_addr` | String | `127.0.0.1:2379` | Etcd server address. |
|
||||
| `selector` | String | `lease_based` | Datanode selector type.<br/>- `lease_based` (default value).<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
|
||||
| `use_memory_store` | Bool | `false` | Store data in memory. |
|
||||
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. |
|
||||
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
|
||||
| `procedure` | -- | -- | Procedure storage options. |
|
||||
| `procedure.max_retry_times` | Integer | `12` | Procedure max retry time. |
|
||||
| `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially |
|
||||
| `procedure.max_metadata_value_size` | String | `1500KiB` | Auto split large value<br/>GreptimeDB procedure uses etcd as the default metadata storage backend.<br/>The etcd the maximum size of any request is 1.5 MiB<br/>1500KiB = 1536KiB (1.5MiB) - 36KiB (reserved size of key)<br/>Comments out the `max_metadata_value_size`, for don't split large value (no limit). |
|
||||
| `failure_detector` | -- | -- | -- |
|
||||
| `failure_detector.threshold` | Float | `8.0` | -- |
|
||||
| `failure_detector.min_std_deviation` | String | `100ms` | -- |
|
||||
| `failure_detector.acceptable_heartbeat_pause` | String | `3000ms` | -- |
|
||||
| `failure_detector.first_heartbeat_estimate` | String | `1000ms` | -- |
|
||||
| `datanode` | -- | -- | Datanode options. |
|
||||
| `datanode.client` | -- | -- | Datanode client options. |
|
||||
| `datanode.client.timeout` | String | `10s` | -- |
|
||||
| `datanode.client.connect_timeout` | String | `10s` | -- |
|
||||
| `datanode.client.tcp_nodelay` | Bool | `true` | -- |
|
||||
| `wal` | -- | -- | -- |
|
||||
| `wal.provider` | String | `raft_engine` | -- |
|
||||
| `wal.broker_endpoints` | Array | -- | The broker endpoints of the Kafka cluster. |
|
||||
| `wal.num_topics` | Integer | `64` | Number of topics to be created upon start. |
|
||||
| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default) |
|
||||
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`. |
|
||||
| `wal.replication_factor` | Integer | `1` | Expected number of replicas of each partition. |
|
||||
| `wal.create_topic_timeout` | String | `30s` | Above which a topic creation operation will be cancelled. |
|
||||
| `wal.backoff_init` | String | `500ms` | The initial backoff for kafka clients. |
|
||||
| `wal.backoff_max` | String | `10s` | The maximum backoff for kafka clients. |
|
||||
| `wal.backoff_base` | Integer | `2` | Exponential backoff rate, i.e. next backoff = base * current backoff. |
|
||||
| `wal.backoff_deadline` | String | `5mins` | Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate. |
|
||||
| `logging` | -- | -- | The logging options. |
|
||||
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
|
||||
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
|
||||
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
|
||||
| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
|
||||
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
|
||||
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
|
||||
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommend to collect metrics generated by itself |
|
||||
| `export_metrics.self_import.db` | String | `None` | -- |
|
||||
| `export_metrics.remote_write` | -- | -- | -- |
|
||||
| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. |
|
||||
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
|
||||
|
### Datanode

| Key | Type | Default | Descriptions |
| --- | ---- | ------- | ------------ |
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
| `node_id` | Integer | `None` | The datanode identifier; it should be unique in the cluster. |
| `require_lease_before_startup` | Bool | `false` | Start services after regions have obtained leases.<br/>It will block the datanode start if it can't receive leases in the heartbeat from metasrv. |
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
| `rpc_addr` | String | `127.0.0.1:3001` | The gRPC address of the datanode. |
| `rpc_hostname` | String | `None` | The hostname of the datanode. |
| `rpc_runtime_size` | Integer | `8` | The number of gRPC server worker threads. |
| `rpc_max_recv_message_size` | String | `512MB` | The maximum receive message size for the gRPC server. |
| `rpc_max_send_message_size` | String | `512MB` | The maximum send message size for the gRPC server. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
| `heartbeat` | -- | -- | The heartbeat options. |
| `heartbeat.interval` | String | `3s` | Interval for sending heartbeat messages to the metasrv. |
| `heartbeat.retry_interval` | String | `3s` | Interval for retrying to send heartbeat messages to the metasrv. |
| `meta_client` | -- | -- | The metasrv client options. |
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
| `meta_client.timeout` | String | `3s` | Operation timeout. |
| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |
| `meta_client.metadata_cache_max_capacity` | Integer | `100000` | The maximum capacity of the metadata cache. |
| `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. |
| `meta_client.metadata_cache_tti` | String | `5m` | TTI of the metadata cache. |
| `wal` | -- | -- | The WAL options. |
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the WAL is stored in the local file system by raft-engine.<br/>- `kafka`: remote WAL, the data is stored in Kafka. |
| `wal.dir` | String | `None` | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.file_size` | String | `256MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
| `wal.max_batch_size` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
| `wal.linger` | String | `200ms` | The linger duration of a Kafka batch producer.<br/>**It's only used when the provider is `kafka`**. |
| `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_init` | String | `500ms` | The initial backoff delay.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_max` | String | `10s` | The maximum backoff delay.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
| `storage.cache_path` | String | `None` | Cache configuration for object storage such as `S3`.<br/>The local file cache directory. |
| `storage.cache_capacity` | String | `None` | The local file cache capacity in bytes. |
| `storage.bucket` | String | `None` | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
| `storage.root` | String | `None` | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
| `storage.access_key_id` | String | `None` | The access key id of the AWS account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
| `storage.secret_access_key` | String | `None` | The secret access key of the AWS account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3`**. |
| `storage.access_key_secret` | String | `None` | The secret access key of the Aliyun account.<br/>**It's only used when the storage type is `Oss`**. |
| `storage.account_name` | String | `None` | The account name of the Azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.account_key` | String | `None` | The account key of the Azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.scope` | String | `None` | The scope of the Google Cloud Storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.credential_path` | String | `None` | The credential path of the Google Cloud Storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.container` | String | `None` | The container of the Azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.sas_token` | String | `None` | The SAS token of the Azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.endpoint` | String | `None` | The endpoint of the storage service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
| `storage.region` | String | `None` | The region of the storage service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
| `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. |
| `region_engine.mito` | -- | -- | The Mito engine options. |
| `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. |
| `region_engine.mito.worker_channel_size` | Integer | `128` | Request channel size of each worker. |
| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updates to trigger a new checkpoint for the manifest. |
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress the manifest and checkpoint files with gzip (default false). |
| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs. |
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
| `region_engine.mito.global_write_buffer_size` | String | `1GB` | Global write buffer size for all regions. If not set, it defaults to 1/8 of OS memory with a maximum of 1GB. |
| `region_engine.mito.global_write_buffer_reject_size` | String | `2GB` | Global write buffer size threshold to reject write requests. If not set, it defaults to 2 times `global_write_buffer_size`. |
| `region_engine.mito.sst_meta_cache_size` | String | `128MB` | Cache size for SST metadata. Set it to 0 to disable the cache.<br/>If not set, it defaults to 1/32 of OS memory with a maximum of 128MB. |
| `region_engine.mito.vector_cache_size` | String | `512MB` | Cache size for vectors and arrow arrays. Set it to 0 to disable the cache.<br/>If not set, it defaults to 1/16 of OS memory with a maximum of 512MB. |
| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Set it to 0 to disable the cache.<br/>If not set, it defaults to 1/16 of OS memory with a maximum of 512MB. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of CPU cores).<br/>- `0`: use the default value (1/4 of CPU cores).<br/>- `1`: scan in the current thread.<br/>- `n`: scan with parallelism n. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
| `region_engine.mito.inverted_index` | -- | -- | The options for the inverted index in the Mito engine. |
| `region_engine.mito.inverted_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically<br/>- `disable`: never |
| `region_engine.mito.inverted_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically<br/>- `disable`: never |
| `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query.<br/>- `auto`: automatically<br/>- `disable`: never |
| `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `64M` | Memory threshold for performing an external sort during index creation.<br/>Setting it to empty disables external sorting, forcing all sorting operations to happen in memory. |
| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). |
| `region_engine.mito.memtable` | -- | -- | The memtable options of the Mito engine. |
| `region_engine.mito.memtable.type` | String | `time_series` | Memtable type.<br/>- `time_series`: time-series memtable<br/>- `partition_tree`: partition tree memtable (experimental) |
| `region_engine.mito.memtable.index_max_keys_per_shard` | Integer | `8192` | The max number of keys in one shard.<br/>Only available for the `partition_tree` memtable. |
| `region_engine.mito.memtable.data_freeze_threshold` | Integer | `32768` | The max rows of data inside the actively writing buffer in one shard.<br/>Only available for the `partition_tree` memtable. |
| `region_engine.mito.memtable.fork_dictionary_bytes` | String | `1GiB` | Max dictionary bytes.<br/>Only available for the `partition_tree` memtable. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing that will be sampled and exported.<br/>Valid range `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1.<br/>Ratios > 1 are treated as 1, and fractions < 0 are treated as 0. |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `export_metrics` | -- | -- | The datanode can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) via the remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally; it's different from Prometheus scraping. |
| `export_metrics.enable` | Bool | `false` | Whether to enable exporting metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of exporting metrics. |
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommended to collect metrics generated by itself. |
| `export_metrics.self_import.db` | String | `None` | -- |
| `export_metrics.remote_write` | -- | -- | -- |
| `export_metrics.remote_write.url` | String | `""` | The URL the metrics are sent to. An example URL: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. |
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers carried by the Prometheus remote-write requests. |
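To show how a few of these keys compose, here is a minimal sketch of a datanode configuration (a sketch only; the addresses, paths, and bucket name are placeholders rather than recommendations; see the example files below for the full set of options):

```toml
## Minimal datanode sketch: distributed mode, local raft-engine WAL, S3 object storage.
mode = "distributed"
node_id = 42

[meta_client]
metasrv_addrs = ["127.0.0.1:3002"]
timeout = "3s"

[wal]
provider = "raft_engine"
dir = "/tmp/greptimedb/wal"

[storage]
data_home = "/tmp/greptimedb/"
type = "S3"
bucket = "my-bucket"   # placeholder bucket name
root = "data"
## Prefer IAM roles over hardcoding `access_key_id`/`secret_access_key`, as noted above.

[[region_engine]]
[region_engine.mito]
num_workers = 8
```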
@@ -1,171 +1,430 @@
|
||||
# Node running mode, see `standalone.example.toml`.
|
||||
mode = "distributed"
|
||||
# The datanode identifier, should be unique.
|
||||
## The running mode of the datanode. It can be `standalone` or `distributed`.
|
||||
mode = "standalone"
|
||||
|
||||
## The datanode identifier and should be unique in the cluster.
|
||||
## +toml2docs:none-default
|
||||
node_id = 42
|
||||
# gRPC server address, "127.0.0.1:3001" by default.
|
||||
rpc_addr = "127.0.0.1:3001"
|
||||
# Hostname of this node.
|
||||
rpc_hostname = "127.0.0.1"
|
||||
# The number of gRPC server worker threads, 8 by default.
|
||||
rpc_runtime_size = 8
|
||||
# Start services after regions have obtained leases.
|
||||
# It will block the datanode start if it can't receive leases in the heartbeat from metasrv.
|
||||
|
||||
## Start services after regions have obtained leases.
|
||||
## It will block the datanode start if it can't receive leases in the heartbeat from metasrv.
|
||||
require_lease_before_startup = false
|
||||
|
||||
# Initialize all regions in the background during the startup.
|
||||
# By default, it provides services after all regions have been initialized.
|
||||
## Initialize all regions in the background during the startup.
|
||||
## By default, it provides services after all regions have been initialized.
|
||||
init_regions_in_background = false
|
||||
|
||||
## The gRPC address of the datanode.
|
||||
rpc_addr = "127.0.0.1:3001"
|
||||
|
||||
## The hostname of the datanode.
|
||||
## +toml2docs:none-default
|
||||
rpc_hostname = "127.0.0.1"
|
||||
|
||||
## The number of gRPC server worker threads.
|
||||
rpc_runtime_size = 8
|
||||
|
||||
## The maximum receive message size for gRPC server.
|
||||
rpc_max_recv_message_size = "512MB"
|
||||
|
||||
## The maximum send message size for gRPC server.
|
||||
rpc_max_send_message_size = "512MB"
|
||||
|
||||
## Enable telemetry to collect anonymous usage data.
|
||||
enable_telemetry = true
|
||||
|
||||
## The heartbeat options.
|
||||
[heartbeat]
|
||||
# Interval for sending heartbeat messages to the Metasrv, 3 seconds by default.
|
||||
## Interval for sending heartbeat messages to the metasrv.
|
||||
interval = "3s"
|
||||
|
||||
# Metasrv client options.
|
||||
## Interval for retrying to send heartbeat messages to the metasrv.
|
||||
retry_interval = "3s"
|
||||
|
||||
## The metasrv client options.
|
||||
[meta_client]
|
||||
# Metasrv address list.
|
||||
## The addresses of the metasrv.
|
||||
metasrv_addrs = ["127.0.0.1:3002"]
|
||||
# Heartbeat timeout, 500 milliseconds by default.
|
||||
heartbeat_timeout = "500ms"
|
||||
# Operation timeout, 3 seconds by default.
|
||||
|
||||
## Operation timeout.
|
||||
timeout = "3s"
|
||||
# Connect server timeout, 1 second by default.
|
||||
|
||||
## Heartbeat timeout.
|
||||
heartbeat_timeout = "500ms"
|
||||
|
||||
## DDL timeout.
|
||||
ddl_timeout = "10s"
|
||||
|
||||
## Connect server timeout.
|
||||
connect_timeout = "1s"
|
||||
# `TCP_NODELAY` option for accepted connections, true by default.
|
||||
|
||||
## `TCP_NODELAY` option for accepted connections.
|
||||
tcp_nodelay = true
|
||||
|
||||
# WAL options.
|
||||
## The configuration about the cache of the metadata.
|
||||
metadata_cache_max_capacity = 100000
|
||||
|
||||
## TTL of the metadata cache.
|
||||
metadata_cache_ttl = "10m"
|
||||
|
||||
# TTI of the metadata cache.
|
||||
metadata_cache_tti = "5m"
|
||||
|
||||
## The WAL options.
|
||||
[wal]
|
||||
## The provider of the WAL.
|
||||
## - `raft_engine`: the wal is stored in the local file system by raft-engine.
|
||||
## - `kafka`: it's remote wal that data is stored in Kafka.
|
||||
provider = "raft_engine"
|
||||
|
||||
# Raft-engine wal options, see `standalone.example.toml`.
|
||||
# dir = "/tmp/greptimedb/wal"
|
||||
## The directory to store the WAL files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
## +toml2docs:none-default
|
||||
dir = "/tmp/greptimedb/wal"
|
||||
|
||||
## The size of the WAL segment file.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
file_size = "256MB"
|
||||
|
||||
## The threshold of the WAL size to trigger a flush.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
purge_threshold = "4GB"
|
||||
|
||||
## The interval to trigger a flush.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
purge_interval = "10m"
|
||||
|
||||
## The read batch size.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
read_batch_size = 128
|
||||
|
||||
## Whether to use sync write.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
sync_write = false
|
||||
|
||||
# Kafka wal options, see `standalone.example.toml`.
|
||||
# broker_endpoints = ["127.0.0.1:9092"]
|
||||
# Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
# max_batch_size = "1MB"
|
||||
# linger = "200ms"
|
||||
# consumer_wait_timeout = "100ms"
|
||||
# backoff_init = "500ms"
|
||||
# backoff_max = "10s"
|
||||
# backoff_base = 2
|
||||
# backoff_deadline = "5mins"
|
||||
## Whether to reuse logically truncated log files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
enable_log_recycle = true
|
||||
|
||||
# Storage options, see `standalone.example.toml`.
|
||||
## Whether to pre-create log files on start up.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
prefill_log_files = false
|
||||
|
||||
## Duration for fsyncing log files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
sync_period = "10s"
|
||||
|
||||
## The Kafka broker endpoints.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
## The max size of a single producer batch.
|
||||
## Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
max_batch_size = "1MB"
|
||||
|
||||
## The linger duration of a kafka batch producer.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
linger = "200ms"
|
||||
|
||||
## The consumer wait timeout.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
consumer_wait_timeout = "100ms"
|
||||
|
||||
## The initial backoff delay.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_init = "500ms"
|
||||
|
||||
## The maximum backoff delay.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_max = "10s"
|
||||
|
||||
## The exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_base = 2
|
||||
|
||||
## The deadline of retries.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_deadline = "5mins"
|
||||
|
||||
# Example of using S3 as the storage.
|
||||
# [storage]
|
||||
# type = "S3"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# access_key_id = "test"
|
||||
# secret_access_key = "123456"
|
||||
# endpoint = "https://s3.amazonaws.com"
|
||||
# region = "us-west-2"
|
||||
|
||||
# Example of using Oss as the storage.
|
||||
# [storage]
|
||||
# type = "Oss"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# access_key_id = "test"
|
||||
# access_key_secret = "123456"
|
||||
# endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
|
||||
|
||||
# Example of using Azblob as the storage.
|
||||
# [storage]
|
||||
# type = "Azblob"
|
||||
# container = "greptimedb"
|
||||
# root = "data"
|
||||
# account_name = "test"
|
||||
# account_key = "123456"
|
||||
# endpoint = "https://greptimedb.blob.core.windows.net"
|
||||
# sas_token = ""
|
||||
|
||||
# Example of using Gcs as the storage.
|
||||
# [storage]
|
||||
# type = "Gcs"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# scope = "test"
|
||||
# credential_path = "123456"
|
||||
# endpoint = "https://storage.googleapis.com"
|
||||
|
||||
## The data storage options.
|
||||
[storage]
|
||||
# The working home directory.
|
||||
## The working home directory.
|
||||
data_home = "/tmp/greptimedb/"
|
||||
# Storage type.
|
||||
type = "File"
|
||||
# TTL for all tables. Disabled by default.
|
||||
# global_ttl = "7d"
|
||||
|
||||
# Cache configuration for object storage such as 'S3' etc.
|
||||
# The local file cache directory
|
||||
# cache_path = "/path/local_cache"
|
||||
# The local file cache capacity in bytes.
|
||||
# cache_capacity = "256MB"
|
||||
## The storage type used to store the data.
|
||||
## - `File`: the data is stored in the local file system.
|
||||
## - `S3`: the data is stored in the S3 object storage.
|
||||
## - `Gcs`: the data is stored in the Google Cloud Storage.
|
||||
## - `Azblob`: the data is stored in the Azure Blob Storage.
|
||||
## - `Oss`: the data is stored in the Aliyun OSS.
|
||||
type = "File"
|
||||
|
||||
## Cache configuration for object storage such as 'S3' etc.
|
||||
## The local file cache directory.
|
||||
## +toml2docs:none-default
|
||||
cache_path = "/path/local_cache"
|
||||
|
||||
## The local file cache capacity in bytes.
|
||||
## +toml2docs:none-default
|
||||
cache_capacity = "256MB"
|
||||
|
||||
## The S3 bucket name.
|
||||
## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
bucket = "greptimedb"
|
||||
|
||||
## The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.
|
||||
## **It's only used when the storage type is `S3`, `Oss` and `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
root = "greptimedb"
|
||||
|
||||
## The access key id of the aws account.
|
||||
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
|
||||
## **It's only used when the storage type is `S3` and `Oss`**.
|
||||
## +toml2docs:none-default
|
||||
access_key_id = "test"
|
||||
|
||||
## The secret access key of the aws account.
|
||||
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
|
||||
## **It's only used when the storage type is `S3`**.
|
||||
## +toml2docs:none-default
|
||||
secret_access_key = "test"
|
||||
|
||||
## The secret access key of the aliyun account.
|
||||
## **It's only used when the storage type is `Oss`**.
|
||||
## +toml2docs:none-default
|
||||
access_key_secret = "test"
|
||||
|
||||
## The account key of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
account_name = "test"
|
||||
|
||||
## The account key of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
account_key = "test"
|
||||
|
||||
## The scope of the google cloud storage.
|
||||
## **It's only used when the storage type is `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
scope = "test"
|
||||
|
||||
## The credential path of the google cloud storage.
|
||||
## **It's only used when the storage type is `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
credential_path = "test"
|
||||
|
||||
## The container of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
container = "greptimedb"
|
||||
|
||||
## The sas token of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
sas_token = ""
|
||||
|
||||
## The endpoint of the S3 service.
|
||||
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
endpoint = "https://s3.amazonaws.com"
|
||||
|
||||
## The region of the S3 service.
|
||||
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
region = "us-west-2"
|
||||
|
||||
# Custom storage options
|
||||
#[[storage.providers]]
|
||||
#type = "S3"
|
||||
#[[storage.providers]]
|
||||
#type = "Gcs"
|
||||
# [[storage.providers]]
|
||||
# type = "S3"
|
||||
# [[storage.providers]]
|
||||
# type = "Gcs"
|
||||
|
||||
# Mito engine options
|
||||
## The region engine options. You can configure multiple region engines.
|
||||
[[region_engine]]
|
||||
|
||||
## The Mito engine options.
|
||||
[region_engine.mito]
|
||||
# Number of region workers
|
||||
|
||||
## Number of region workers.
|
||||
num_workers = 8
|
||||
# Request channel size of each worker
|
||||
|
||||
## Request channel size of each worker.
|
||||
worker_channel_size = 128
|
||||
# Max batch size for a worker to handle requests
|
||||
|
||||
## Max batch size for a worker to handle requests.
|
||||
worker_request_batch_size = 64
|
||||
# Number of meta action updated to trigger a new checkpoint for the manifest
|
||||
|
||||
## Number of meta action updated to trigger a new checkpoint for the manifest.
|
||||
manifest_checkpoint_distance = 10
|
||||
# Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
|
||||
## Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
compress_manifest = false
|
||||
# Max number of running background jobs
|
||||
|
||||
## Max number of running background jobs
|
||||
max_background_jobs = 4
|
||||
# Interval to auto flush a region if it has not flushed yet.
|
||||
|
||||
## Interval to auto flush a region if it has not flushed yet.
|
||||
auto_flush_interval = "1h"
|
||||
# Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
|
||||
|
||||
## Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
|
||||
global_write_buffer_size = "1GB"
|
||||
# Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
|
||||
|
||||
## Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
|
||||
global_write_buffer_reject_size = "2GB"
|
||||
# Cache size for SST metadata. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
|
||||
|
||||
## Cache size for SST metadata. Setting it to 0 to disable the cache.
|
||||
## If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
|
||||
sst_meta_cache_size = "128MB"
|
||||
# Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
|
||||
## Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
|
||||
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
vector_cache_size = "512MB"
|
||||
# Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
|
||||
## Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
|
||||
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
page_cache_size = "512MB"
|
||||
# Buffer size for SST writing.
|
||||
|
||||
## Buffer size for SST writing.
|
||||
sst_write_buffer_size = "8MB"
|
||||
# Parallelism to scan a region (default: 1/4 of cpu cores).
|
||||
# - 0: using the default value (1/4 of cpu cores).
|
||||
# - 1: scan in current thread.
|
||||
# - n: scan in parallelism n.
|
||||
|
||||
## Parallelism to scan a region (default: 1/4 of cpu cores).
|
||||
## - `0`: using the default value (1/4 of cpu cores).
|
||||
## - `1`: scan in current thread.
|
||||
## - `n`: scan in parallelism n.
|
||||
scan_parallelism = 0
|
||||
# Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
|
||||
|
||||
## Capacity of the channel to send data from parallel scan tasks to the main task.
|
||||
parallel_scan_channel_size = 32
|
||||
# Whether to allow stale WAL entries read during replay.
|
||||
|
||||
## Whether to allow stale WAL entries read during replay.
|
||||
allow_stale_entries = false
|
||||
|
||||
## The options for inverted index in Mito engine.
|
||||
[region_engine.mito.inverted_index]
|
||||
# Whether to create the index on flush.
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to create the index on flush.
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
create_on_flush = "auto"
|
||||
# Whether to create the index on compaction.
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to create the index on compaction.
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
create_on_compaction = "auto"
|
||||
# Whether to apply the index on query
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to apply the index on query
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
apply_on_query = "auto"
|
||||
# Memory threshold for performing an external sort during index creation.
|
||||
# Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
|
||||
|
||||
## Memory threshold for performing an external sort during index creation.
|
||||
## Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
|
||||
mem_threshold_on_create = "64M"
|
||||
# File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
|
||||
|
||||
## File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
|
||||
intermediate_path = ""
|
||||
|
||||
[region_engine.mito.memtable]
|
||||
# Memtable type.
|
||||
# - "partition_tree": partition tree memtable
|
||||
# - "time_series": time-series memtable (deprecated)
|
||||
type = "partition_tree"
|
||||
# The max number of keys in one shard.
|
||||
## Memtable type.
|
||||
## - `time_series`: time-series memtable
|
||||
## - `partition_tree`: partition tree memtable (experimental)
|
||||
type = "time_series"
|
||||
|
||||
## The max number of keys in one shard.
|
||||
## Only available for `partition_tree` memtable.
|
||||
index_max_keys_per_shard = 8192
|
||||
# The max rows of data inside the actively writing buffer in one shard.
|
||||
|
||||
## The max rows of data inside the actively writing buffer in one shard.
|
||||
## Only available for `partition_tree` memtable.
|
||||
data_freeze_threshold = 32768
|
||||
# Max dictionary bytes.
|
||||
|
||||
## Max dictionary bytes.
|
||||
## Only available for `partition_tree` memtable.
|
||||
fork_dictionary_bytes = "1GiB"
|
||||
|
||||
# Log options, see `standalone.example.toml`
|
||||
# [logging]
|
||||
# dir = "/tmp/greptimedb/logs"
|
||||
# level = "info"
|
||||
## The logging options.
|
||||
[logging]
|
||||
## The directory to store the log files.
|
||||
dir = "/tmp/greptimedb/logs"
|
||||
|
||||
# Datanode export the metrics generated by itself
|
||||
# encoded to Prometheus remote-write format
|
||||
# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
|
||||
# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
# [export_metrics]
|
||||
# whether enable export metrics, default is false
|
||||
# enable = false
|
||||
# The interval of export metrics
|
||||
# write_interval = "30s"
|
||||
# [export_metrics.remote_write]
|
||||
# The url the metrics send to. The url is empty by default, url example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`
|
||||
# url = ""
|
||||
# HTTP headers of Prometheus remote-write carry
|
||||
# headers = {}
|
||||
## The log level. Can be `info`/`debug`/`warn`/`error`.
|
||||
## +toml2docs:none-default
|
||||
level = "info"
|
||||
|
||||
## Enable OTLP tracing.
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
## +toml2docs:none-default
|
||||
otlp_endpoint = ""
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
|
||||
## The percentage of tracing that will be sampled and exported.
## Valid range `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1.
## Ratios > 1 are treated as 1. Fractions < 0 are treated as 0.
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
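# Illustrative example: a default_ratio of 0.1 would sample and export roughly 10% of traces.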
|
||||
|
||||
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
[export_metrics]
|
||||
|
||||
## Whether to enable export metrics.
|
||||
enable = false
|
||||
|
||||
## The interval of export metrics.
|
||||
write_interval = "30s"
|
||||
|
||||
## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself.
|
||||
[export_metrics.self_import]
|
||||
## +toml2docs:none-default
|
||||
db = "information_schema"
|
||||
|
||||
[export_metrics.remote_write]
|
||||
## The URL the metrics are sent to. An example URL: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`.
url = ""

## HTTP headers carried by the Prometheus remote-write requests.
headers = { }
|
||||
|
||||
@@ -1,106 +1,192 @@
|
||||
# Node running mode, see `standalone.example.toml`.
|
||||
mode = "distributed"
|
||||
# The default timezone of the server
|
||||
# default_timezone = "UTC"
|
||||
## The running mode of the datanode. It can be `standalone` or `distributed`.
|
||||
mode = "standalone"
|
||||
|
||||
## The default timezone of the server.
|
||||
## +toml2docs:none-default
|
||||
default_timezone = "UTC"
|
||||
|
||||
## The heartbeat options.
|
||||
[heartbeat]
|
||||
# Interval for sending heartbeat task to the Metasrv, 5 seconds by default.
|
||||
interval = "5s"
|
||||
# Interval for retry sending heartbeat task, 5 seconds by default.
|
||||
retry_interval = "5s"
|
||||
## Interval for sending heartbeat messages to the metasrv.
|
||||
interval = "18s"
|
||||
|
||||
# HTTP server options, see `standalone.example.toml`.
|
||||
## Interval for retrying to send heartbeat messages to the metasrv.
|
||||
retry_interval = "3s"
|
||||
|
||||
## The HTTP server options.
|
||||
[http]
|
||||
## The address to bind the HTTP server.
|
||||
addr = "127.0.0.1:4000"
|
||||
## HTTP request timeout.
|
||||
timeout = "30s"
|
||||
## HTTP request body limit.
|
||||
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
|
||||
body_limit = "64MB"
|
||||
|
||||
# gRPC server options, see `standalone.example.toml`.
|
||||
## The gRPC server options.
|
||||
[grpc]
|
||||
## The address to bind the gRPC server.
|
||||
addr = "127.0.0.1:4001"
|
||||
## The number of server worker threads.
|
||||
runtime_size = 8
|
||||
|
||||
# MySQL server options, see `standalone.example.toml`.
|
||||
## MySQL server options.
|
||||
[mysql]
|
||||
## Whether to enable.
|
||||
enable = true
|
||||
## The addr to bind the MySQL server.
|
||||
addr = "127.0.0.1:4002"
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# MySQL server TLS options, see `standalone.example.toml`.
|
||||
# MySQL server TLS options.
|
||||
[mysql.tls]
|
||||
|
||||
## TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
|
||||
## - `disable` (default value)
|
||||
## - `prefer`
|
||||
## - `require`
|
||||
## - `verify-ca`
|
||||
## - `verify-full`
|
||||
mode = "disable"
|
||||
|
||||
## Certificate file path.
|
||||
## +toml2docs:none-default
|
||||
cert_path = ""
|
||||
|
||||
## Private key file path.
|
||||
## +toml2docs:none-default
|
||||
key_path = ""
|
||||
|
||||
## Watch for Certificate and key file change and auto reload
|
||||
watch = false
|
||||
|
||||
# PostgresSQL server options, see `standalone.example.toml`.
|
||||
## PostgreSQL server options.
|
||||
[postgres]
|
||||
## Whether to enable
|
||||
enable = true
|
||||
## The addr to bind the PostgreSQL server.
|
||||
addr = "127.0.0.1:4003"
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# PostgresSQL server TLS options, see `standalone.example.toml`.
|
||||
## PostgreSQL server TLS options, see the `mysql.tls` section.
|
||||
[postgres.tls]
|
||||
## TLS mode.
|
||||
mode = "disable"
|
||||
|
||||
## Certificate file path.
|
||||
## +toml2docs:none-default
|
||||
cert_path = ""
|
||||
|
||||
## Private key file path.
|
||||
## +toml2docs:none-default
|
||||
key_path = ""
|
||||
|
||||
## Watch for Certificate and key file change and auto reload
|
||||
watch = false
|
||||
|
||||
# OpenTSDB protocol options, see `standalone.example.toml`.
|
||||
## OpenTSDB protocol options.
|
||||
[opentsdb]
|
||||
## Whether to enable
|
||||
enable = true
|
||||
## OpenTSDB telnet API server address.
|
||||
addr = "127.0.0.1:4242"
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# InfluxDB protocol options, see `standalone.example.toml`.
|
||||
## InfluxDB protocol options.
|
||||
[influxdb]
|
||||
## Whether to enable InfluxDB protocol in HTTP API.
|
||||
enable = true
|
||||
|
||||
# Prometheus remote storage options, see `standalone.example.toml`.
|
||||
## Prometheus remote storage options
|
||||
[prom_store]
|
||||
## Whether to enable Prometheus remote write and read in HTTP API.
|
||||
enable = true
|
||||
# Whether to store the data from Prometheus remote write in metric engine.
|
||||
# true by default
|
||||
## Whether to store the data from Prometheus remote write in metric engine.
|
||||
with_metric_engine = true
|
||||
|
||||
# Metasrv client options, see `datanode.example.toml`.
|
||||
## The metasrv client options.
|
||||
[meta_client]
|
||||
## The addresses of the metasrv.
|
||||
metasrv_addrs = ["127.0.0.1:3002"]
|
||||
|
||||
## Operation timeout.
|
||||
timeout = "3s"
|
||||
# DDL timeouts options.
|
||||
|
||||
## Heartbeat timeout.
|
||||
heartbeat_timeout = "500ms"
|
||||
|
||||
## DDL timeout.
|
||||
ddl_timeout = "10s"
|
||||
|
||||
## Connect server timeout.
|
||||
connect_timeout = "1s"
|
||||
|
||||
## `TCP_NODELAY` option for accepted connections.
|
||||
tcp_nodelay = true
|
||||
# The configuration about the cache of the Metadata.
|
||||
# default: 100000
|
||||
|
||||
## The configuration about the cache of the metadata.
|
||||
metadata_cache_max_capacity = 100000
|
||||
# default: 10m
|
||||
|
||||
## TTL of the metadata cache.
|
||||
metadata_cache_ttl = "10m"
|
||||
# default: 5m
|
||||
|
||||
# TTI of the metadata cache.
|
||||
metadata_cache_tti = "5m"
|
||||
|
||||
# Log options, see `standalone.example.toml`
|
||||
# [logging]
|
||||
# dir = "/tmp/greptimedb/logs"
|
||||
# level = "info"
|
||||
|
||||
# Datanode options.
|
||||
## Datanode options.
|
||||
[datanode]
|
||||
# Datanode client options.
|
||||
## Datanode client options.
|
||||
[datanode.client]
|
||||
timeout = "10s"
|
||||
connect_timeout = "10s"
|
||||
tcp_nodelay = true
|
||||
|
||||
# Frontend export the metrics generated by itself
|
||||
# encoded to Prometheus remote-write format
|
||||
# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
|
||||
# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
# [export_metrics]
|
||||
# whether enable export metrics, default is false
|
||||
# enable = false
|
||||
# The interval of export metrics
|
||||
# write_interval = "30s"
|
||||
# for `frontend`, `self_import` is recommend to collect metrics generated by itself
|
||||
# [export_metrics.self_import]
|
||||
# db = "information_schema"
|
||||
## The logging options.
|
||||
[logging]
|
||||
## The directory to store the log files.
|
||||
dir = "/tmp/greptimedb/logs"
|
||||
|
||||
## The log level. Can be `info`/`debug`/`warn`/`error`.
|
||||
## +toml2docs:none-default
|
||||
level = "info"
|
||||
|
||||
## Enable OTLP tracing.
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
## +toml2docs:none-default
|
||||
otlp_endpoint = ""
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
|
||||
## The percentage of tracing that will be sampled and exported.
## Valid range `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1.
## Ratios > 1 are treated as 1. Fractions < 0 are treated as 0.
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
|
||||
|
||||
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
[export_metrics]
|
||||
|
||||
## Whether to enable export metrics.
|
||||
enable = false
|
||||
|
||||
## The interval of export metrics.
|
||||
write_interval = "30s"
|
||||
|
||||
## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself.
|
||||
[export_metrics.self_import]
|
||||
## +toml2docs:none-default
|
||||
db = "information_schema"
|
||||
|
||||
[export_metrics.remote_write]
|
||||
## The URL the metrics are sent to. An example URL: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`.
url = ""

## HTTP headers carried by the Prometheus remote-write requests.
headers = { }
|
||||
|
||||
@@ -1,39 +1,44 @@
|
||||
# The working home directory.
|
||||
## The working home directory.
|
||||
data_home = "/tmp/metasrv/"
|
||||
# The bind address of metasrv, "127.0.0.1:3002" by default.
|
||||
|
||||
## The bind address of metasrv.
|
||||
bind_addr = "127.0.0.1:3002"
|
||||
# The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost.
|
||||
|
||||
## The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost.
|
||||
server_addr = "127.0.0.1:3002"
|
||||
# Etcd server address, "127.0.0.1:2379" by default.
|
||||
|
||||
## Etcd server address.
|
||||
store_addr = "127.0.0.1:2379"
|
||||
# Datanode selector type.
|
||||
# - "lease_based" (default value).
|
||||
# - "load_based"
|
||||
# For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector".
|
||||
|
||||
## Datanode selector type.
|
||||
## - `lease_based` (default value).
|
||||
## - `load_based`
|
||||
## For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector".
|
||||
selector = "lease_based"
|
||||
# Store data in memory, false by default.
|
||||
|
||||
## Store data in memory.
|
||||
use_memory_store = false
|
||||
# Whether to enable greptimedb telemetry, true by default.
|
||||
|
||||
## Whether to enable greptimedb telemetry.
|
||||
enable_telemetry = true
|
||||
# If it's not empty, the metasrv will store all data with this key prefix.
|
||||
|
||||
## If it's not empty, the metasrv will store all data with this key prefix.
|
||||
store_key_prefix = ""
|
||||
|
||||
# Log options, see `standalone.example.toml`
|
||||
# [logging]
|
||||
# dir = "/tmp/greptimedb/logs"
|
||||
# level = "info"
|
||||
|
||||
# Procedure storage options.
|
||||
## Procedure storage options.
|
||||
[procedure]
|
||||
# Procedure max retry time.
|
||||
|
||||
## Procedure max retry time.
|
||||
max_retry_times = 12
|
||||
# Initial retry delay of procedures, increases exponentially
|
||||
|
||||
## Initial retry delay of procedures, increases exponentially
|
||||
retry_delay = "500ms"
|
||||
# Auto split large value
|
||||
# GreptimeDB procedure uses etcd as the default metadata storage backend.
|
||||
# The etcd the maximum size of any request is 1.5 MiB
|
||||
# 1500KiB = 1536KiB (1.5MiB) - 36KiB (reserved size of key)
|
||||
# Comments out the `max_metadata_value_size`, for don't split large value (no limit).
|
||||
|
||||
## Auto split large values.
## GreptimeDB procedure uses etcd as the default metadata storage backend.
## The maximum size of any etcd request is 1.5 MiB:
## 1500KiB = 1536KiB (1.5MiB) - 36KiB (reserved size of the key).
## Comment out `max_metadata_value_size` to disable splitting large values (no limit).
|
||||
max_metadata_value_size = "1500KiB"
|
||||
|
||||
# Failure detectors options.
|
||||
@@ -43,57 +48,96 @@ min_std_deviation = "100ms"
|
||||
acceptable_heartbeat_pause = "3000ms"
|
||||
first_heartbeat_estimate = "1000ms"
|
||||
|
||||
# # Datanode options.
|
||||
# [datanode]
|
||||
# # Datanode client options.
|
||||
# [datanode.client_options]
|
||||
# timeout = "10s"
|
||||
# connect_timeout = "10s"
|
||||
# tcp_nodelay = true
|
||||
## Datanode options.
|
||||
[datanode]
|
||||
## Datanode client options.
|
||||
[datanode.client]
|
||||
timeout = "10s"
|
||||
connect_timeout = "10s"
|
||||
tcp_nodelay = true
|
||||
|
||||
[wal]
|
||||
# Available wal providers:
|
||||
# - "raft_engine" (default)
|
||||
# - "kafka"
|
||||
# - `raft_engine` (default): there is no raft-engine WAL config here since the metasrv is only involved in remote WAL currently.
# - `kafka`: the metasrv **has to be** configured with the Kafka WAL config when the datanode uses the Kafka WAL provider.
|
||||
provider = "raft_engine"
|
||||
|
||||
# There're none raft-engine wal config since meta srv only involves in remote wal currently.
|
||||
|
||||
# Kafka wal config.
|
||||
# The broker endpoints of the Kafka cluster. ["127.0.0.1:9092"] by default.
|
||||
# broker_endpoints = ["127.0.0.1:9092"]
|
||||
# Number of topics to be created upon start.
|
||||
# num_topics = 64
|
||||
# Topic selector type.
|
||||
# Available selector types:
|
||||
# - "round_robin" (default)
|
||||
# selector_type = "round_robin"
|
||||
# A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
|
||||
# topic_name_prefix = "greptimedb_wal_topic"
|
||||
# Expected number of replicas of each partition.
|
||||
# replication_factor = 1
|
||||
# Above which a topic creation operation will be cancelled.
|
||||
# create_topic_timeout = "30s"
|
||||
# The initial backoff for kafka clients.
|
||||
# backoff_init = "500ms"
|
||||
# The maximum backoff for kafka clients.
|
||||
# backoff_max = "10s"
|
||||
# Exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
# backoff_base = 2
|
||||
# Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate.
|
||||
# backoff_deadline = "5mins"
|
||||
|
||||
# Metasrv export the metrics generated by itself
|
||||
# encoded to Prometheus remote-write format
|
||||
# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
|
||||
# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
# [export_metrics]
|
||||
# whether enable export metrics, default is false
|
||||
# enable = false
|
||||
# The interval of export metrics
|
||||
# write_interval = "30s"
|
||||
# [export_metrics.remote_write]
|
||||
# The url the metrics send to. The url is empty by default, url example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`
|
||||
# url = ""
|
||||
# HTTP headers of Prometheus remote-write carry
|
||||
# headers = {}
|
||||
## The broker endpoints of the Kafka cluster.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
## Number of topics to be created upon start.
|
||||
num_topics = 64
|
||||
|
||||
## Topic selector type.
|
||||
## Available selector types:
|
||||
## - `round_robin` (default)
|
||||
selector_type = "round_robin"
|
||||
|
||||
## A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
|
||||
topic_name_prefix = "greptimedb_wal_topic"
|
||||
|
||||
## Expected number of replicas of each partition.
|
||||
replication_factor = 1
|
||||
|
||||
## The timeout above which a topic creation operation will be cancelled.
|
||||
create_topic_timeout = "30s"
|
||||
## The initial backoff for kafka clients.
|
||||
backoff_init = "500ms"
|
||||
|
||||
## The maximum backoff for kafka clients.
|
||||
backoff_max = "10s"
|
||||
|
||||
## Exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
backoff_base = 2
|
||||
|
||||
## Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate.
|
||||
backoff_deadline = "5mins"
|
||||
|
||||
## The logging options.
|
||||
[logging]
|
||||
## The directory to store the log files.
|
||||
dir = "/tmp/greptimedb/logs"
|
||||
|
||||
## The log level. Can be `info`/`debug`/`warn`/`error`.
|
||||
## +toml2docs:none-default
|
||||
level = "info"
|
||||
|
||||
## Enable OTLP tracing.
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
## +toml2docs:none-default
|
||||
otlp_endpoint = ""
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
|
||||
## The percentage of tracing that will be sampled and exported.
## Valid range `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1.
## Ratios > 1 are treated as 1. Fractions < 0 are treated as 0.
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
|
||||
|
||||
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
[export_metrics]
|
||||
|
||||
## Whether to enable export metrics.
|
||||
enable = false
|
||||
|
||||
## The interval of export metrics.
|
||||
write_interval = "30s"
|
||||
|
||||
## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself.
|
||||
[export_metrics.self_import]
|
||||
## +toml2docs:none-default
|
||||
db = "information_schema"
|
||||
|
||||
[export_metrics.remote_write]
|
||||
## The URL the metrics are sent to. An example URL: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`.
url = ""

## HTTP headers carried by the Prometheus remote-write requests.
headers = { }
|
||||
|
||||
@@ -1,286 +1,477 @@
|
||||
# Node running mode, "standalone" or "distributed".
|
||||
## The running mode of the datanode. It can be `standalone` or `distributed`.
|
||||
mode = "standalone"
|
||||
# Whether to enable greptimedb telemetry, true by default.
|
||||
enable_telemetry = true
|
||||
# The default timezone of the server
|
||||
# default_timezone = "UTC"
|
||||
|
||||
# HTTP server options.
|
||||
## Enable telemetry to collect anonymous usage data.
|
||||
enable_telemetry = true
|
||||
|
||||
## The default timezone of the server.
|
||||
## +toml2docs:none-default
|
||||
default_timezone = "UTC"
|
||||
|
||||
## The HTTP server options.
|
||||
[http]
|
||||
# Server address, "127.0.0.1:4000" by default.
|
||||
## The address to bind the HTTP server.
|
||||
addr = "127.0.0.1:4000"
|
||||
# HTTP request timeout, 30s by default.
|
||||
## HTTP request timeout.
|
||||
timeout = "30s"
|
||||
# HTTP request body limit, 64Mb by default.
|
||||
# the following units are supported: B, KB, KiB, MB, MiB, GB, GiB, TB, TiB, PB, PiB
|
||||
## HTTP request body limit.
|
||||
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
|
||||
body_limit = "64MB"
|
||||
|
||||
# gRPC server options.
|
||||
## The gRPC server options.
|
||||
[grpc]
|
||||
# Server address, "127.0.0.1:4001" by default.
|
||||
## The address to bind the gRPC server.
|
||||
addr = "127.0.0.1:4001"
|
||||
# The number of server worker threads, 8 by default.
|
||||
## The number of server worker threads.
|
||||
runtime_size = 8
|
||||
|
||||
# MySQL server options.
|
||||
## MySQL server options.
|
||||
[mysql]
|
||||
# Whether to enable
|
||||
## Whether to enable.
|
||||
enable = true
|
||||
# Server address, "127.0.0.1:4002" by default.
|
||||
## The addr to bind the MySQL server.
|
||||
addr = "127.0.0.1:4002"
|
||||
# The number of server worker threads, 2 by default.
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# MySQL server TLS options.
|
||||
[mysql.tls]
|
||||
# TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
|
||||
# - "disable" (default value)
|
||||
# - "prefer"
|
||||
# - "require"
|
||||
# - "verify-ca"
|
||||
# - "verify-full"
|
||||
|
||||
## TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
|
||||
## - `disable` (default value)
|
||||
## - `prefer`
|
||||
## - `require`
|
||||
## - `verify-ca`
|
||||
## - `verify-full`
|
||||
mode = "disable"
|
||||
# Certificate file path.
|
||||
|
||||
## Certificate file path.
|
||||
## +toml2docs:none-default
|
||||
cert_path = ""
|
||||
# Private key file path.
|
||||
|
||||
## Private key file path.
|
||||
## +toml2docs:none-default
|
||||
key_path = ""
|
||||
# Watch for Certificate and key file change and auto reload
|
||||
|
||||
## Watch for Certificate and key file change and auto reload
|
||||
watch = false
|
||||
|
||||
# PostgresSQL server options.
|
||||
## PostgreSQL server options.
|
||||
[postgres]
|
||||
# Whether to enable
|
||||
## Whether to enable
|
||||
enable = true
|
||||
# Server address, "127.0.0.1:4003" by default.
|
||||
## The addr to bind the PostgreSQL server.
|
||||
addr = "127.0.0.1:4003"
|
||||
# The number of server worker threads, 2 by default.
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# PostgresSQL server TLS options, see `[mysql_options.tls]` section.
|
||||
## PostgreSQL server TLS options, see the `mysql.tls` section.
|
||||
[postgres.tls]
|
||||
# TLS mode.
|
||||
## TLS mode.
|
||||
mode = "disable"
|
||||
# certificate file path.
|
||||
|
||||
## Certificate file path.
|
||||
## +toml2docs:none-default
|
||||
cert_path = ""
|
||||
# private key file path.
|
||||
|
||||
## Private key file path.
|
||||
## +toml2docs:none-default
|
||||
key_path = ""
|
||||
# Watch for Certificate and key file change and auto reload
|
||||
|
||||
## Watch for Certificate and key file change and auto reload
|
||||
watch = false
|
||||
|
||||
# OpenTSDB protocol options.
|
||||
## OpenTSDB protocol options.
|
||||
[opentsdb]
|
||||
# Whether to enable
|
||||
## Whether to enable
|
||||
enable = true
|
||||
# OpenTSDB telnet API server address, "127.0.0.1:4242" by default.
|
||||
## OpenTSDB telnet API server address.
|
||||
addr = "127.0.0.1:4242"
|
||||
# The number of server worker threads, 2 by default.
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# InfluxDB protocol options.
|
||||
## InfluxDB protocol options.
|
||||
[influxdb]
|
||||
# Whether to enable InfluxDB protocol in HTTP API, true by default.
|
||||
## Whether to enable InfluxDB protocol in HTTP API.
|
||||
enable = true
|
||||
|
||||
# Prometheus remote storage options
|
||||
## Prometheus remote storage options
|
||||
[prom_store]
|
||||
# Whether to enable Prometheus remote write and read in HTTP API, true by default.
|
||||
## Whether to enable Prometheus remote write and read in HTTP API.
|
||||
enable = true
|
||||
# Whether to store the data from Prometheus remote write in metric engine.
|
||||
# true by default
|
||||
## Whether to store the data from Prometheus remote write in metric engine.
|
||||
with_metric_engine = true
|
||||
|
||||
## The WAL options.
|
||||
[wal]
|
||||
# Available wal providers:
|
||||
# - "raft_engine" (default)
|
||||
# - "kafka"
|
||||
## The provider of the WAL.
|
||||
## - `raft_engine`: the wal is stored in the local file system by raft-engine.
|
||||
## - `kafka`: it's remote wal that data is stored in Kafka.
|
||||
provider = "raft_engine"
|
||||
|
||||
# Raft-engine wal options.
|
||||
# WAL data directory
|
||||
# dir = "/tmp/greptimedb/wal"
|
||||
# WAL file size in bytes.
|
||||
## The directory to store the WAL files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
## +toml2docs:none-default
|
||||
dir = "/tmp/greptimedb/wal"
|
||||
|
||||
## The size of the WAL segment file.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
file_size = "256MB"
|
||||
# WAL purge threshold.
|
||||
|
||||
## The threshold of the WAL size to trigger a flush.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
purge_threshold = "4GB"
|
||||
# WAL purge interval in seconds.
|
||||
|
||||
## The interval to trigger a flush.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
purge_interval = "10m"
|
||||
# WAL read batch size.
|
||||
|
||||
## The read batch size.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
read_batch_size = 128
|
||||
# Whether to sync log file after every write.
|
||||
|
||||
## Whether to use sync write.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
sync_write = false
|
||||
# Whether to reuse logically truncated log files.
|
||||
|
||||
## Whether to reuse logically truncated log files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
enable_log_recycle = true
|
||||
# Whether to pre-create log files on start up
|
||||
|
||||
## Whether to pre-create log files on start up.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
prefill_log_files = false
|
||||
# Duration for fsyncing log files.
|
||||
sync_period = "1000ms"
|
||||
|
||||
# Kafka wal options.
|
||||
# The broker endpoints of the Kafka cluster. ["127.0.0.1:9092"] by default.
|
||||
# broker_endpoints = ["127.0.0.1:9092"]
|
||||
## Duration for fsyncing log files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
sync_period = "10s"
|
||||
|
||||
# Number of topics to be created upon start.
|
||||
# num_topics = 64
|
||||
# Topic selector type.
|
||||
# Available selector types:
|
||||
# - "round_robin" (default)
|
||||
# selector_type = "round_robin"
|
||||
# The prefix of topic name.
|
||||
# topic_name_prefix = "greptimedb_wal_topic"
|
||||
# The number of replicas of each partition.
|
||||
# Warning: the replication factor must be positive and must not be greater than the number of broker endpoints.
|
||||
# replication_factor = 1
|
||||
## The Kafka broker endpoints.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
# The max size of a single producer batch.
|
||||
# Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
# max_batch_size = "1MB"
|
||||
# The linger duration.
|
||||
# linger = "200ms"
|
||||
# The consumer wait timeout.
|
||||
# consumer_wait_timeout = "100ms"
|
||||
# Create topic timeout.
|
||||
# create_topic_timeout = "30s"
|
||||
## The max size of a single producer batch.
|
||||
## Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
max_batch_size = "1MB"
|
||||
|
||||
# The initial backoff delay.
|
||||
# backoff_init = "500ms"
|
||||
# The maximum backoff delay.
|
||||
# backoff_max = "10s"
|
||||
# Exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
# backoff_base = 2
|
||||
# The deadline of retries.
|
||||
# backoff_deadline = "5mins"
|
||||
## The linger duration of a kafka batch producer.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
linger = "200ms"
|
||||
|
||||
# Metadata storage options.
|
||||
## The consumer wait timeout.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
consumer_wait_timeout = "100ms"
|
||||
|
||||
## The initial backoff delay.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_init = "500ms"
|
||||
|
||||
## The maximum backoff delay.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_max = "10s"
|
||||
|
||||
## The exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_base = 2
|
||||
|
||||
## The deadline of retries.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_deadline = "5mins"
|
||||
|
||||
## Metadata storage options.
|
||||
[metadata_store]
|
||||
# Kv file size in bytes.
|
||||
## Kv file size in bytes.
|
||||
file_size = "256MB"
|
||||
# Kv purge threshold.
|
||||
## Kv purge threshold.
|
||||
purge_threshold = "4GB"
|
||||
|
||||
# Procedure storage options.
|
||||
## Procedure storage options.
|
||||
[procedure]
|
||||
# Procedure max retry time.
|
||||
## The max retry times of a procedure.
|
||||
max_retry_times = 3
|
||||
# Initial retry delay of procedures, increases exponentially
|
||||
## The initial retry delay of procedures; it increases exponentially on each retry.
|
||||
retry_delay = "500ms"
|
||||
|
||||
# Storage options.
|
||||
# Example of using S3 as the storage.
|
||||
# [storage]
|
||||
# type = "S3"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# access_key_id = "test"
|
||||
# secret_access_key = "123456"
|
||||
# endpoint = "https://s3.amazonaws.com"
|
||||
# region = "us-west-2"
|
||||
|
||||
# Example of using Oss as the storage.
|
||||
# [storage]
|
||||
# type = "Oss"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# access_key_id = "test"
|
||||
# access_key_secret = "123456"
|
||||
# endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
|
||||
|
||||
# Example of using Azblob as the storage.
|
||||
# [storage]
|
||||
# type = "Azblob"
|
||||
# container = "greptimedb"
|
||||
# root = "data"
|
||||
# account_name = "test"
|
||||
# account_key = "123456"
|
||||
# endpoint = "https://greptimedb.blob.core.windows.net"
|
||||
# sas_token = ""
|
||||
|
||||
# Example of using Gcs as the storage.
|
||||
# [storage]
|
||||
# type = "Gcs"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# scope = "test"
|
||||
# credential_path = "123456"
|
||||
# endpoint = "https://storage.googleapis.com"
|
||||
|
||||
## The data storage options.
|
||||
[storage]
|
||||
# The working home directory.
|
||||
## The working home directory.
|
||||
data_home = "/tmp/greptimedb/"
|
||||
# Storage type.
|
||||
|
||||
## The storage type used to store the data.
|
||||
## - `File`: the data is stored in the local file system.
|
||||
## - `S3`: the data is stored in the S3 object storage.
|
||||
## - `Gcs`: the data is stored in the Google Cloud Storage.
|
||||
## - `Azblob`: the data is stored in the Azure Blob Storage.
|
||||
## - `Oss`: the data is stored in the Aliyun OSS.
|
||||
type = "File"
|
||||
# TTL for all tables. Disabled by default.
|
||||
# global_ttl = "7d"
|
||||
# Cache configuration for object storage such as 'S3' etc.
|
||||
# cache_path = "/path/local_cache"
|
||||
# The local file cache capacity in bytes.
|
||||
# cache_capacity = "256MB"
|
||||
|
||||
## Cache configuration for object storage such as 'S3' etc.
|
||||
## The local file cache directory.
|
||||
## +toml2docs:none-default
|
||||
cache_path = "/path/local_cache"
|
||||
|
||||
## The local file cache capacity in bytes.
|
||||
## +toml2docs:none-default
|
||||
cache_capacity = "256MB"
|
||||
|
||||
## The bucket name of the object storage.
|
||||
## **It's only used when the storage type is `S3`, `Oss` or `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
bucket = "greptimedb"
|
||||
|
||||
## The data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.
|
||||
## **It's only used when the storage type is `S3`, `Oss` or `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
root = "greptimedb"
|
||||
|
||||
## The access key id of the aws account.
|
||||
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
|
||||
## **It's only used when the storage type is `S3` or `Oss`**.
|
||||
## +toml2docs:none-default
|
||||
access_key_id = "test"
|
||||
|
||||
## The secret access key of the aws account.
|
||||
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
|
||||
## **It's only used when the storage type is `S3`**.
|
||||
## +toml2docs:none-default
|
||||
secret_access_key = "test"
|
||||
|
||||
## The secret access key of the aliyun account.
|
||||
## **It's only used when the storage type is `Oss`**.
|
||||
## +toml2docs:none-default
|
||||
access_key_secret = "test"
|
||||
|
||||
## The account name of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
account_name = "test"
|
||||
|
||||
## The account key of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
account_key = "test"
|
||||
|
||||
## The scope of the google cloud storage.
|
||||
## **It's only used when the storage type is `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
scope = "test"
|
||||
|
||||
## The credential path of the google cloud storage.
|
||||
## **It's only used when the storage type is `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
credential_path = "test"
|
||||
|
||||
## The container of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
container = "greptimedb"
|
||||
|
||||
## The sas token of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
sas_token = ""
|
||||
|
||||
## The endpoint of the object storage service.
|
||||
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` or `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
endpoint = "https://s3.amazonaws.com"
|
||||
|
||||
## The region of the object storage service.
|
||||
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` or `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
region = "us-west-2"
|
||||
|
||||
# Custom storage options
|
||||
#[[storage.providers]]
|
||||
#type = "S3"
|
||||
#[[storage.providers]]
|
||||
#type = "Gcs"
|
||||
# [[storage.providers]]
|
||||
# type = "S3"
|
||||
# [[storage.providers]]
|
||||
# type = "Gcs"
|
||||
|
||||
# Mito engine options
|
||||
## The region engine options. You can configure multiple region engines.
|
||||
[[region_engine]]
|
||||
|
||||
## The Mito engine options.
|
||||
[region_engine.mito]
|
||||
# Number of region workers
|
||||
|
||||
## Number of region workers.
|
||||
num_workers = 8
|
||||
# Request channel size of each worker
|
||||
|
||||
## Request channel size of each worker.
|
||||
worker_channel_size = 128
|
||||
# Max batch size for a worker to handle requests
|
||||
|
||||
## Max batch size for a worker to handle requests.
|
||||
worker_request_batch_size = 64
|
||||
# Number of meta action updated to trigger a new checkpoint for the manifest
|
||||
|
||||
## Number of meta actions after which to trigger a new checkpoint for the manifest.
|
||||
manifest_checkpoint_distance = 10
|
||||
# Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
|
||||
## Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
compress_manifest = false
|
||||
# Max number of running background jobs
|
||||
|
||||
## Max number of running background jobs.
|
||||
max_background_jobs = 4
|
||||
# Interval to auto flush a region if it has not flushed yet.
|
||||
|
||||
## Interval to auto flush a region if it has not flushed yet.
|
||||
auto_flush_interval = "1h"
|
||||
# Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
|
||||
|
||||
## Global write buffer size for all regions. If not set, it defaults to 1/8 of the OS memory, with a maximum of 1GB.
|
||||
global_write_buffer_size = "1GB"
|
||||
# Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
|
||||
|
||||
## Global write buffer size threshold to reject write requests. If not set, it defaults to 2 times `global_write_buffer_size`.
|
||||
global_write_buffer_reject_size = "2GB"
|
||||
# Cache size for SST metadata. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
|
||||
|
||||
## Cache size for SST metadata. Set it to 0 to disable the cache.
|
||||
## If not set, it defaults to 1/32 of the OS memory, with a maximum of 128MB.
|
||||
sst_meta_cache_size = "128MB"
|
||||
# Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
|
||||
## Cache size for vectors and arrow arrays. Set it to 0 to disable the cache.
|
||||
## If not set, it defaults to 1/16 of the OS memory, with a maximum of 512MB.
|
||||
vector_cache_size = "512MB"
|
||||
# Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
|
||||
## Cache size for pages of SST row groups. Set it to 0 to disable the cache.
|
||||
## If not set, it defaults to 1/16 of the OS memory, with a maximum of 512MB.
|
||||
page_cache_size = "512MB"
|
||||
# Buffer size for SST writing.
|
||||
|
||||
## Buffer size for SST writing.
|
||||
sst_write_buffer_size = "8MB"
|
||||
# Parallelism to scan a region (default: 1/4 of cpu cores).
|
||||
# - 0: using the default value (1/4 of cpu cores).
|
||||
# - 1: scan in current thread.
|
||||
# - n: scan in parallelism n.
|
||||
|
||||
## Parallelism to scan a region (default: 1/4 of cpu cores).
|
||||
## - `0`: using the default value (1/4 of cpu cores).
|
||||
## - `1`: scan in current thread.
|
||||
## - `n`: scan in parallelism n.
|
||||
scan_parallelism = 0
|
||||
# Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
|
||||
|
||||
## Capacity of the channel to send data from parallel scan tasks to the main task.
|
||||
parallel_scan_channel_size = 32
|
||||
# Whether to allow stale WAL entries read during replay.
|
||||
|
||||
## Whether to allow stale WAL entries read during replay.
|
||||
allow_stale_entries = false
|
||||
|
||||
## The options for inverted index in Mito engine.
|
||||
[region_engine.mito.inverted_index]
|
||||
# Whether to create the index on flush.
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to create the index on flush.
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
create_on_flush = "auto"
|
||||
# Whether to create the index on compaction.
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to create the index on compaction.
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
create_on_compaction = "auto"
|
||||
# Whether to apply the index on query
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to apply the index on query.
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
apply_on_query = "auto"
|
||||
# Memory threshold for performing an external sort during index creation.
|
||||
# Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
|
||||
|
||||
## Memory threshold for performing an external sort during index creation.
|
||||
## Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
|
||||
mem_threshold_on_create = "64M"
|
||||
# File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
|
||||
|
||||
## File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
|
||||
intermediate_path = ""
|
||||
|
||||
[region_engine.mito.memtable]
|
||||
# Memtable type.
|
||||
# - "partition_tree": partition tree memtable
|
||||
# - "time_series": time-series memtable (deprecated)
|
||||
type = "partition_tree"
|
||||
# The max number of keys in one shard.
|
||||
## Memtable type.
|
||||
## - `time_series`: time-series memtable
|
||||
## - `partition_tree`: partition tree memtable (experimental)
|
||||
type = "time_series"
|
||||
|
||||
## The max number of keys in one shard.
|
||||
## Only available for `partition_tree` memtable.
|
||||
index_max_keys_per_shard = 8192
|
||||
# The max rows of data inside the actively writing buffer in one shard.
|
||||
|
||||
## The max rows of data inside the actively writing buffer in one shard.
|
||||
## Only available for `partition_tree` memtable.
|
||||
data_freeze_threshold = 32768
|
||||
# Max dictionary bytes.
|
||||
|
||||
## Max dictionary bytes.
|
||||
## Only available for `partition_tree` memtable.
|
||||
fork_dictionary_bytes = "1GiB"
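If the experimental `partition_tree` memtable were selected instead, the shard-related knobs above would take effect; a sketch using the values shown in this section:

```toml
# Example only: opt into the experimental partition_tree memtable.
[region_engine.mito.memtable]
type = "partition_tree"
index_max_keys_per_shard = 8192
data_freeze_threshold = 32768
fork_dictionary_bytes = "1GiB"
```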
|
||||
|
||||
# Log options
|
||||
# [logging]
|
||||
# Specify logs directory.
|
||||
# dir = "/tmp/greptimedb/logs"
|
||||
# Specify the log level [info | debug | error | warn]
|
||||
# level = "info"
|
||||
# whether enable tracing, default is false
|
||||
# enable_otlp_tracing = false
|
||||
# tracing exporter endpoint with format `ip:port`, we use grpc oltp as exporter, default endpoint is `localhost:4317`
|
||||
# otlp_endpoint = "localhost:4317"
|
||||
# Whether to append logs to stdout. Defaults to true.
|
||||
# append_stdout = true
|
||||
# The percentage of tracing will be sampled and exported. Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1. ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
# [logging.tracing_sample_ratio]
|
||||
# default_ratio = 0.0
|
||||
## The logging options.
|
||||
[logging]
|
||||
## The directory to store the log files.
|
||||
dir = "/tmp/greptimedb/logs"
|
||||
|
||||
# Standalone export the metrics generated by itself
|
||||
# encoded to Prometheus remote-write format
|
||||
# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
|
||||
# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
# [export_metrics]
|
||||
# whether enable export metrics, default is false
|
||||
# enable = false
|
||||
# The interval of export metrics
|
||||
# write_interval = "30s"
|
||||
# for `standalone`, `self_import` is recommend to collect metrics generated by itself
|
||||
# [export_metrics.self_import]
|
||||
# db = "information_schema"
|
||||
## The log level. Can be `info`/`debug`/`warn`/`error`.
|
||||
## +toml2docs:none-default
|
||||
level = "info"
|
||||
|
||||
## Enable OTLP tracing.
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
## +toml2docs:none-default
|
||||
otlp_endpoint = ""
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
|
||||
## The percentage of traces that will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## Ratios > 1 are treated as 1, and ratios < 0 are treated as 0.
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
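As an illustration, enabling OTLP tracing with a reduced sample rate might look like the sketch below; the `localhost:4317` endpoint follows the default mentioned in the older comments, and the 0.1 ratio is just an example value.

```toml
# Example only: turn on OTLP tracing and sample roughly 10% of traces.
[logging]
dir = "/tmp/greptimedb/logs"
level = "info"
enable_otlp_tracing = true
otlp_endpoint = "localhost:4317"
append_stdout = true

[logging.tracing_sample_ratio]
default_ratio = 0.1
```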
|
||||
|
||||
## The server can export its metrics and send them to a Prometheus remote-write compatible receiver (e.g. `greptimedb` itself) via the remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
[export_metrics]
|
||||
|
||||
## Whether to enable the export of metrics.
|
||||
enable = false
|
||||
|
||||
## The interval of export metrics.
|
||||
write_interval = "30s"
|
||||
|
||||
## For `standalone` mode, `self_import` is recommended to collect metrics generated by the instance itself.
|
||||
[export_metrics.self_import]
|
||||
## +toml2docs:none-default
|
||||
db = "information_schema"
|
||||
|
||||
[export_metrics.remote_write]
|
||||
## The URL to send the metrics to. For example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`.
|
||||
url = ""
|
||||
|
||||
## The HTTP headers carried by the Prometheus remote-write requests.
|
||||
headers = { }
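To make the `remote_write` variant concrete, a hedged sketch follows; the URL reuses the example given above, and the header entry is purely illustrative.

```toml
# Example only: push internal metrics to a Prometheus remote-write endpoint
# every 30 seconds. The URL and header values are illustrative.
[export_metrics]
enable = true
write_interval = "30s"

[export_metrics.remote_write]
url = "http://127.0.0.1:4000/v1/prometheus/write?db=information_schema"
headers = { Authorization = "Basic <base64-credentials>" }
```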
|
||||
|
||||
@@ -27,8 +27,8 @@ subgraph Frontend["Frontend"]
|
||||
end
|
||||
end
|
||||
|
||||
MyTable --> MetaSrv
|
||||
MetaSrv --> ETCD
|
||||
MyTable --> Metasrv
|
||||
Metasrv --> ETCD
|
||||
|
||||
MyTable-->TableEngine0
|
||||
MyTable-->TableEngine1
|
||||
@@ -95,8 +95,8 @@ subgraph Frontend["Frontend"]
|
||||
end
|
||||
end
|
||||
|
||||
MyTable --> MetaSrv
|
||||
MetaSrv --> ETCD
|
||||
MyTable --> Metasrv
|
||||
Metasrv --> ETCD
|
||||
|
||||
MyTable-->RegionEngine
|
||||
MyTable-->RegionEngine1
|
||||
|
||||
@@ -36,7 +36,7 @@ Hence, we choose the third option, and use a simple logical plan that's anagonis
|
||||
## Deploy mode and protocol
|
||||
- Greptime Flow is an independent streaming compute component. It can be used either within a standalone node or as a dedicated node at the same level as frontend in distributed mode.
|
||||
- It accepts insert requests in the `Rows` format, which is used between frontend and datanode.
|
||||
- New flow job is submitted in the format of modified SQL query like snowflake do, like: `CREATE TASK avg_over_5m WINDOW_SIZE = "5m" AS SELECT avg(value) FROM table WHERE time > now() - 5m GROUP BY time(1m)`. Flow job then got stored in MetaSrv.
|
||||
- A new flow job is submitted as a modified SQL query, similar to Snowflake, e.g.: `CREATE TASK avg_over_5m WINDOW_SIZE = "5m" AS SELECT avg(value) FROM table WHERE time > now() - 5m GROUP BY time(1m)`. The flow job is then stored in Metasrv.
|
||||
- It also persists results to the frontend in the `Rows` format.
|
||||
- The query plan uses Substrait as codec format. It's the same with GreptimeDB's query engine.
|
||||
- Greptime Flow needs a WAL for recovery. It's possible to reuse the datanode's.
|
||||
|
||||
@@ -216,7 +216,7 @@ pub enum Error {
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to perform metasrv operation"))]
|
||||
MetaSrv {
|
||||
Metasrv {
|
||||
location: Location,
|
||||
source: meta_client::error::Error,
|
||||
},
|
||||
@@ -304,7 +304,7 @@ impl ErrorExt for Error {
|
||||
| Error::CreateTable { source, .. }
|
||||
| Error::TableSchemaMismatch { source, .. } => source.status_code(),
|
||||
|
||||
Error::MetaSrv { source, .. } => source.status_code(),
|
||||
Error::Metasrv { source, .. } => source.status_code(),
|
||||
Error::SystemCatalogTableScan { source, .. } => source.status_code(),
|
||||
Error::SystemCatalogTableScanExec { source, .. } => source.status_code(),
|
||||
Error::InvalidTableInfoInCatalog { source, .. } => source.status_code(),
|
||||
|
||||
@@ -20,6 +20,7 @@ mod predicate;
|
||||
mod region_peers;
|
||||
mod runtime_metrics;
|
||||
pub mod schemata;
|
||||
mod table_constraints;
|
||||
mod table_names;
|
||||
pub mod tables;
|
||||
|
||||
@@ -52,6 +53,7 @@ use crate::information_schema::partitions::InformationSchemaPartitions;
|
||||
use crate::information_schema::region_peers::InformationSchemaRegionPeers;
|
||||
use crate::information_schema::runtime_metrics::InformationSchemaMetrics;
|
||||
use crate::information_schema::schemata::InformationSchemaSchemata;
|
||||
use crate::information_schema::table_constraints::InformationSchemaTableConstraints;
|
||||
use crate::information_schema::tables::InformationSchemaTables;
|
||||
use crate::CatalogManager;
|
||||
|
||||
@@ -173,6 +175,10 @@ impl InformationSchemaProvider {
|
||||
KEY_COLUMN_USAGE.to_string(),
|
||||
self.build_table(KEY_COLUMN_USAGE).unwrap(),
|
||||
);
|
||||
tables.insert(
|
||||
TABLE_CONSTRAINTS.to_string(),
|
||||
self.build_table(TABLE_CONSTRAINTS).unwrap(),
|
||||
);
|
||||
|
||||
// Add memory tables
|
||||
for name in MEMORY_TABLES.iter() {
|
||||
@@ -241,6 +247,10 @@ impl InformationSchemaProvider {
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)) as _),
|
||||
TABLE_CONSTRAINTS => Some(Arc::new(InformationSchemaTableConstraints::new(
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)) as _),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -274,8 +274,8 @@ impl InformationSchemaColumnsBuilder {
|
||||
};
|
||||
|
||||
self.add_column(
|
||||
idx,
|
||||
&predicates,
|
||||
idx,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table.table_info().name,
|
||||
@@ -292,8 +292,8 @@ impl InformationSchemaColumnsBuilder {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn add_column(
|
||||
&mut self,
|
||||
index: usize,
|
||||
predicates: &Predicates,
|
||||
index: usize,
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
table_name: &str,
|
||||
|
||||
@@ -49,6 +49,11 @@ pub const COLUMN_NAME: &str = "column_name";
|
||||
pub const ORDINAL_POSITION: &str = "ordinal_position";
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
/// Primary key constraint name
|
||||
pub(crate) const PRI_CONSTRAINT_NAME: &str = "PRIMARY";
|
||||
/// Time index constraint name
|
||||
pub(crate) const TIME_INDEX_CONSTRAINT_NAME: &str = "TIME INDEX";
|
||||
|
||||
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
|
||||
pub(super) struct InformationSchemaKeyColumnUsage {
|
||||
schema: SchemaRef,
|
||||
@@ -232,7 +237,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
"TIME INDEX",
|
||||
TIME_INDEX_CONSTRAINT_NAME,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_name,
|
||||
@@ -262,7 +267,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
"PRIMARY",
|
||||
PRI_CONSTRAINT_NAME,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_name,
|
||||
|
||||
src/catalog/src/information_schema/table_constraints.rs (new file, 286 lines)
@@ -0,0 +1,286 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_TABLE_CONSTRAINTS_TABLE_ID;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_query::physical_plan::TaskContext;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, MutableVector};
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{ConstantVector, StringVector, StringVectorBuilder, VectorRef};
|
||||
use futures::TryStreamExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::{InformationTable, TABLE_CONSTRAINTS};
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::key_column_usage::{
|
||||
PRI_CONSTRAINT_NAME, TIME_INDEX_CONSTRAINT_NAME,
|
||||
};
|
||||
use crate::information_schema::Predicates;
|
||||
use crate::CatalogManager;
|
||||
|
||||
/// The `TABLE_CONSTRAINTS` table describes which tables have constraints.
|
||||
pub(super) struct InformationSchemaTableConstraints {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
}
|
||||
|
||||
const CONSTRAINT_CATALOG: &str = "constraint_catalog";
|
||||
const CONSTRAINT_SCHEMA: &str = "constraint_schema";
|
||||
const CONSTRAINT_NAME: &str = "constraint_name";
|
||||
const TABLE_SCHEMA: &str = "table_schema";
|
||||
const TABLE_NAME: &str = "table_name";
|
||||
const CONSTRAINT_TYPE: &str = "constraint_type";
|
||||
const ENFORCED: &str = "enforced";
|
||||
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
const TIME_INDEX_CONSTRAINT_TYPE: &str = "TIME INDEX";
|
||||
const PRI_KEY_CONSTRAINT_TYPE: &str = "PRIMARY KEY";
|
||||
|
||||
impl InformationSchemaTableConstraints {
|
||||
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
Self {
|
||||
schema: Self::schema(),
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
}
|
||||
}
|
||||
|
||||
fn schema() -> SchemaRef {
|
||||
Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new(
|
||||
CONSTRAINT_CATALOG,
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
CONSTRAINT_SCHEMA,
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(CONSTRAINT_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(CONSTRAINT_TYPE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(ENFORCED, ConcreteDataType::string_datatype(), false),
|
||||
]))
|
||||
}
|
||||
|
||||
fn builder(&self) -> InformationSchemaTableConstraintsBuilder {
|
||||
InformationSchemaTableConstraintsBuilder::new(
|
||||
self.schema.clone(),
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl InformationTable for InformationSchemaTableConstraints {
|
||||
fn table_id(&self) -> TableId {
|
||||
INFORMATION_SCHEMA_TABLE_CONSTRAINTS_TABLE_ID
|
||||
}
|
||||
|
||||
fn table_name(&self) -> &'static str {
|
||||
TABLE_CONSTRAINTS
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_table_constraints(Some(request))
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
));
|
||||
Ok(Box::pin(
|
||||
RecordBatchStreamAdapter::try_new(stream)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
struct InformationSchemaTableConstraintsBuilder {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
|
||||
constraint_schemas: StringVectorBuilder,
|
||||
constraint_names: StringVectorBuilder,
|
||||
table_schemas: StringVectorBuilder,
|
||||
table_names: StringVectorBuilder,
|
||||
constraint_types: StringVectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaTableConstraintsBuilder {
|
||||
fn new(
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
constraint_schemas: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
constraint_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_schemas: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
constraint_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct the `information_schema.table_constraints` virtual table
|
||||
async fn make_table_constraints(
|
||||
&mut self,
|
||||
request: Option<ScanRequest>,
|
||||
) -> Result<RecordBatch> {
|
||||
let catalog_name = self.catalog_name.clone();
|
||||
let catalog_manager = self
|
||||
.catalog_manager
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name).await;
|
||||
|
||||
while let Some(table) = stream.try_next().await? {
|
||||
let keys = &table.table_info().meta.primary_key_indices;
|
||||
let schema = table.schema();
|
||||
|
||||
if schema.timestamp_index().is_some() {
|
||||
self.add_table_constraint(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
TIME_INDEX_CONSTRAINT_NAME,
|
||||
&schema_name,
|
||||
&table.table_info().name,
|
||||
TIME_INDEX_CONSTRAINT_TYPE,
|
||||
);
|
||||
}
|
||||
|
||||
if !keys.is_empty() {
|
||||
self.add_table_constraint(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
PRI_CONSTRAINT_NAME,
|
||||
&schema_name,
|
||||
&table.table_info().name,
|
||||
PRI_KEY_CONSTRAINT_TYPE,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.finish()
|
||||
}
|
||||
|
||||
fn add_table_constraint(
|
||||
&mut self,
|
||||
predicates: &Predicates,
|
||||
constraint_schema: &str,
|
||||
constraint_name: &str,
|
||||
table_schema: &str,
|
||||
table_name: &str,
|
||||
constraint_type: &str,
|
||||
) {
|
||||
let row = [
|
||||
(CONSTRAINT_SCHEMA, &Value::from(constraint_schema)),
|
||||
(CONSTRAINT_NAME, &Value::from(constraint_name)),
|
||||
(TABLE_SCHEMA, &Value::from(table_schema)),
|
||||
(TABLE_NAME, &Value::from(table_name)),
|
||||
(CONSTRAINT_TYPE, &Value::from(constraint_type)),
|
||||
];
|
||||
|
||||
if !predicates.eval(&row) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.constraint_schemas.push(Some(constraint_schema));
|
||||
self.constraint_names.push(Some(constraint_name));
|
||||
self.table_schemas.push(Some(table_schema));
|
||||
self.table_names.push(Some(table_name));
|
||||
self.constraint_types.push(Some(constraint_type));
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
let rows_num = self.constraint_names.len();
|
||||
|
||||
let constraint_catalogs = Arc::new(ConstantVector::new(
|
||||
Arc::new(StringVector::from(vec!["def"])),
|
||||
rows_num,
|
||||
));
|
||||
let enforceds = Arc::new(ConstantVector::new(
|
||||
Arc::new(StringVector::from(vec!["YES"])),
|
||||
rows_num,
|
||||
));
|
||||
|
||||
let columns: Vec<VectorRef> = vec![
|
||||
constraint_catalogs,
|
||||
Arc::new(self.constraint_schemas.finish()),
|
||||
Arc::new(self.constraint_names.finish()),
|
||||
Arc::new(self.table_schemas.finish()),
|
||||
Arc::new(self.table_names.finish()),
|
||||
Arc::new(self.constraint_types.finish()),
|
||||
enforceds,
|
||||
];
|
||||
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
impl DfPartitionStream for InformationSchemaTableConstraints {
|
||||
fn schema(&self) -> &ArrowSchemaRef {
|
||||
self.schema.arrow_schema()
|
||||
}
|
||||
|
||||
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_table_constraints(None)
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
))
|
||||
}
|
||||
}
|
||||
@@ -41,3 +41,4 @@ pub const SESSION_STATUS: &str = "session_status";
|
||||
pub const RUNTIME_METRICS: &str = "runtime_metrics";
|
||||
pub const PARTITIONS: &str = "partitions";
|
||||
pub const REGION_PEERS: &str = "greptime_region_peers";
|
||||
pub const TABLE_CONSTRAINTS: &str = "table_constraints";
|
||||
|
||||
@@ -37,6 +37,8 @@ use snafu::{ensure, ResultExt};
|
||||
use crate::error::{ConvertFlightDataSnafu, Error, IllegalFlightMessagesSnafu, ServerSnafu};
|
||||
use crate::{error, from_grpc_response, metrics, Client, Result, StreamInserter};
|
||||
|
||||
pub const DEFAULT_LOOKBACK_STRING: &str = "5m";
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct Database {
|
||||
// The "catalog" and "schema" to be used in processing the requests at the server side.
|
||||
@@ -215,6 +217,7 @@ impl Database {
|
||||
start: start.to_string(),
|
||||
end: end.to_string(),
|
||||
step: step.to_string(),
|
||||
lookback: DEFAULT_LOOKBACK_STRING.to_string(),
|
||||
})),
|
||||
}))
|
||||
.await
|
||||
|
||||
@@ -76,6 +76,7 @@ tikv-jemallocator = "0.5"
|
||||
common-test-util.workspace = true
|
||||
serde.workspace = true
|
||||
temp-env = "0.3"
|
||||
tempfile.workspace = true
|
||||
|
||||
[target.'cfg(not(windows))'.dev-dependencies]
|
||||
rexpect = "0.5"
|
||||
|
||||
@@ -107,14 +107,11 @@ impl TableMetadataBencher {
|
||||
.unwrap();
|
||||
let start = Instant::now();
|
||||
let table_info = table_info.unwrap();
|
||||
let table_route = table_route.unwrap();
|
||||
let table_id = table_info.table_info.ident.table_id;
|
||||
let _ = self
|
||||
.table_metadata_manager
|
||||
.delete_table_metadata(
|
||||
table_id,
|
||||
&table_info.table_name(),
|
||||
table_route.unwrap().region_routes().unwrap(),
|
||||
)
|
||||
.delete_table_metadata(table_id, &table_info.table_name(), &table_route)
|
||||
.await;
|
||||
start.elapsed()
|
||||
},
|
||||
@@ -140,7 +137,7 @@ impl TableMetadataBencher {
|
||||
let start = Instant::now();
|
||||
let _ = self
|
||||
.table_metadata_manager
|
||||
.rename_table(table_info.unwrap(), new_table_name)
|
||||
.rename_table(&table_info.unwrap(), new_table_name)
|
||||
.await;
|
||||
|
||||
start.elapsed()
|
||||
|
||||
@@ -226,7 +226,10 @@ impl Export {
|
||||
}
|
||||
|
||||
async fn show_create_table(&self, catalog: &str, schema: &str, table: &str) -> Result<String> {
|
||||
let sql = format!("show create table {}.{}.{}", catalog, schema, table);
|
||||
let sql = format!(
|
||||
r#"show create table "{}"."{}"."{}""#,
|
||||
catalog, schema, table
|
||||
);
|
||||
let mut client = self.client.clone();
|
||||
client.set_catalog(catalog);
|
||||
client.set_schema(schema);
|
||||
@@ -273,7 +276,7 @@ impl Export {
|
||||
for (c, s, t) in table_list {
|
||||
match self.show_create_table(&c, &s, &t).await {
|
||||
Err(e) => {
|
||||
error!(e; "Failed to export table {}.{}.{}", c, s, t)
|
||||
error!(e; r#"Failed to export table "{}"."{}"."{}""#, c, s, t)
|
||||
}
|
||||
Ok(create_table) => {
|
||||
file.write_all(create_table.as_bytes())
|
||||
@@ -417,3 +420,84 @@ fn split_database(database: &str) -> Result<(String, Option<String>)> {
|
||||
Ok((catalog.to_string(), Some(schema.to_string())))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use clap::Parser;
|
||||
use client::{Client, Database};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::options::{CliOptions, Options};
|
||||
use crate::{cli, standalone, App};
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_export_create_table_with_quoted_names() -> Result<()> {
|
||||
let output_dir = tempfile::tempdir().unwrap();
|
||||
|
||||
let standalone = standalone::Command::parse_from([
|
||||
"standalone",
|
||||
"start",
|
||||
"--data-home",
|
||||
&*output_dir.path().to_string_lossy(),
|
||||
]);
|
||||
let Options::Standalone(standalone_opts) =
|
||||
standalone.load_options(&CliOptions::default())?
|
||||
else {
|
||||
unreachable!()
|
||||
};
|
||||
let mut instance = standalone.build(*standalone_opts).await?;
|
||||
instance.start().await?;
|
||||
|
||||
let client = Client::with_urls(["127.0.0.1:4001"]);
|
||||
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
|
||||
database
|
||||
.sql(r#"CREATE DATABASE "cli.export.create_table";"#)
|
||||
.await
|
||||
.unwrap();
|
||||
database
|
||||
.sql(
|
||||
r#"CREATE TABLE "cli.export.create_table"."a.b.c"(
|
||||
ts TIMESTAMP,
|
||||
TIME INDEX (ts)
|
||||
) engine=mito;
|
||||
"#,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let output_dir = tempfile::tempdir().unwrap();
|
||||
let cli = cli::Command::parse_from([
|
||||
"cli",
|
||||
"export",
|
||||
"--addr",
|
||||
"127.0.0.1:4001",
|
||||
"--output-dir",
|
||||
&*output_dir.path().to_string_lossy(),
|
||||
"--target",
|
||||
"create-table",
|
||||
]);
|
||||
let mut cli_app = cli.build().await?;
|
||||
cli_app.start().await?;
|
||||
|
||||
instance.stop().await?;
|
||||
|
||||
let output_file = output_dir
|
||||
.path()
|
||||
.join("greptime-cli.export.create_table.sql");
|
||||
let res = std::fs::read_to_string(output_file).unwrap();
|
||||
let expect = r#"CREATE TABLE IF NOT EXISTS "a.b.c" (
|
||||
"ts" TIMESTAMP(3) NOT NULL,
|
||||
TIME INDEX ("ts")
|
||||
)
|
||||
|
||||
ENGINE=mito
|
||||
WITH(
|
||||
regions = 1
|
||||
);
|
||||
"#;
|
||||
assert_eq!(res.trim(), expect.trim());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,8 +17,8 @@ use std::time::Duration;
|
||||
use async_trait::async_trait;
|
||||
use clap::Parser;
|
||||
use common_telemetry::logging;
|
||||
use meta_srv::bootstrap::MetaSrvInstance;
|
||||
use meta_srv::metasrv::MetaSrvOptions;
|
||||
use meta_srv::bootstrap::MetasrvInstance;
|
||||
use meta_srv::metasrv::MetasrvOptions;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{self, Result, StartMetaServerSnafu};
|
||||
@@ -26,11 +26,11 @@ use crate::options::{CliOptions, Options};
|
||||
use crate::App;
|
||||
|
||||
pub struct Instance {
|
||||
instance: MetaSrvInstance,
|
||||
instance: MetasrvInstance,
|
||||
}
|
||||
|
||||
impl Instance {
|
||||
fn new(instance: MetaSrvInstance) -> Self {
|
||||
fn new(instance: MetasrvInstance) -> Self {
|
||||
Self { instance }
|
||||
}
|
||||
}
|
||||
@@ -42,7 +42,7 @@ impl App for Instance {
|
||||
}
|
||||
|
||||
async fn start(&mut self) -> Result<()> {
|
||||
plugins::start_meta_srv_plugins(self.instance.plugins())
|
||||
plugins::start_metasrv_plugins(self.instance.plugins())
|
||||
.await
|
||||
.context(StartMetaServerSnafu)?;
|
||||
|
||||
@@ -64,7 +64,7 @@ pub struct Command {
|
||||
}
|
||||
|
||||
impl Command {
|
||||
pub async fn build(self, opts: MetaSrvOptions) -> Result<Instance> {
|
||||
pub async fn build(self, opts: MetasrvOptions) -> Result<Instance> {
|
||||
self.subcmd.build(opts).await
|
||||
}
|
||||
|
||||
@@ -79,7 +79,7 @@ enum SubCommand {
|
||||
}
|
||||
|
||||
impl SubCommand {
|
||||
async fn build(self, opts: MetaSrvOptions) -> Result<Instance> {
|
||||
async fn build(self, opts: MetasrvOptions) -> Result<Instance> {
|
||||
match self {
|
||||
SubCommand::Start(cmd) => cmd.build(opts).await,
|
||||
}
|
||||
@@ -127,10 +127,10 @@ struct StartCommand {
|
||||
|
||||
impl StartCommand {
|
||||
fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
|
||||
let mut opts: MetaSrvOptions = Options::load_layered_options(
|
||||
let mut opts: MetasrvOptions = Options::load_layered_options(
|
||||
self.config_file.as_deref(),
|
||||
self.env_prefix.as_ref(),
|
||||
MetaSrvOptions::env_list_keys(),
|
||||
MetasrvOptions::env_list_keys(),
|
||||
)?;
|
||||
|
||||
if let Some(dir) = &cli_options.log_dir {
|
||||
@@ -193,20 +193,20 @@ impl StartCommand {
|
||||
Ok(Options::Metasrv(Box::new(opts)))
|
||||
}
|
||||
|
||||
async fn build(self, mut opts: MetaSrvOptions) -> Result<Instance> {
|
||||
let plugins = plugins::setup_meta_srv_plugins(&mut opts)
|
||||
async fn build(self, mut opts: MetasrvOptions) -> Result<Instance> {
|
||||
let plugins = plugins::setup_metasrv_plugins(&mut opts)
|
||||
.await
|
||||
.context(StartMetaServerSnafu)?;
|
||||
|
||||
logging::info!("MetaSrv start command: {:#?}", self);
|
||||
logging::info!("MetaSrv options: {:#?}", opts);
|
||||
logging::info!("Metasrv start command: {:#?}", self);
|
||||
logging::info!("Metasrv options: {:#?}", opts);
|
||||
|
||||
let builder = meta_srv::bootstrap::metasrv_builder(&opts, plugins.clone(), None)
|
||||
.await
|
||||
.context(error::BuildMetaServerSnafu)?;
|
||||
let metasrv = builder.build().await.context(error::BuildMetaServerSnafu)?;
|
||||
|
||||
let instance = MetaSrvInstance::new(opts, plugins, metasrv)
|
||||
let instance = MetasrvInstance::new(opts, plugins, metasrv)
|
||||
.await
|
||||
.context(error::BuildMetaServerSnafu)?;
|
||||
|
||||
|
||||
@@ -15,12 +15,12 @@
|
||||
use clap::ArgMatches;
|
||||
use common_config::KvBackendConfig;
|
||||
use common_telemetry::logging::{LoggingOptions, TracingOptions};
|
||||
use common_wal::config::MetaSrvWalConfig;
|
||||
use common_wal::config::MetasrvWalConfig;
|
||||
use config::{Config, Environment, File, FileFormat};
|
||||
use datanode::config::{DatanodeOptions, ProcedureConfig};
|
||||
use frontend::error::{Result as FeResult, TomlFormatSnafu};
|
||||
use frontend::frontend::{FrontendOptions, TomlSerializable};
|
||||
use meta_srv::metasrv::MetaSrvOptions;
|
||||
use meta_srv::metasrv::MetasrvOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::ResultExt;
|
||||
|
||||
@@ -38,7 +38,7 @@ pub struct MixOptions {
|
||||
pub frontend: FrontendOptions,
|
||||
pub datanode: DatanodeOptions,
|
||||
pub logging: LoggingOptions,
|
||||
pub wal_meta: MetaSrvWalConfig,
|
||||
pub wal_meta: MetasrvWalConfig,
|
||||
}
|
||||
|
||||
impl From<MixOptions> for FrontendOptions {
|
||||
@@ -56,7 +56,7 @@ impl TomlSerializable for MixOptions {
|
||||
pub enum Options {
|
||||
Datanode(Box<DatanodeOptions>),
|
||||
Frontend(Box<FrontendOptions>),
|
||||
Metasrv(Box<MetaSrvOptions>),
|
||||
Metasrv(Box<MetasrvOptions>),
|
||||
Standalone(Box<MixOptions>),
|
||||
Cli(Box<LoggingOptions>),
|
||||
}
|
||||
|
||||
@@ -86,6 +86,8 @@ pub const INFORMATION_SCHEMA_RUNTIME_METRICS_TABLE_ID: u32 = 27;
|
||||
pub const INFORMATION_SCHEMA_PARTITIONS_TABLE_ID: u32 = 28;
|
||||
/// id for information_schema.REGION_PEERS
|
||||
pub const INFORMATION_SCHEMA_REGION_PEERS_TABLE_ID: u32 = 29;
|
||||
/// id for information_schema.columns
|
||||
pub const INFORMATION_SCHEMA_TABLE_CONSTRAINTS_TABLE_ID: u32 = 30;
|
||||
/// ----- End of information_schema tables -----
|
||||
|
||||
pub const MITO_ENGINE: &str = "mito";
|
||||
|
||||
@@ -60,12 +60,6 @@ impl<
|
||||
.context(error::BufferedWriterClosedSnafu)?;
|
||||
let metadata = encoder.close().await?;
|
||||
|
||||
// Use `rows_written` to keep a track of if any rows have been written.
|
||||
// If no row's been written, then we can simply close the underlying
|
||||
// writer without flush so that no file will be actually created.
|
||||
if self.rows_written != 0 {
|
||||
self.bytes_written += self.try_flush(true).await?;
|
||||
}
|
||||
// It's important to shut down! flushes all pending writes
|
||||
self.close_inner_writer().await?;
|
||||
Ok((metadata, self.bytes_written))
|
||||
@@ -79,8 +73,15 @@ impl<
|
||||
Fut: Future<Output = Result<T>>,
|
||||
> LazyBufferedWriter<T, U, F>
|
||||
{
|
||||
/// Closes the writer without flushing the buffer data.
|
||||
/// Closes the writer and flushes the buffer data.
|
||||
pub async fn close_inner_writer(&mut self) -> Result<()> {
|
||||
// Use `rows_written` to keep a track of if any rows have been written.
|
||||
// If no row's been written, then we can simply close the underlying
|
||||
// writer without flush so that no file will be actually created.
|
||||
if self.rows_written != 0 {
|
||||
self.bytes_written += self.try_flush(true).await?;
|
||||
}
|
||||
|
||||
if let Some(writer) = &mut self.writer {
|
||||
writer.shutdown().await.context(error::AsyncWriteSnafu)?;
|
||||
}
|
||||
@@ -117,7 +118,7 @@ impl<
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn try_flush(&mut self, all: bool) -> Result<u64> {
|
||||
async fn try_flush(&mut self, all: bool) -> Result<u64> {
|
||||
let mut bytes_written: u64 = 0;
|
||||
|
||||
// Once buffered data size reaches threshold, split the data in chunks (typically 4MB)
|
||||
|
||||
@@ -213,10 +213,6 @@ pub async fn stream_to_file<T: DfRecordBatchEncoder, U: Fn(SharedBuffer) -> T>(
|
||||
writer.write(&batch).await?;
|
||||
rows += batch.num_rows();
|
||||
}
|
||||
|
||||
// Flushes all pending writes
|
||||
let _ = writer.try_flush(true).await?;
|
||||
writer.close_inner_writer().await?;
|
||||
|
||||
Ok(rows)
|
||||
}
|
||||
|
||||
@@ -215,10 +215,7 @@ impl BufferedWriter {
|
||||
|
||||
/// Write a record batch to stream writer.
|
||||
pub async fn write(&mut self, arrow_batch: &RecordBatch) -> error::Result<()> {
|
||||
self.inner.write(arrow_batch).await?;
|
||||
self.inner.try_flush(false).await?;
|
||||
|
||||
Ok(())
|
||||
self.inner.write(arrow_batch).await
|
||||
}
|
||||
|
||||
/// Close parquet writer.
|
||||
|
||||
@@ -50,11 +50,13 @@ pub trait ClusterInfo {
|
||||
}
|
||||
|
||||
/// The key of [NodeInfo] in the storage. The format is `__meta_cluster_node_info-{cluster_id}-{role}-{node_id}`.
|
||||
/// This key cannot be used to describe the `Metasrv` because the `Metasrv` does not have
|
||||
/// a `cluster_id`, it serves multiple clusters.
|
||||
#[derive(Debug, Clone, Eq, Hash, PartialEq, Serialize, Deserialize)]
|
||||
pub struct NodeInfoKey {
|
||||
/// The cluster id.
|
||||
pub cluster_id: u64,
|
||||
/// The role of the node. It can be [Role::Datanode], [Role::Frontend], or [Role::Metasrv].
|
||||
/// The role of the node. It can be `[Role::Datanode]` or `[Role::Frontend]`.
|
||||
pub role: Role,
|
||||
/// The node id.
|
||||
pub node_id: u64,
|
||||
|
||||
@@ -35,6 +35,7 @@ use crate::ddl::DdlContext;
|
||||
use crate::error::{DecodeJsonSnafu, Error, MetadataCorruptionSnafu, Result};
|
||||
use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::table_route::PhysicalTableRouteValue;
|
||||
use crate::key::DeserializedValueWithBytes;
|
||||
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
|
||||
use crate::rpc::ddl::AlterTableTask;
|
||||
use crate::rpc::router::find_leaders;
|
||||
@@ -245,10 +246,10 @@ pub struct AlterTablesData {
|
||||
tasks: Vec<AlterTableTask>,
|
||||
/// Table info values before the alter operation.
|
||||
/// Corresponding one-to-one with the AlterTableTask in tasks.
|
||||
table_info_values: Vec<TableInfoValue>,
|
||||
table_info_values: Vec<DeserializedValueWithBytes<TableInfoValue>>,
|
||||
/// Physical table info
|
||||
physical_table_id: TableId,
|
||||
physical_table_info: Option<TableInfoValue>,
|
||||
physical_table_info: Option<DeserializedValueWithBytes<TableInfoValue>>,
|
||||
physical_table_route: Option<PhysicalTableRouteValue>,
|
||||
physical_columns: Vec<ColumnMetadata>,
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@ use crate::error::{
|
||||
use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::key::DeserializedValueWithBytes;
|
||||
use crate::rpc::ddl::AlterTableTask;
|
||||
|
||||
impl AlterLogicalTablesProcedure {
|
||||
@@ -61,11 +62,9 @@ impl AlterLogicalTablesProcedure {
|
||||
.get_full_table_info(self.data.physical_table_id)
|
||||
.await?;
|
||||
|
||||
let physical_table_info = physical_table_info
|
||||
.with_context(|| TableInfoNotFoundSnafu {
|
||||
table: format!("table id - {}", self.data.physical_table_id),
|
||||
})?
|
||||
.into_inner();
|
||||
let physical_table_info = physical_table_info.with_context(|| TableInfoNotFoundSnafu {
|
||||
table: format!("table id - {}", self.data.physical_table_id),
|
||||
})?;
|
||||
let physical_table_route = physical_table_route
|
||||
.context(TableRouteNotFoundSnafu {
|
||||
table_id: self.data.physical_table_id,
|
||||
@@ -99,9 +98,9 @@ impl AlterLogicalTablesProcedure {
|
||||
async fn get_all_table_info_values(
|
||||
&self,
|
||||
table_ids: &[TableId],
|
||||
) -> Result<Vec<TableInfoValue>> {
|
||||
) -> Result<Vec<DeserializedValueWithBytes<TableInfoValue>>> {
|
||||
let table_info_manager = self.context.table_metadata_manager.table_info_manager();
|
||||
let mut table_info_map = table_info_manager.batch_get(table_ids).await?;
|
||||
let mut table_info_map = table_info_manager.batch_get_raw(table_ids).await?;
|
||||
let mut table_info_values = Vec::with_capacity(table_ids.len());
|
||||
for (table_id, task) in table_ids.iter().zip(self.data.tasks.iter()) {
|
||||
let table_info_value =
|
||||
|
||||
@@ -33,6 +33,7 @@ impl AlterLogicalTablesProcedure {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Safety: must exist.
|
||||
let physical_table_info = self.data.physical_table_info.as_ref().unwrap();
|
||||
|
||||
// Generates new table info
|
||||
@@ -45,10 +46,7 @@ impl AlterLogicalTablesProcedure {
|
||||
// Updates physical table's metadata
|
||||
self.context
|
||||
.table_metadata_manager
|
||||
.update_table_info(
|
||||
DeserializedValueWithBytes::from_inner(physical_table_info.clone()),
|
||||
new_raw_table_info,
|
||||
)
|
||||
.update_table_info(physical_table_info, new_raw_table_info)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
@@ -77,7 +75,9 @@ impl AlterLogicalTablesProcedure {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn build_update_metadata(&self) -> Result<Vec<(TableInfoValue, RawTableInfo)>> {
|
||||
pub(crate) fn build_update_metadata(
|
||||
&self,
|
||||
) -> Result<Vec<(DeserializedValueWithBytes<TableInfoValue>, RawTableInfo)>> {
|
||||
let mut table_info_values_to_update = Vec::with_capacity(self.data.tasks.len());
|
||||
for (task, table) in self
|
||||
.data
|
||||
@@ -94,8 +94,8 @@ impl AlterLogicalTablesProcedure {
|
||||
fn build_new_table_info(
|
||||
&self,
|
||||
task: &AlterTableTask,
|
||||
table: &TableInfoValue,
|
||||
) -> Result<(TableInfoValue, RawTableInfo)> {
|
||||
table: &DeserializedValueWithBytes<TableInfoValue>,
|
||||
) -> Result<(DeserializedValueWithBytes<TableInfoValue>, RawTableInfo)> {
|
||||
// Builds new_meta
|
||||
let table_info = TableInfo::try_from(table.table_info.clone())
|
||||
.context(error::ConvertRawTableInfoSnafu)?;
|
||||
|
||||
@@ -12,52 +12,49 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod check;
|
||||
mod metadata;
|
||||
mod region_request;
|
||||
mod update_metadata;
|
||||
|
||||
use std::vec;
|
||||
|
||||
use api::v1::alter_expr::Kind;
|
||||
use api::v1::region::{
|
||||
alter_request, region_request, AddColumn, AddColumns, AlterRequest, DropColumn, DropColumns,
|
||||
RegionColumnDef, RegionRequest, RegionRequestHeader,
|
||||
};
|
||||
use api::v1::{AlterExpr, RenameTable};
|
||||
use api::v1::RenameTable;
|
||||
use async_trait::async_trait;
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_grpc_expr::alter_expr_to_request;
|
||||
use common_procedure::error::{FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu};
|
||||
use common_procedure::{
|
||||
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure, Status, StringKey,
|
||||
};
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use common_telemetry::{debug, info};
|
||||
use futures::future;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::storage::{ColumnId, RegionId};
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::RegionId;
|
||||
use strum::AsRefStr;
|
||||
use table::metadata::{RawTableInfo, TableId, TableInfo};
|
||||
use table::requests::AlterKind;
|
||||
use table::metadata::{RawTableInfo, TableId};
|
||||
use table::table_reference::TableReference;
|
||||
|
||||
use crate::cache_invalidator::Context;
|
||||
use crate::ddl::utils::add_peer_context_if_needed;
|
||||
use crate::ddl::DdlContext;
|
||||
use crate::error::{self, ConvertAlterTableRequestSnafu, Error, InvalidProtoMsgSnafu, Result};
|
||||
use crate::error::{Error, Result};
|
||||
use crate::instruction::CacheIdent;
|
||||
use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::DeserializedValueWithBytes;
|
||||
use crate::lock_key::{CatalogLock, SchemaLock, TableLock, TableNameLock};
|
||||
use crate::metrics;
|
||||
use crate::rpc::ddl::AlterTableTask;
|
||||
use crate::rpc::router::{find_leader_regions, find_leaders};
|
||||
use crate::table_name::TableName;
|
||||
|
||||
/// The alter table procedure
|
||||
pub struct AlterTableProcedure {
|
||||
// The runtime context.
|
||||
context: DdlContext,
|
||||
// The serialized data.
|
||||
data: AlterTableData,
|
||||
/// proto alter Kind for adding/dropping columns.
|
||||
kind: Option<alter_request::Kind>,
|
||||
}
|
||||
|
||||
impl AlterTableProcedure {
|
||||
@@ -65,123 +62,36 @@ impl AlterTableProcedure {
|
||||
|
||||
pub fn new(
|
||||
cluster_id: u64,
|
||||
table_id: TableId,
|
||||
task: AlterTableTask,
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
context: DdlContext,
|
||||
) -> Result<Self> {
|
||||
let alter_kind = task
|
||||
.alter_table
|
||||
.kind
|
||||
.as_ref()
|
||||
.context(InvalidProtoMsgSnafu {
|
||||
err_msg: "'kind' is absent",
|
||||
})?;
|
||||
let (kind, next_column_id) =
|
||||
create_proto_alter_kind(&table_info_value.table_info, alter_kind)?;
|
||||
|
||||
debug!(
|
||||
"New AlterTableProcedure, kind: {:?}, next_column_id: {:?}",
|
||||
kind, next_column_id
|
||||
);
|
||||
|
||||
task.validate()?;
|
||||
Ok(Self {
|
||||
context,
|
||||
data: AlterTableData::new(task, table_info_value, cluster_id, next_column_id),
|
||||
kind,
|
||||
data: AlterTableData::new(task, table_id, cluster_id),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn from_json(json: &str, context: DdlContext) -> ProcedureResult<Self> {
|
||||
let data: AlterTableData = serde_json::from_str(json).context(FromJsonSnafu)?;
|
||||
let alter_kind = data
|
||||
.task
|
||||
.alter_table
|
||||
.kind
|
||||
.as_ref()
|
||||
.context(InvalidProtoMsgSnafu {
|
||||
err_msg: "'kind' is absent",
|
||||
})
|
||||
.map_err(ProcedureError::external)?;
|
||||
let (kind, next_column_id) =
|
||||
create_proto_alter_kind(&data.table_info_value.table_info, alter_kind)
|
||||
.map_err(ProcedureError::external)?;
|
||||
assert_eq!(data.next_column_id, next_column_id);
|
||||
|
||||
Ok(AlterTableProcedure {
|
||||
context,
|
||||
data,
|
||||
kind,
|
||||
})
|
||||
Ok(AlterTableProcedure { context, data })
|
||||
}
|
||||
|
||||
// Checks whether the table exists.
|
||||
async fn on_prepare(&mut self) -> Result<Status> {
|
||||
let alter_expr = &self.alter_expr();
|
||||
let catalog = &alter_expr.catalog_name;
|
||||
let schema = &alter_expr.schema_name;
|
||||
|
||||
let alter_kind = self.alter_kind()?;
|
||||
let manager = &self.context.table_metadata_manager;
|
||||
|
||||
if let Kind::RenameTable(RenameTable { new_table_name }) = alter_kind {
|
||||
let new_table_name_key = TableNameKey::new(catalog, schema, new_table_name);
|
||||
|
||||
let exist = manager
|
||||
.table_name_manager()
|
||||
.exists(new_table_name_key)
|
||||
.await?;
|
||||
|
||||
ensure!(
|
||||
!exist,
|
||||
error::TableAlreadyExistsSnafu {
|
||||
table_name: TableName::from(new_table_name_key).to_string(),
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
let table_name_key = TableNameKey::new(catalog, schema, &alter_expr.table_name);
|
||||
|
||||
let exist = manager.table_name_manager().exists(table_name_key).await?;
|
||||
|
||||
ensure!(
|
||||
exist,
|
||||
error::TableNotFoundSnafu {
|
||||
table_name: TableName::from(table_name_key).to_string()
|
||||
}
|
||||
);
|
||||
|
||||
pub(crate) async fn on_prepare(&mut self) -> Result<Status> {
|
||||
self.check_alter().await?;
|
||||
self.fill_table_info().await?;
|
||||
// Safety: Checked in `AlterTableProcedure::new`.
|
||||
let alter_kind = self.data.task.alter_table.kind.as_ref().unwrap();
|
||||
if matches!(alter_kind, Kind::RenameTable { .. }) {
|
||||
self.data.state = AlterTableState::UpdateMetadata;
|
||||
} else {
|
||||
self.data.state = AlterTableState::SubmitAlterRegionRequests;
|
||||
};
|
||||
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
fn alter_expr(&self) -> &AlterExpr {
|
||||
&self.data.task.alter_table
|
||||
}
|
||||
|
||||
fn alter_kind(&self) -> Result<&Kind> {
|
||||
self.alter_expr()
|
||||
.kind
|
||||
.as_ref()
|
||||
.context(InvalidProtoMsgSnafu {
|
||||
err_msg: "'kind' is absent",
|
||||
})
|
||||
}
|
||||
|
||||
pub fn create_alter_region_request(&self, region_id: RegionId) -> Result<AlterRequest> {
|
||||
let table_info = self.data.table_info();
|
||||
|
||||
Ok(AlterRequest {
|
||||
region_id: region_id.as_u64(),
|
||||
schema_version: table_info.ident.version,
|
||||
kind: self.kind.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn submit_alter_region_requests(&mut self) -> Result<Status> {
|
||||
let table_id = self.data.table_id();
|
||||
let (_, physical_table_route) = self
|
||||
@@ -200,14 +110,7 @@ impl AlterTableProcedure {
|
||||
|
||||
for region in regions {
|
||||
let region_id = RegionId::new(table_id, region);
|
||||
let request = self.create_alter_region_request(region_id)?;
|
||||
let request = RegionRequest {
|
||||
header: Some(RegionRequestHeader {
|
||||
tracing_context: TracingContext::from_current_span().to_w3c(),
|
||||
..Default::default()
|
||||
}),
|
||||
body: Some(region_request::Body::Alter(request)),
|
||||
};
|
||||
let request = self.make_alter_region_request(region_id)?;
|
||||
debug!("Submitting {request:?} to {datanode}");
|
||||
|
||||
let datanode = datanode.clone();
|
||||
@@ -238,91 +141,39 @@ impl AlterTableProcedure {
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
/// Update table metadata for rename table operation.
|
||||
async fn on_update_metadata_for_rename(&self, new_table_name: String) -> Result<()> {
|
||||
let table_metadata_manager = &self.context.table_metadata_manager;
|
||||
|
||||
let current_table_info_value = self.data.table_info_value.clone();
|
||||
|
||||
table_metadata_manager
|
||||
.rename_table(current_table_info_value, new_table_name)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn on_update_metadata_for_alter(&self, new_table_info: RawTableInfo) -> Result<()> {
|
||||
let table_metadata_manager = &self.context.table_metadata_manager;
|
||||
let current_table_info_value = self.data.table_info_value.clone();
|
||||
|
||||
table_metadata_manager
|
||||
.update_table_info(current_table_info_value, new_table_info)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn build_new_table_info(&self) -> Result<TableInfo> {
|
||||
// Builds new_meta
|
||||
let table_info = TableInfo::try_from(self.data.table_info().clone())
|
||||
.context(error::ConvertRawTableInfoSnafu)?;
|
||||
|
||||
let table_ref = self.data.table_ref();
|
||||
|
||||
let request = alter_expr_to_request(self.data.table_id(), self.alter_expr().clone())
|
||||
.context(ConvertAlterTableRequestSnafu)?;
|
||||
|
||||
let new_meta = table_info
|
||||
.meta
|
||||
.builder_with_alter_kind(table_ref.table, &request.alter_kind, false)
|
||||
.context(error::TableSnafu)?
|
||||
.build()
|
||||
.with_context(|_| error::BuildTableMetaSnafu {
|
||||
table_name: table_ref.table,
|
||||
})?;
|
||||
|
||||
let mut new_info = table_info.clone();
|
||||
new_info.meta = new_meta;
|
||||
new_info.ident.version = table_info.ident.version + 1;
|
||||
if let Some(column_id) = self.data.next_column_id {
|
||||
new_info.meta.next_column_id = new_info.meta.next_column_id.max(column_id);
|
||||
}
|
||||
|
||||
if let AlterKind::RenameTable { new_table_name } = &request.alter_kind {
|
||||
new_info.name = new_table_name.to_string();
|
||||
}
|
||||
|
||||
Ok(new_info)
|
||||
}
|
||||
|
||||
/// Update table metadata.
|
||||
async fn on_update_metadata(&mut self) -> Result<Status> {
|
||||
pub(crate) async fn on_update_metadata(&mut self) -> Result<Status> {
|
||||
let table_id = self.data.table_id();
|
||||
let table_ref = self.data.table_ref();
|
||||
let new_info = self.build_new_table_info()?;
|
||||
// Safety: checked before.
|
||||
let table_info_value = self.data.table_info_value.as_ref().unwrap();
|
||||
let new_info = self.build_new_table_info(&table_info_value.table_info)?;
|
||||
|
||||
debug!(
|
||||
"starting update table: {} metadata, new table info {:?}",
|
||||
"Starting update table: {} metadata, new table info {:?}",
|
||||
table_ref.to_string(),
|
||||
new_info
|
||||
);
|
||||
|
||||
if let Kind::RenameTable(RenameTable { new_table_name }) = self.alter_kind()? {
|
||||
self.on_update_metadata_for_rename(new_table_name.to_string())
|
||||
// Safety: Checked in `AlterTableProcedure::new`.
|
||||
let alter_kind = self.data.task.alter_table.kind.as_ref().unwrap();
|
||||
if let Kind::RenameTable(RenameTable { new_table_name }) = alter_kind {
|
||||
self.on_update_metadata_for_rename(new_table_name.to_string(), table_info_value)
|
||||
.await?;
|
||||
} else {
|
||||
self.on_update_metadata_for_alter(new_info.into()).await?;
|
||||
self.on_update_metadata_for_alter(new_info.into(), table_info_value)
|
||||
.await?;
|
||||
}
|
||||
|
||||
info!("Updated table metadata for table {table_ref}, table_id: {table_id}");
|
||||
|
||||
self.data.state = AlterTableState::InvalidateTableCache;
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
/// Broadcasts the instructions to invalidate the table cache.
|
||||
async fn on_broadcast(&mut self) -> Result<Status> {
|
||||
let alter_kind = self.alter_kind()?;
|
||||
// Safety: Checked in `AlterTableProcedure::new`.
|
||||
let alter_kind = self.data.task.alter_table.kind.as_ref().unwrap();
|
||||
let cache_invalidator = &self.context.cache_invalidator;
|
||||
let cache_keys = if matches!(alter_kind, Kind::RenameTable { .. }) {
|
||||
vec![CacheIdent::TableName(self.data.table_ref().into())]
|
||||
@@ -348,7 +199,9 @@ impl AlterTableProcedure {
|
||||
lock_key.push(SchemaLock::read(table_ref.catalog, table_ref.schema).into());
|
||||
lock_key.push(TableLock::Write(table_id).into());
|
||||
|
||||
if let Ok(Kind::RenameTable(RenameTable { new_table_name })) = self.alter_kind() {
|
||||
// Safety: Checked in `AlterTableProcedure::new`.
|
||||
let alter_kind = self.data.task.alter_table.kind.as_ref().unwrap();
|
||||
if let Kind::RenameTable(RenameTable { new_table_name }) = alter_kind {
|
||||
lock_key.push(
|
||||
TableNameLock::new(table_ref.catalog, table_ref.schema, new_table_name).into(),
|
||||
)
|
||||
@@ -403,8 +256,9 @@ impl Procedure for AlterTableProcedure {
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, AsRefStr)]
|
||||
enum AlterTableState {
|
||||
/// Prepares to alter the table
|
||||
/// Prepares to alter the table.
|
||||
Prepare,
|
||||
/// Sends alter region requests to Datanode.
|
||||
SubmitAlterRegionRequests,
|
||||
/// Updates table metadata.
|
||||
UpdateMetadata,
|
||||
@@ -412,30 +266,25 @@ enum AlterTableState {
|
||||
InvalidateTableCache,
|
||||
}
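A reading aid for the enum above: `on_prepare` routes a rename straight to the metadata update, while column changes go through the Datanodes first. A minimal sketch of that decision as a hypothetical helper (inferred from the `on_prepare` logic shown earlier; not part of the diff, and the transition out of `SubmitAlterRegionRequests` is not visible in this hunk):

// Hypothetical helper, for illustration only.
fn state_after_prepare(alter_kind: &Kind) -> AlterTableState {
    if matches!(alter_kind, Kind::RenameTable { .. }) {
        // A rename only touches metadata; no alter region requests are sent.
        AlterTableState::UpdateMetadata
    } else {
        // Adding or dropping columns must reach the Datanodes first.
        AlterTableState::SubmitAlterRegionRequests
    }
}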
|
||||
|
||||
// The serialized data of alter table.
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AlterTableData {
|
||||
cluster_id: u64,
|
||||
state: AlterTableState,
|
||||
task: AlterTableTask,
|
||||
table_id: TableId,
|
||||
/// Table info value before alteration.
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
/// Next column id of the table if the task adds columns to the table.
|
||||
next_column_id: Option<ColumnId>,
|
||||
table_info_value: Option<DeserializedValueWithBytes<TableInfoValue>>,
|
||||
}
|
||||
|
||||
impl AlterTableData {
|
||||
pub fn new(
|
||||
task: AlterTableTask,
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
cluster_id: u64,
|
||||
next_column_id: Option<ColumnId>,
|
||||
) -> Self {
|
||||
pub fn new(task: AlterTableTask, table_id: TableId, cluster_id: u64) -> Self {
|
||||
Self {
|
||||
state: AlterTableState::Prepare,
|
||||
task,
|
||||
table_info_value,
|
||||
table_id,
|
||||
cluster_id,
|
||||
next_column_id,
|
||||
table_info_value: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -444,76 +293,12 @@ impl AlterTableData {
|
||||
}
|
||||
|
||||
fn table_id(&self) -> TableId {
|
||||
self.table_info().ident.table_id
|
||||
self.table_id
|
||||
}
|
||||
|
||||
fn table_info(&self) -> &RawTableInfo {
|
||||
&self.table_info_value.table_info
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates region proto alter kind from `table_info` and `alter_kind`.
|
||||
///
|
||||
/// Returns the kind and next column id if it adds new columns.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if kind is rename.
|
||||
pub fn create_proto_alter_kind(
|
||||
table_info: &RawTableInfo,
|
||||
alter_kind: &Kind,
|
||||
) -> Result<(Option<alter_request::Kind>, Option<ColumnId>)> {
|
||||
match alter_kind {
|
||||
Kind::AddColumns(x) => {
|
||||
let mut next_column_id = table_info.meta.next_column_id;
|
||||
|
||||
let add_columns = x
|
||||
.add_columns
|
||||
.iter()
|
||||
.map(|add_column| {
|
||||
let column_def =
|
||||
add_column
|
||||
.column_def
|
||||
.as_ref()
|
||||
.context(InvalidProtoMsgSnafu {
|
||||
err_msg: "'column_def' is absent",
|
||||
})?;
|
||||
|
||||
let column_id = next_column_id;
|
||||
next_column_id += 1;
|
||||
|
||||
let column_def = RegionColumnDef {
|
||||
column_def: Some(column_def.clone()),
|
||||
column_id,
|
||||
};
|
||||
|
||||
Ok(AddColumn {
|
||||
column_def: Some(column_def),
|
||||
location: add_column.location.clone(),
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
Ok((
|
||||
Some(alter_request::Kind::AddColumns(AddColumns { add_columns })),
|
||||
Some(next_column_id),
|
||||
))
|
||||
}
|
||||
Kind::DropColumns(x) => {
|
||||
let drop_columns = x
|
||||
.drop_columns
|
||||
.iter()
|
||||
.map(|x| DropColumn {
|
||||
name: x.name.clone(),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok((
|
||||
Some(alter_request::Kind::DropColumns(DropColumns {
|
||||
drop_columns,
|
||||
})),
|
||||
None,
|
||||
))
|
||||
}
|
||||
Kind::RenameTable(_) => Ok((None, None)),
|
||||
fn table_info(&self) -> Option<&RawTableInfo> {
|
||||
self.table_info_value
|
||||
.as_ref()
|
||||
.map(|value| &value.table_info)
|
||||
}
|
||||
}
|
||||
|
||||
62
src/common/meta/src/ddl/alter_table/check.rs
Normal file
@@ -0,0 +1,62 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::alter_expr::Kind;
|
||||
use api::v1::RenameTable;
|
||||
use common_catalog::format_full_table_name;
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::ddl::alter_table::AlterTableProcedure;
|
||||
use crate::error::{self, Result};
|
||||
use crate::key::table_name::TableNameKey;
|
||||
|
||||
impl AlterTableProcedure {
|
||||
/// Checks:
|
||||
/// - The new table name doesn't exist (rename).
|
||||
/// - Table exists.
|
||||
pub(crate) async fn check_alter(&self) -> Result<()> {
|
||||
let alter_expr = &self.data.task.alter_table;
|
||||
let catalog = &alter_expr.catalog_name;
|
||||
let schema = &alter_expr.schema_name;
|
||||
let table_name = &alter_expr.table_name;
|
||||
// Safety: Checked in `AlterTableProcedure::new`.
|
||||
let alter_kind = self.data.task.alter_table.kind.as_ref().unwrap();
|
||||
|
||||
let manager = &self.context.table_metadata_manager;
|
||||
if let Kind::RenameTable(RenameTable { new_table_name }) = alter_kind {
|
||||
let new_table_name_key = TableNameKey::new(catalog, schema, new_table_name);
|
||||
let exists = manager
|
||||
.table_name_manager()
|
||||
.exists(new_table_name_key)
|
||||
.await?;
|
||||
ensure!(
|
||||
!exists,
|
||||
error::TableAlreadyExistsSnafu {
|
||||
table_name: format_full_table_name(catalog, schema, new_table_name),
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
let table_name_key = TableNameKey::new(catalog, schema, table_name);
|
||||
let exists = manager.table_name_manager().exists(table_name_key).await?;
|
||||
ensure!(
|
||||
exists,
|
||||
error::TableNotFoundSnafu {
|
||||
table_name: format_full_table_name(catalog, schema, &alter_expr.table_name),
|
||||
}
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
42
src/common/meta/src/ddl/alter_table/metadata.rs
Normal file
@@ -0,0 +1,42 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_catalog::format_full_table_name;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::ddl::alter_table::AlterTableProcedure;
|
||||
use crate::error::{self, Result};
|
||||
|
||||
impl AlterTableProcedure {
|
||||
/// Fetches the table info.
|
||||
pub(crate) async fn fill_table_info(&mut self) -> Result<()> {
|
||||
let table_id = self.data.table_id();
|
||||
let alter_expr = &self.data.task.alter_table;
|
||||
let catalog = &alter_expr.catalog_name;
|
||||
let schema = &alter_expr.schema_name;
|
||||
let table_name = &alter_expr.table_name;
|
||||
|
||||
let table_info_value = self
|
||||
.context
|
||||
.table_metadata_manager
|
||||
.table_info_manager()
|
||||
.get(table_id)
|
||||
.await?
|
||||
.with_context(|| error::TableNotFoundSnafu {
|
||||
table_name: format_full_table_name(catalog, schema, table_name),
|
||||
})?;
|
||||
self.data.table_info_value = Some(table_info_value);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
258
src/common/meta/src/ddl/alter_table/region_request.rs
Normal file
@@ -0,0 +1,258 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::alter_expr::Kind;
|
||||
use api::v1::region::region_request::Body;
|
||||
use api::v1::region::{
|
||||
alter_request, AddColumn, AddColumns, AlterRequest, DropColumn, DropColumns, RegionColumnDef,
|
||||
RegionRequest, RegionRequestHeader,
|
||||
};
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use snafu::OptionExt;
|
||||
use store_api::storage::RegionId;
|
||||
use table::metadata::RawTableInfo;
|
||||
|
||||
use crate::ddl::alter_table::AlterTableProcedure;
|
||||
use crate::error::{InvalidProtoMsgSnafu, Result};
|
||||
|
||||
impl AlterTableProcedure {
|
||||
/// Makes an alter region request.
|
||||
pub(crate) fn make_alter_region_request(&self, region_id: RegionId) -> Result<RegionRequest> {
|
||||
// Safety: Checked in `AlterTableProcedure::new`.
|
||||
let alter_kind = self.data.task.alter_table.kind.as_ref().unwrap();
|
||||
// Safety: checked
|
||||
let table_info = self.data.table_info().unwrap();
|
||||
let kind = create_proto_alter_kind(table_info, alter_kind)?;
|
||||
|
||||
Ok(RegionRequest {
|
||||
header: Some(RegionRequestHeader {
|
||||
tracing_context: TracingContext::from_current_span().to_w3c(),
|
||||
..Default::default()
|
||||
}),
|
||||
body: Some(Body::Alter(AlterRequest {
|
||||
region_id: region_id.as_u64(),
|
||||
schema_version: table_info.ident.version,
|
||||
kind,
|
||||
})),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates region proto alter kind from `table_info` and `alter_kind`.
|
||||
///
|
||||
/// Returns the region alter kind, or `None` if the kind is a table rename.
|
||||
fn create_proto_alter_kind(
|
||||
table_info: &RawTableInfo,
|
||||
alter_kind: &Kind,
|
||||
) -> Result<Option<alter_request::Kind>> {
|
||||
match alter_kind {
|
||||
Kind::AddColumns(x) => {
|
||||
let mut next_column_id = table_info.meta.next_column_id;
|
||||
|
||||
let add_columns = x
|
||||
.add_columns
|
||||
.iter()
|
||||
.map(|add_column| {
|
||||
let column_def =
|
||||
add_column
|
||||
.column_def
|
||||
.as_ref()
|
||||
.context(InvalidProtoMsgSnafu {
|
||||
err_msg: "'column_def' is absent",
|
||||
})?;
|
||||
|
||||
let column_id = next_column_id;
|
||||
next_column_id += 1;
|
||||
|
||||
let column_def = RegionColumnDef {
|
||||
column_def: Some(column_def.clone()),
|
||||
column_id,
|
||||
};
|
||||
|
||||
Ok(AddColumn {
|
||||
column_def: Some(column_def),
|
||||
location: add_column.location.clone(),
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
Ok(Some(alter_request::Kind::AddColumns(AddColumns {
|
||||
add_columns,
|
||||
})))
|
||||
}
|
||||
Kind::DropColumns(x) => {
|
||||
let drop_columns = x
|
||||
.drop_columns
|
||||
.iter()
|
||||
.map(|x| DropColumn {
|
||||
name: x.name.clone(),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok(Some(alter_request::Kind::DropColumns(DropColumns {
|
||||
drop_columns,
|
||||
})))
|
||||
}
|
||||
Kind::RenameTable(_) => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::add_column_location::LocationType;
|
||||
use api::v1::alter_expr::Kind;
|
||||
use api::v1::region::region_request::Body;
|
||||
use api::v1::region::RegionColumnDef;
|
||||
use api::v1::{
|
||||
region, AddColumn, AddColumnLocation, AddColumns, AlterExpr, ColumnDataType,
|
||||
ColumnDef as PbColumnDef, SemanticType,
|
||||
};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::ddl::alter_table::AlterTableProcedure;
|
||||
use crate::ddl::test_util::columns::TestColumnDefBuilder;
|
||||
use crate::ddl::test_util::create_table::{
|
||||
build_raw_table_info_from_expr, TestCreateTableExprBuilder,
|
||||
};
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::peer::Peer;
|
||||
use crate::rpc::ddl::AlterTableTask;
|
||||
use crate::rpc::router::{Region, RegionRoute};
|
||||
use crate::test_util::{new_ddl_context, MockDatanodeManager};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_make_alter_region_request() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let table_id = 1024;
|
||||
let region_id = RegionId::new(table_id, 1);
|
||||
let table_name = "foo";
|
||||
|
||||
let create_table = TestCreateTableExprBuilder::default()
|
||||
.column_defs([
|
||||
TestColumnDefBuilder::default()
|
||||
.name("ts")
|
||||
.data_type(ColumnDataType::TimestampMillisecond)
|
||||
.semantic_type(SemanticType::Timestamp)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into(),
|
||||
TestColumnDefBuilder::default()
|
||||
.name("host")
|
||||
.data_type(ColumnDataType::String)
|
||||
.semantic_type(SemanticType::Tag)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into(),
|
||||
TestColumnDefBuilder::default()
|
||||
.name("cpu")
|
||||
.data_type(ColumnDataType::Float64)
|
||||
.semantic_type(SemanticType::Field)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into(),
|
||||
])
|
||||
.table_id(table_id)
|
||||
.time_index("ts")
|
||||
.primary_keys(["host".into()])
|
||||
.table_name(table_name)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into();
|
||||
let table_info = build_raw_table_info_from_expr(&create_table);
|
||||
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
table_info,
|
||||
TableRouteValue::physical(vec![RegionRoute {
|
||||
region: Region::new_test(region_id),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
}]),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let task = AlterTableTask {
|
||||
alter_table: AlterExpr {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: table_name.to_string(),
|
||||
kind: Some(Kind::AddColumns(AddColumns {
|
||||
add_columns: vec![AddColumn {
|
||||
column_def: Some(PbColumnDef {
|
||||
name: "my_tag3".to_string(),
|
||||
data_type: ColumnDataType::String as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: b"hello".to_vec(),
|
||||
semantic_type: SemanticType::Tag as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
}),
|
||||
location: Some(AddColumnLocation {
|
||||
location_type: LocationType::After as i32,
|
||||
after_column_name: "my_tag2".to_string(),
|
||||
}),
|
||||
}],
|
||||
})),
|
||||
},
|
||||
};
|
||||
|
||||
let mut procedure =
|
||||
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
let Some(Body::Alter(alter_region_request)) =
|
||||
procedure.make_alter_region_request(region_id).unwrap().body
|
||||
else {
|
||||
unreachable!()
|
||||
};
|
||||
assert_eq!(alter_region_request.region_id, region_id.as_u64());
|
||||
assert_eq!(alter_region_request.schema_version, 1);
|
||||
assert_eq!(
|
||||
alter_region_request.kind,
|
||||
Some(region::alter_request::Kind::AddColumns(
|
||||
region::AddColumns {
|
||||
add_columns: vec![region::AddColumn {
|
||||
column_def: Some(RegionColumnDef {
|
||||
column_def: Some(PbColumnDef {
|
||||
name: "my_tag3".to_string(),
|
||||
data_type: ColumnDataType::String as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: b"hello".to_vec(),
|
||||
semantic_type: SemanticType::Tag as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
}),
|
||||
column_id: 3,
|
||||
}),
|
||||
location: Some(AddColumnLocation {
|
||||
location_type: LocationType::After as i32,
|
||||
after_column_name: "my_tag2".to_string(),
|
||||
}),
|
||||
}]
|
||||
}
|
||||
))
|
||||
);
|
||||
}
|
||||
}
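The `column_id: 3` asserted above follows from `create_proto_alter_kind`: new columns receive ids sequentially, starting from the table's current `next_column_id`, which is 3 after the three columns (`ts`, `host`, `cpu`) created for this test. A quick sketch of that rule (hypothetical helper, not part of the diff):

// Hypothetical helper, for illustration only.
fn assigned_column_ids(next_column_id: u32, added: usize) -> Vec<u32> {
    // Ids are handed out in order of appearance in the AddColumns request.
    (0..added as u32).map(|i| next_column_id + i).collect()
}
// e.g. assigned_column_ids(3, 1) == vec![3], matching the assertion above.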
|
||||
87
src/common/meta/src/ddl/alter_table/update_metadata.rs
Normal file
@@ -0,0 +1,87 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_grpc_expr::alter_expr_to_request;
|
||||
use snafu::ResultExt;
|
||||
use table::metadata::{RawTableInfo, TableInfo};
|
||||
use table::requests::AlterKind;
|
||||
|
||||
use crate::ddl::alter_table::AlterTableProcedure;
|
||||
use crate::error::{self, Result};
|
||||
use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::DeserializedValueWithBytes;
|
||||
|
||||
impl AlterTableProcedure {
|
||||
/// Builds the new table info after applying the alteration.
|
||||
pub(crate) fn build_new_table_info(&self, table_info: &RawTableInfo) -> Result<TableInfo> {
|
||||
let table_info =
|
||||
TableInfo::try_from(table_info.clone()).context(error::ConvertRawTableInfoSnafu)?;
|
||||
let table_ref = self.data.table_ref();
|
||||
let alter_expr = self.data.task.alter_table.clone();
|
||||
let request = alter_expr_to_request(self.data.table_id(), alter_expr)
|
||||
.context(error::ConvertAlterTableRequestSnafu)?;
|
||||
|
||||
let new_meta = table_info
|
||||
.meta
|
||||
.builder_with_alter_kind(table_ref.table, &request.alter_kind, false)
|
||||
.context(error::TableSnafu)?
|
||||
.build()
|
||||
.with_context(|_| error::BuildTableMetaSnafu {
|
||||
table_name: table_ref.table,
|
||||
})?;
|
||||
|
||||
let mut new_info = table_info.clone();
|
||||
new_info.meta = new_meta;
|
||||
new_info.ident.version = table_info.ident.version + 1;
|
||||
match request.alter_kind {
|
||||
AlterKind::AddColumns { columns } => {
|
||||
new_info.meta.next_column_id += columns.len() as u32;
|
||||
}
|
||||
AlterKind::RenameTable { new_table_name } => {
|
||||
new_info.name = new_table_name.to_string();
|
||||
}
|
||||
AlterKind::DropColumns { .. } => {}
|
||||
}
|
||||
|
||||
Ok(new_info)
|
||||
}
|
||||
|
||||
/// Updates table metadata for rename table operation.
|
||||
pub(crate) async fn on_update_metadata_for_rename(
|
||||
&self,
|
||||
new_table_name: String,
|
||||
current_table_info_value: &DeserializedValueWithBytes<TableInfoValue>,
|
||||
) -> Result<()> {
|
||||
let table_metadata_manager = &self.context.table_metadata_manager;
|
||||
table_metadata_manager
|
||||
.rename_table(current_table_info_value, new_table_name)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Updates table metadata for alter table operation.
|
||||
pub(crate) async fn on_update_metadata_for_alter(
|
||||
&self,
|
||||
new_table_info: RawTableInfo,
|
||||
current_table_info_value: &DeserializedValueWithBytes<TableInfoValue>,
|
||||
) -> Result<()> {
|
||||
let table_metadata_manager = &self.context.table_metadata_manager;
|
||||
table_metadata_manager
|
||||
.update_table_info(current_table_info_value, new_table_info)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -61,7 +61,7 @@ impl CreateLogicalTablesProcedure {
|
||||
// Update physical table's metadata
|
||||
self.context
|
||||
.table_metadata_manager
|
||||
.update_table_info(physical_table_info, new_table_info)
|
||||
.update_table_info(&physical_table_info, new_table_info)
|
||||
.await?;
|
||||
|
||||
// Invalidate physical table cache
|
||||
|
||||
@@ -76,6 +76,7 @@ impl DropDatabaseCursor {
|
||||
.await?;
|
||||
Ok((
|
||||
Box::new(DropDatabaseExecutor::new(
|
||||
table_id,
|
||||
table_id,
|
||||
TableName::new(&ctx.catalog, &ctx.schema, &table_name),
|
||||
table_route.region_routes,
|
||||
@@ -86,6 +87,7 @@ impl DropDatabaseCursor {
|
||||
}
|
||||
(DropTableTarget::Physical, TableRouteValue::Physical(table_route)) => Ok((
|
||||
Box::new(DropDatabaseExecutor::new(
|
||||
table_id,
|
||||
table_id,
|
||||
TableName::new(&ctx.catalog, &ctx.schema, &table_name),
|
||||
table_route.region_routes,
|
||||
@@ -220,7 +222,7 @@ mod tests {
|
||||
.get_physical_table_route(physical_table_id)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(table_route.region_routes, executor.region_routes);
|
||||
assert_eq!(table_route.region_routes, executor.physical_region_routes);
|
||||
assert_eq!(executor.target, DropTableTarget::Logical);
|
||||
}
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ use crate::ddl::drop_database::State;
|
||||
use crate::ddl::drop_table::executor::DropTableExecutor;
|
||||
use crate::ddl::DdlContext;
|
||||
use crate::error::{self, Result};
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::region_keeper::OperatingRegionGuard;
|
||||
use crate::rpc::router::{operating_leader_regions, RegionRoute};
|
||||
use crate::table_name::TableName;
|
||||
@@ -33,8 +34,10 @@ use crate::table_name::TableName;
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub(crate) struct DropDatabaseExecutor {
|
||||
table_id: TableId,
|
||||
physical_table_id: TableId,
|
||||
table_name: TableName,
|
||||
pub(crate) region_routes: Vec<RegionRoute>,
|
||||
/// The physical table region routes.
|
||||
pub(crate) physical_region_routes: Vec<RegionRoute>,
|
||||
pub(crate) target: DropTableTarget,
|
||||
#[serde(skip)]
|
||||
dropping_regions: Vec<OperatingRegionGuard>,
|
||||
@@ -44,14 +47,16 @@ impl DropDatabaseExecutor {
|
||||
/// Returns a new [DropDatabaseExecutor].
|
||||
pub fn new(
|
||||
table_id: TableId,
|
||||
physical_table_id: TableId,
|
||||
table_name: TableName,
|
||||
region_routes: Vec<RegionRoute>,
|
||||
physical_region_routes: Vec<RegionRoute>,
|
||||
target: DropTableTarget,
|
||||
) -> Self {
|
||||
Self {
|
||||
table_name,
|
||||
table_id,
|
||||
region_routes,
|
||||
physical_table_id,
|
||||
table_name,
|
||||
physical_region_routes,
|
||||
target,
|
||||
dropping_regions: vec![],
|
||||
}
|
||||
@@ -60,7 +65,7 @@ impl DropDatabaseExecutor {
|
||||
|
||||
impl DropDatabaseExecutor {
|
||||
fn register_dropping_regions(&mut self, ddl_ctx: &DdlContext) -> Result<()> {
|
||||
let dropping_regions = operating_leader_regions(&self.region_routes);
|
||||
let dropping_regions = operating_leader_regions(&self.physical_region_routes);
|
||||
let mut dropping_region_guards = Vec::with_capacity(dropping_regions.len());
|
||||
for (region_id, datanode_id) in dropping_regions {
|
||||
let guard = ddl_ctx
|
||||
@@ -87,12 +92,18 @@ impl State for DropDatabaseExecutor {
|
||||
) -> Result<(Box<dyn State>, Status)> {
|
||||
self.register_dropping_regions(ddl_ctx)?;
|
||||
let executor = DropTableExecutor::new(self.table_name.clone(), self.table_id, true);
|
||||
// Deletes metadata for table permanently.
|
||||
let table_route_value = TableRouteValue::new(
|
||||
self.table_id,
|
||||
self.physical_table_id,
|
||||
self.physical_region_routes.clone(),
|
||||
);
|
||||
executor
|
||||
.on_remove_metadata(ddl_ctx, &self.region_routes)
|
||||
.on_destroy_metadata(ddl_ctx, &table_route_value)
|
||||
.await?;
|
||||
executor.invalidate_table_cache(ddl_ctx).await?;
|
||||
executor
|
||||
.on_drop_regions(ddl_ctx, &self.region_routes)
|
||||
.on_drop_regions(ddl_ctx, &self.physical_region_routes)
|
||||
.await?;
|
||||
info!("Table: {}({}) is dropped", self.table_name, self.table_id);
|
||||
|
||||
@@ -122,7 +133,9 @@ mod tests {
|
||||
use crate::ddl::drop_database::{DropDatabaseContext, DropTableTarget, State};
|
||||
use crate::ddl::test_util::{create_logical_table, create_physical_table};
|
||||
use crate::error::{self, Error, Result};
|
||||
use crate::key::datanode_table::DatanodeTableKey;
|
||||
use crate::peer::Peer;
|
||||
use crate::rpc::router::region_distribution;
|
||||
use crate::table_name::TableName;
|
||||
use crate::test_util::{new_ddl_context, MockDatanodeHandler, MockDatanodeManager};
|
||||
|
||||
@@ -157,6 +170,7 @@ mod tests {
|
||||
.unwrap();
|
||||
{
|
||||
let mut state = DropDatabaseExecutor::new(
|
||||
physical_table_id,
|
||||
physical_table_id,
|
||||
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "phy"),
|
||||
table_route.region_routes.clone(),
|
||||
@@ -181,9 +195,10 @@ mod tests {
|
||||
tables: None,
|
||||
};
|
||||
let mut state = DropDatabaseExecutor::new(
|
||||
physical_table_id,
|
||||
physical_table_id,
|
||||
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "phy"),
|
||||
table_route.region_routes,
|
||||
table_route.region_routes.clone(),
|
||||
DropTableTarget::Physical,
|
||||
);
|
||||
let (state, status) = state.next(&ddl_context, &mut ctx).await.unwrap();
|
||||
@@ -207,6 +222,7 @@ mod tests {
|
||||
.unwrap();
|
||||
{
|
||||
let mut state = DropDatabaseExecutor::new(
|
||||
logical_table_id,
|
||||
physical_table_id,
|
||||
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "metric"),
|
||||
table_route.region_routes.clone(),
|
||||
@@ -231,8 +247,9 @@ mod tests {
|
||||
tables: None,
|
||||
};
|
||||
let mut state = DropDatabaseExecutor::new(
|
||||
logical_table_id,
|
||||
physical_table_id,
|
||||
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "phy"),
|
||||
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "metric"),
|
||||
table_route.region_routes,
|
||||
DropTableTarget::Logical,
|
||||
);
|
||||
@@ -240,6 +257,33 @@ mod tests {
|
||||
assert!(!status.need_persist());
|
||||
let cursor = state.as_any().downcast_ref::<DropDatabaseCursor>().unwrap();
|
||||
assert_eq!(cursor.target, DropTableTarget::Logical);
|
||||
// Checks table info
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.table_info_manager()
|
||||
.get(physical_table_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
// Checks table route
|
||||
let table_route = ddl_context
|
||||
.table_metadata_manager
|
||||
.table_route_manager()
|
||||
.table_route_storage()
|
||||
.get(physical_table_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
let region_routes = table_route.region_routes().unwrap();
|
||||
for datanode_id in region_distribution(region_routes).into_keys() {
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.datanode_table_manager()
|
||||
.get(&DatanodeTableKey::new(datanode_id, physical_table_id))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
@@ -279,6 +323,7 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
let mut state = DropDatabaseExecutor::new(
|
||||
physical_table_id,
|
||||
physical_table_id,
|
||||
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "phy"),
|
||||
table_route.region_routes,
|
||||
|
||||
@@ -12,27 +12,28 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod executor;
|
||||
pub(crate) mod executor;
|
||||
mod metadata;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_procedure::error::{FromJsonSnafu, ToJsonSnafu};
|
||||
use common_procedure::{
|
||||
Context as ProcedureContext, LockKey, Procedure, Result as ProcedureResult, Status,
|
||||
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure,
|
||||
Result as ProcedureResult, Status,
|
||||
};
|
||||
use common_telemetry::info;
|
||||
use common_telemetry::tracing::warn;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use strum::AsRefStr;
|
||||
use table::metadata::{RawTableInfo, TableId};
|
||||
use table::metadata::TableId;
|
||||
use table::table_reference::TableReference;
|
||||
|
||||
use self::executor::DropTableExecutor;
|
||||
use crate::ddl::utils::handle_retry_error;
|
||||
use crate::ddl::DdlContext;
|
||||
use crate::error::{self, Result};
|
||||
use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::key::DeserializedValueWithBytes;
|
||||
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
|
||||
use crate::metrics;
|
||||
use crate::region_keeper::OperatingRegionGuard;
|
||||
@@ -46,50 +47,50 @@ pub struct DropTableProcedure {
|
||||
pub data: DropTableData,
|
||||
/// The guards of opening regions.
|
||||
pub dropping_regions: Vec<OperatingRegionGuard>,
|
||||
/// The drop table executor.
|
||||
executor: DropTableExecutor,
|
||||
}
|
||||
|
||||
impl DropTableProcedure {
|
||||
pub const TYPE_NAME: &'static str = "metasrv-procedure::DropTable";
|
||||
|
||||
pub fn new(
|
||||
cluster_id: u64,
|
||||
task: DropTableTask,
|
||||
table_route_value: DeserializedValueWithBytes<TableRouteValue>,
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
context: DdlContext,
|
||||
) -> Self {
|
||||
pub fn new(cluster_id: u64, task: DropTableTask, context: DdlContext) -> Self {
|
||||
let data = DropTableData::new(cluster_id, task);
|
||||
let executor = data.build_executor();
|
||||
Self {
|
||||
context,
|
||||
data: DropTableData::new(cluster_id, task, table_route_value, table_info_value),
|
||||
data,
|
||||
dropping_regions: vec![],
|
||||
executor,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_json(json: &str, context: DdlContext) -> ProcedureResult<Self> {
|
||||
let data = serde_json::from_str(json).context(FromJsonSnafu)?;
|
||||
let data: DropTableData = serde_json::from_str(json).context(FromJsonSnafu)?;
|
||||
let executor = data.build_executor();
|
||||
Ok(Self {
|
||||
context,
|
||||
data,
|
||||
dropping_regions: vec![],
|
||||
executor,
|
||||
})
|
||||
}
|
||||
|
||||
async fn on_prepare<'a>(&mut self, executor: &DropTableExecutor) -> Result<Status> {
|
||||
if executor.on_prepare(&self.context).await?.stop() {
|
||||
pub(crate) async fn on_prepare<'a>(&mut self) -> Result<Status> {
|
||||
if self.executor.on_prepare(&self.context).await?.stop() {
|
||||
return Ok(Status::done());
|
||||
}
|
||||
self.data.state = DropTableState::RemoveMetadata;
|
||||
self.fill_table_metadata().await?;
|
||||
self.data.state = DropTableState::DeleteMetadata;
|
||||
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
/// Registers dropping regions if they are not registered yet.
|
||||
fn register_dropping_regions(&mut self) -> Result<()> {
|
||||
let region_routes = self.data.region_routes()?;
|
||||
let dropping_regions = operating_leader_regions(&self.data.physical_region_routes);
|
||||
|
||||
let dropping_regions = operating_leader_regions(region_routes);
|
||||
|
||||
if self.dropping_regions.len() == dropping_regions.len() {
|
||||
if !self.dropping_regions.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@@ -112,7 +113,7 @@ impl DropTableProcedure {
|
||||
}
|
||||
|
||||
/// Removes the table metadata.
|
||||
async fn on_remove_metadata(&mut self, executor: &DropTableExecutor) -> Result<Status> {
|
||||
pub(crate) async fn on_delete_metadata(&mut self) -> Result<Status> {
|
||||
self.register_dropping_regions()?;
|
||||
// NOTES: If the meta server crashes after the `RemoveMetadata` step,
|
||||
// the corresponding regions of this table on the Datanode will be closed automatically.
|
||||
@@ -120,8 +121,15 @@ impl DropTableProcedure {
|
||||
|
||||
// TODO(weny): Considers introducing a RegionStatus to indicate the region is dropping.
|
||||
let table_id = self.data.table_id();
|
||||
executor
|
||||
.on_remove_metadata(&self.context, self.data.region_routes()?)
|
||||
let table_route_value = &TableRouteValue::new(
|
||||
self.data.task.table_id,
|
||||
// Safety: checked
|
||||
self.data.physical_table_id.unwrap(),
|
||||
self.data.physical_region_routes.clone(),
|
||||
);
|
||||
// Deletes table metadata logically.
|
||||
self.executor
|
||||
.on_delete_metadata(&self.context, table_route_value)
|
||||
.await?;
|
||||
info!("Deleted table metadata for table {table_id}");
|
||||
self.data.state = DropTableState::InvalidateTableCache;
|
||||
@@ -129,16 +137,31 @@ impl DropTableProcedure {
|
||||
}
|
||||
|
||||
/// Broadcasts the instruction to invalidate the table cache.
|
||||
async fn on_broadcast(&mut self, executor: &DropTableExecutor) -> Result<Status> {
|
||||
executor.invalidate_table_cache(&self.context).await?;
|
||||
async fn on_broadcast(&mut self) -> Result<Status> {
|
||||
self.executor.invalidate_table_cache(&self.context).await?;
|
||||
self.data.state = DropTableState::DatanodeDropRegions;
|
||||
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
pub async fn on_datanode_drop_regions(&self, executor: &DropTableExecutor) -> Result<Status> {
|
||||
executor
|
||||
.on_drop_regions(&self.context, self.data.region_routes()?)
|
||||
pub async fn on_datanode_drop_regions(&mut self) -> Result<Status> {
|
||||
self.executor
|
||||
.on_drop_regions(&self.context, &self.data.physical_region_routes)
|
||||
.await?;
|
||||
self.data.state = DropTableState::DeleteTombstone;
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
/// Deletes metadata tombstone.
|
||||
async fn on_delete_metadata_tombstone(&self) -> Result<Status> {
|
||||
let table_route_value = &TableRouteValue::new(
|
||||
self.data.task.table_id,
|
||||
// Safety: checked
|
||||
self.data.physical_table_id.unwrap(),
|
||||
self.data.physical_region_routes.clone(),
|
||||
);
|
||||
self.executor
|
||||
.on_delete_metadata_tombstone(&self.context, table_route_value)
|
||||
.await?;
|
||||
Ok(Status::done())
|
||||
}
|
||||
@@ -151,21 +174,17 @@ impl Procedure for DropTableProcedure {
|
||||
}
|
||||
|
||||
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
|
||||
let executor = DropTableExecutor::new(
|
||||
self.data.task.table_name(),
|
||||
self.data.table_id(),
|
||||
self.data.task.drop_if_exists,
|
||||
);
|
||||
let state = &self.data.state;
|
||||
let _timer = metrics::METRIC_META_PROCEDURE_DROP_TABLE
|
||||
.with_label_values(&[state.as_ref()])
|
||||
.start_timer();
|
||||
|
||||
match self.data.state {
|
||||
DropTableState::Prepare => self.on_prepare(&executor).await,
|
||||
DropTableState::RemoveMetadata => self.on_remove_metadata(&executor).await,
|
||||
DropTableState::InvalidateTableCache => self.on_broadcast(&executor).await,
|
||||
DropTableState::DatanodeDropRegions => self.on_datanode_drop_regions(&executor).await,
|
||||
DropTableState::Prepare => self.on_prepare().await,
|
||||
DropTableState::DeleteMetadata => self.on_delete_metadata().await,
|
||||
DropTableState::InvalidateTableCache => self.on_broadcast().await,
|
||||
DropTableState::DatanodeDropRegions => self.on_datanode_drop_regions().await,
|
||||
DropTableState::DeleteTombstone => self.on_delete_metadata_tombstone().await,
|
||||
}
|
||||
.map_err(handle_retry_error)
|
||||
}
|
||||
@@ -185,31 +204,47 @@ impl Procedure for DropTableProcedure {
|
||||
|
||||
LockKey::new(lock_key)
|
||||
}
|
||||
|
||||
fn rollback_supported(&self) -> bool {
|
||||
!matches!(self.data.state, DropTableState::Prepare)
|
||||
}
|
||||
|
||||
async fn rollback(&mut self, _: &ProcedureContext) -> ProcedureResult<()> {
|
||||
warn!(
|
||||
"Rolling back the drop table procedure, table: {}",
|
||||
self.data.table_id()
|
||||
);
|
||||
|
||||
let table_route_value = &TableRouteValue::new(
|
||||
self.data.task.table_id,
|
||||
// Safety: checked
|
||||
self.data.physical_table_id.unwrap(),
|
||||
self.data.physical_region_routes.clone(),
|
||||
);
|
||||
self.executor
|
||||
.on_restore_metadata(&self.context, table_route_value)
|
||||
.await
|
||||
.map_err(ProcedureError::external)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
/// TODO(weny): simplify the table data.
|
||||
pub struct DropTableData {
|
||||
pub state: DropTableState,
|
||||
pub cluster_id: u64,
|
||||
pub task: DropTableTask,
|
||||
pub table_route_value: DeserializedValueWithBytes<TableRouteValue>,
|
||||
pub table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
pub physical_region_routes: Vec<RegionRoute>,
|
||||
pub physical_table_id: Option<TableId>,
|
||||
}
|
||||
|
||||
impl DropTableData {
|
||||
pub fn new(
|
||||
cluster_id: u64,
|
||||
task: DropTableTask,
|
||||
table_route_value: DeserializedValueWithBytes<TableRouteValue>,
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
) -> Self {
|
||||
pub fn new(cluster_id: u64, task: DropTableTask) -> Self {
|
||||
Self {
|
||||
state: DropTableState::Prepare,
|
||||
cluster_id,
|
||||
task,
|
||||
table_info_value,
|
||||
table_route_value,
|
||||
physical_region_routes: vec![],
|
||||
physical_table_id: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -217,27 +252,30 @@ impl DropTableData {
|
||||
self.task.table_ref()
|
||||
}
|
||||
|
||||
fn region_routes(&self) -> Result<&Vec<RegionRoute>> {
|
||||
self.table_route_value.region_routes()
|
||||
}
|
||||
|
||||
fn table_info(&self) -> &RawTableInfo {
|
||||
&self.table_info_value.table_info
|
||||
}
|
||||
|
||||
fn table_id(&self) -> TableId {
|
||||
self.table_info().ident.table_id
|
||||
self.task.table_id
|
||||
}
|
||||
|
||||
fn build_executor(&self) -> DropTableExecutor {
|
||||
DropTableExecutor::new(
|
||||
self.task.table_name(),
|
||||
self.task.table_id,
|
||||
self.task.drop_if_exists,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// The state of drop table.
|
||||
#[derive(Debug, Serialize, Deserialize, AsRefStr)]
|
||||
pub enum DropTableState {
|
||||
/// Prepares to drop the table
|
||||
Prepare,
|
||||
/// Removes metadata
|
||||
RemoveMetadata,
|
||||
/// Deletes metadata logically
|
||||
DeleteMetadata,
|
||||
/// Invalidates Table Cache
|
||||
InvalidateTableCache,
|
||||
/// Drops regions on Datanode
|
||||
DatanodeDropRegions,
|
||||
/// Deletes metadata tombstone permanently
|
||||
DeleteTombstone,
|
||||
}
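For orientation, the `execute` match shown earlier drives these states in a fixed order, and `rollback` undoes the logical deletion via `on_restore_metadata`. A minimal sketch of the forward order as a hypothetical helper (not part of the diff):

// Hypothetical helper, for illustration only.
fn next_drop_table_state(state: &DropTableState) -> Option<DropTableState> {
    use DropTableState::*;
    match state {
        Prepare => Some(DeleteMetadata),
        DeleteMetadata => Some(InvalidateTableCache),
        InvalidateTableCache => Some(DatanodeDropRegions),
        DatanodeDropRegions => Some(DeleteTombstone),
        // on_delete_metadata_tombstone returns Status::done().
        DeleteTombstone => None,
    }
}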
|
||||
|
||||
@@ -30,6 +30,7 @@ use crate::ddl::DdlContext;
|
||||
use crate::error::{self, Result};
|
||||
use crate::instruction::CacheIdent;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute};
|
||||
use crate::table_name::TableName;
|
||||
|
||||
@@ -99,14 +100,73 @@ impl DropTableExecutor {
|
||||
Ok(Control::Continue(()))
|
||||
}
|
||||
|
||||
/// Removes the table metadata.
|
||||
pub async fn on_remove_metadata(
|
||||
/// Deletes the table metadata **logically**.
|
||||
pub async fn on_delete_metadata(
|
||||
&self,
|
||||
ctx: &DdlContext,
|
||||
region_routes: &[RegionRoute],
|
||||
table_route_value: &TableRouteValue,
|
||||
) -> Result<()> {
|
||||
let table_name_key = TableNameKey::new(
|
||||
&self.table.catalog_name,
|
||||
&self.table.schema_name,
|
||||
&self.table.table_name,
|
||||
);
|
||||
if !ctx
|
||||
.table_metadata_manager
|
||||
.table_name_manager()
|
||||
.exists(table_name_key)
|
||||
.await?
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
ctx.table_metadata_manager
|
||||
.delete_table_metadata(self.table_id, &self.table, table_route_value)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Deletes the table metadata tombstone **permanently**.
|
||||
pub async fn on_delete_metadata_tombstone(
|
||||
&self,
|
||||
ctx: &DdlContext,
|
||||
table_route_value: &TableRouteValue,
|
||||
) -> Result<()> {
|
||||
ctx.table_metadata_manager
|
||||
.delete_table_metadata(self.table_id, &self.table, region_routes)
|
||||
.delete_table_metadata_tombstone(self.table_id, &self.table, table_route_value)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Deletes metadata for table **permanently**.
|
||||
pub async fn on_destroy_metadata(
|
||||
&self,
|
||||
ctx: &DdlContext,
|
||||
table_route_value: &TableRouteValue,
|
||||
) -> Result<()> {
|
||||
ctx.table_metadata_manager
|
||||
.destroy_table_metadata(self.table_id, &self.table, table_route_value)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Restores the table metadata.
|
||||
pub async fn on_restore_metadata(
|
||||
&self,
|
||||
ctx: &DdlContext,
|
||||
table_route_value: &TableRouteValue,
|
||||
) -> Result<()> {
|
||||
let table_name_key = TableNameKey::new(
|
||||
&self.table.catalog_name,
|
||||
&self.table.schema_name,
|
||||
&self.table.table_name,
|
||||
);
|
||||
if ctx
|
||||
.table_metadata_manager
|
||||
.table_name_manager()
|
||||
.exists(table_name_key)
|
||||
.await?
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
ctx.table_metadata_manager
|
||||
.restore_table_metadata(self.table_id, &self.table, table_route_value)
|
||||
.await
|
||||
}
|
||||
|
||||
|
||||
34
src/common/meta/src/ddl/drop_table/metadata.rs
Normal file
@@ -0,0 +1,34 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::ddl::drop_table::DropTableProcedure;
|
||||
use crate::error::Result;
|
||||
|
||||
impl DropTableProcedure {
|
||||
/// Fetches the table info and physical table route.
|
||||
pub(crate) async fn fill_table_metadata(&mut self) -> Result<()> {
|
||||
let task = &self.data.task;
|
||||
let (physical_table_id, physical_table_route_value) = self
|
||||
.context
|
||||
.table_metadata_manager
|
||||
.table_route_manager()
|
||||
.get_physical_table_route(task.table_id)
|
||||
.await?;
|
||||
|
||||
self.data.physical_region_routes = physical_table_route_value.region_routes;
|
||||
self.data.physical_table_id = Some(physical_table_id);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -15,6 +15,7 @@
|
||||
pub mod alter_table;
|
||||
pub mod columns;
|
||||
pub mod create_table;
|
||||
pub mod datanode_handler;
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ pub struct TestAlterTableExpr {
|
||||
table_name: String,
|
||||
#[builder(setter(into))]
|
||||
add_columns: Vec<ColumnDef>,
|
||||
#[builder(setter(into))]
|
||||
#[builder(setter(into, strip_option))]
|
||||
new_table_name: Option<String>,
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,8 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::v1::column_def::try_as_column_schema;
|
||||
use api::v1::{ColumnDef, CreateTableExpr, SemanticType};
|
||||
use api::v1::meta::Partition;
|
||||
use api::v1::{ColumnDataType, ColumnDef, CreateTableExpr, SemanticType};
|
||||
use chrono::DateTime;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO2_ENGINE};
|
||||
use datatypes::schema::RawSchema;
|
||||
@@ -24,6 +25,9 @@ use store_api::storage::TableId;
|
||||
use table::metadata::{RawTableInfo, RawTableMeta, TableIdent, TableType};
|
||||
use table::requests::TableOptions;
|
||||
|
||||
use crate::ddl::test_util::columns::TestColumnDefBuilder;
|
||||
use crate::rpc::ddl::CreateTableTask;
|
||||
|
||||
#[derive(Default, Builder)]
|
||||
#[builder(default)]
|
||||
pub struct TestCreateTableExpr {
|
||||
@@ -43,6 +47,7 @@ pub struct TestCreateTableExpr {
|
||||
primary_keys: Vec<String>,
|
||||
create_if_not_exists: bool,
|
||||
table_options: HashMap<String, String>,
|
||||
#[builder(setter(into, strip_option))]
|
||||
table_id: Option<TableId>,
|
||||
#[builder(setter(into), default = "MITO2_ENGINE.to_string()")]
|
||||
engine: String,
|
||||
@@ -129,3 +134,47 @@ pub fn build_raw_table_info_from_expr(expr: &CreateTableExpr) -> RawTableInfo {
|
||||
table_type: TableType::Base,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn test_create_table_task(name: &str, table_id: TableId) -> CreateTableTask {
|
||||
let create_table = TestCreateTableExprBuilder::default()
|
||||
.column_defs([
|
||||
TestColumnDefBuilder::default()
|
||||
.name("ts")
|
||||
.data_type(ColumnDataType::TimestampMillisecond)
|
||||
.semantic_type(SemanticType::Timestamp)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into(),
|
||||
TestColumnDefBuilder::default()
|
||||
.name("host")
|
||||
.data_type(ColumnDataType::String)
|
||||
.semantic_type(SemanticType::Tag)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into(),
|
||||
TestColumnDefBuilder::default()
|
||||
.name("cpu")
|
||||
.data_type(ColumnDataType::Float64)
|
||||
.semantic_type(SemanticType::Field)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into(),
|
||||
])
|
||||
.table_id(table_id)
|
||||
.time_index("ts")
|
||||
.primary_keys(["host".into()])
|
||||
.table_name(name)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into();
|
||||
let table_info = build_raw_table_info_from_expr(&create_table);
|
||||
CreateTableTask {
|
||||
create_table,
|
||||
// Single region
|
||||
partitions: vec![Partition {
|
||||
column_list: vec![],
|
||||
value_list: vec![],
|
||||
}],
|
||||
table_info,
|
||||
}
|
||||
}
|
||||
|
||||
169
src/common/meta/src/ddl/test_util/datanode_handler.rs
Normal file
@@ -0,0 +1,169 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use api::v1::region::{QueryRequest, RegionRequest};
use common_error::ext::{BoxedError, ErrorExt, StackError};
use common_error::status_code::StatusCode;
use common_recordbatch::SendableRecordBatchStream;
use common_telemetry::debug;
use snafu::{ResultExt, Snafu};
use tokio::sync::mpsc;

use crate::datanode_manager::HandleResponse;
use crate::error::{self, Error, Result};
use crate::peer::Peer;
use crate::test_util::MockDatanodeHandler;

#[async_trait::async_trait]
impl MockDatanodeHandler for () {
    async fn handle(&self, _peer: &Peer, _request: RegionRequest) -> Result<HandleResponse> {
        unreachable!()
    }

    async fn handle_query(
        &self,
        _peer: &Peer,
        _request: QueryRequest,
    ) -> Result<SendableRecordBatchStream> {
        unreachable!()
    }
}

#[derive(Clone)]
pub struct DatanodeWatcher(pub mpsc::Sender<(Peer, RegionRequest)>);

#[async_trait::async_trait]
impl MockDatanodeHandler for DatanodeWatcher {
    async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<HandleResponse> {
        debug!("Returning Ok(0) for request: {request:?}, peer: {peer:?}");
        self.0.send((peer.clone(), request)).await.unwrap();
        Ok(HandleResponse::new(0))
    }

    async fn handle_query(
        &self,
        _peer: &Peer,
        _request: QueryRequest,
    ) -> Result<SendableRecordBatchStream> {
        unreachable!()
    }
}

#[derive(Clone)]
pub struct RetryErrorDatanodeHandler;

#[async_trait::async_trait]
impl MockDatanodeHandler for RetryErrorDatanodeHandler {
    async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<HandleResponse> {
        debug!("Returning retry later for request: {request:?}, peer: {peer:?}");
        Err(Error::RetryLater {
            source: BoxedError::new(
                error::UnexpectedSnafu {
                    err_msg: "retry later",
                }
                .build(),
            ),
        })
    }

    async fn handle_query(
        &self,
        _peer: &Peer,
        _request: QueryRequest,
    ) -> Result<SendableRecordBatchStream> {
        unreachable!()
    }
}

#[derive(Clone)]
pub struct UnexpectedErrorDatanodeHandler;

#[async_trait::async_trait]
impl MockDatanodeHandler for UnexpectedErrorDatanodeHandler {
    async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<HandleResponse> {
        debug!("Returning mock error for request: {request:?}, peer: {peer:?}");
        error::UnexpectedSnafu {
            err_msg: "mock error",
        }
        .fail()
    }

    async fn handle_query(
        &self,
        _peer: &Peer,
        _request: QueryRequest,
    ) -> Result<SendableRecordBatchStream> {
        unreachable!()
    }
}

#[derive(Clone)]
pub struct RequestOutdatedErrorDatanodeHandler;

#[derive(Debug, Snafu)]
#[snafu(display("A mock RequestOutdated error"))]
struct MockRequestOutdatedError;

impl StackError for MockRequestOutdatedError {
    fn debug_fmt(&self, _: usize, _: &mut Vec<String>) {}

    fn next(&self) -> Option<&dyn StackError> {
        None
    }
}

impl ErrorExt for MockRequestOutdatedError {
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    fn status_code(&self) -> StatusCode {
        StatusCode::RequestOutdated
    }
}

#[async_trait::async_trait]
impl MockDatanodeHandler for RequestOutdatedErrorDatanodeHandler {
    async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<HandleResponse> {
        debug!("Returning mock error for request: {request:?}, peer: {peer:?}");
        Err(BoxedError::new(MockRequestOutdatedError)).context(error::ExternalSnafu)
    }

    async fn handle_query(
        &self,
        _peer: &Peer,
        _request: QueryRequest,
    ) -> Result<SendableRecordBatchStream> {
        unreachable!()
    }
}

#[derive(Clone)]
pub struct NaiveDatanodeHandler;

#[async_trait::async_trait]
impl MockDatanodeHandler for NaiveDatanodeHandler {
    async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<HandleResponse> {
        debug!("Returning Ok(0) for request: {request:?}, peer: {peer:?}");
        Ok(HandleResponse::new(0))
    }

    async fn handle_query(
        &self,
        _peer: &Peer,
        _request: QueryRequest,
    ) -> Result<SendableRecordBatchStream> {
        unreachable!()
    }
}
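A minimal sketch (not part of this diff) of how DatanodeWatcher is meant to be consumed, mirroring the pattern of the alter/drop tests introduced later in this change; the test name and body comments are illustrative only:

// Illustrative sketch, assuming the MockDatanodeManager and new_ddl_context
// test utilities referenced throughout this change; the test name is made up.
use std::sync::Arc;

use tokio::sync::mpsc;

#[tokio::test]
async fn sketch_watcher_captures_requests() {
    // Every RegionRequest handled by the mock datanode is forwarded here.
    let (tx, mut rx) = mpsc::channel(8);
    let datanode_manager = Arc::new(MockDatanodeManager::new(DatanodeWatcher(tx)));
    let _ddl_context = new_ddl_context(datanode_manager);

    // ... drive a DDL procedure with `_ddl_context` so that it submits
    // region requests to the mock datanodes ...

    // The captured (Peer, RegionRequest) pairs can then be asserted on,
    // e.g. sorted by peer id as the alter/drop tests below do.
    while let Ok((peer, request)) = rx.try_recv() {
        println!("peer {} got {:?}", peer.id, request.body);
    }
}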
@@ -13,6 +13,8 @@
// limitations under the License.

mod alter_logical_tables;
mod alter_table;
mod create_logical_tables;
mod create_table;
mod drop_database;
mod drop_table;

@@ -23,8 +23,8 @@ use common_procedure_test::MockContextProvider;
use crate::ddl::alter_logical_tables::AlterLogicalTablesProcedure;
use crate::ddl::test_util::alter_table::TestAlterTableExprBuilder;
use crate::ddl::test_util::columns::TestColumnDefBuilder;
use crate::ddl::test_util::datanode_handler::NaiveDatanodeHandler;
use crate::ddl::test_util::{create_logical_table, create_physical_table};
use crate::ddl::tests::create_logical_tables::NaiveDatanodeHandler;
use crate::error::Error::{AlterLogicalTablesInvalidArguments, TableNotFound};
use crate::key::table_name::TableNameKey;
use crate::rpc::ddl::AlterTableTask;

src/common/meta/src/ddl/tests/alter_table.rs (new file, 345 lines)
@@ -0,0 +1,345 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::alter_expr::Kind;
|
||||
use api::v1::region::{region_request, RegionRequest};
|
||||
use api::v1::{
|
||||
AddColumn, AddColumns, AlterExpr, ColumnDataType, ColumnDef as PbColumnDef, DropColumn,
|
||||
DropColumns, SemanticType,
|
||||
};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use store_api::storage::RegionId;
|
||||
use tokio::sync::mpsc::{self};
|
||||
|
||||
use crate::ddl::alter_table::AlterTableProcedure;
|
||||
use crate::ddl::test_util::alter_table::TestAlterTableExprBuilder;
|
||||
use crate::ddl::test_util::create_table::test_create_table_task;
|
||||
use crate::ddl::test_util::datanode_handler::{
|
||||
DatanodeWatcher, RequestOutdatedErrorDatanodeHandler,
|
||||
};
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::peer::Peer;
|
||||
use crate::rpc::ddl::AlterTableTask;
|
||||
use crate::rpc::router::{Region, RegionRoute};
|
||||
use crate::test_util::{new_ddl_context, MockDatanodeManager};
|
||||
|
||||
fn test_rename_alter_table_task(table_name: &str, new_table_name: &str) -> AlterTableTask {
|
||||
let builder = TestAlterTableExprBuilder::default()
|
||||
.table_name(table_name)
|
||||
.new_table_name(new_table_name)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
AlterTableTask {
|
||||
alter_table: builder.into(),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_prepare_table_exists_err() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let task = test_create_table_task("foo", 1024);
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::physical(vec![]),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let task = test_rename_alter_table_task("non-exists", "foo");
|
||||
let mut procedure = AlterTableProcedure::new(cluster_id, 1024, task, ddl_context).unwrap();
|
||||
let err = procedure.on_prepare().await.unwrap_err();
|
||||
assert_matches!(err.status_code(), StatusCode::TableAlreadyExists);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_prepare_table_not_exists_err() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let task = test_rename_alter_table_task("non-exists", "foo");
|
||||
let mut procedure = AlterTableProcedure::new(cluster_id, 1024, task, ddl_context).unwrap();
|
||||
let err = procedure.on_prepare().await.unwrap_err();
|
||||
assert_matches!(err.status_code(), StatusCode::TableNotFound);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_submit_alter_request() {
|
||||
let (tx, mut rx) = mpsc::channel(8);
|
||||
let datanode_handler = DatanodeWatcher(tx);
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(datanode_handler));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let table_id = 1024;
|
||||
let table_name = "foo";
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::physical(vec![
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 1)),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![Peer::empty(5)],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 2)),
|
||||
leader_peer: Some(Peer::empty(2)),
|
||||
follower_peers: vec![Peer::empty(4)],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 3)),
|
||||
leader_peer: Some(Peer::empty(3)),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
]),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let alter_table_task = AlterTableTask {
|
||||
alter_table: AlterExpr {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: table_name.to_string(),
|
||||
kind: Some(Kind::DropColumns(DropColumns {
|
||||
drop_columns: vec![DropColumn {
|
||||
name: "my_field_column".to_string(),
|
||||
}],
|
||||
})),
|
||||
},
|
||||
};
|
||||
let mut procedure =
|
||||
AlterTableProcedure::new(cluster_id, table_id, alter_table_task, ddl_context).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
procedure.submit_alter_region_requests().await.unwrap();
|
||||
|
||||
let check = |peer: Peer,
|
||||
request: RegionRequest,
|
||||
expected_peer_id: u64,
|
||||
expected_region_id: RegionId| {
|
||||
assert_eq!(peer.id, expected_peer_id);
|
||||
let Some(region_request::Body::Alter(req)) = request.body else {
|
||||
unreachable!();
|
||||
};
|
||||
assert_eq!(req.region_id, expected_region_id);
|
||||
};
|
||||
|
||||
let mut results = Vec::new();
|
||||
for _ in 0..3 {
|
||||
let result = rx.try_recv().unwrap();
|
||||
results.push(result);
|
||||
}
|
||||
results.sort_unstable_by(|(a, _), (b, _)| a.id.cmp(&b.id));
|
||||
|
||||
let (peer, request) = results.remove(0);
|
||||
check(peer, request, 1, RegionId::new(table_id, 1));
|
||||
let (peer, request) = results.remove(0);
|
||||
check(peer, request, 2, RegionId::new(table_id, 2));
|
||||
let (peer, request) = results.remove(0);
|
||||
check(peer, request, 3, RegionId::new(table_id, 3));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_submit_alter_request_with_outdated_request() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(
|
||||
RequestOutdatedErrorDatanodeHandler,
|
||||
));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let table_id = 1024;
|
||||
let table_name = "foo";
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::physical(vec![
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 1)),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![Peer::empty(5)],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 2)),
|
||||
leader_peer: Some(Peer::empty(2)),
|
||||
follower_peers: vec![Peer::empty(4)],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 3)),
|
||||
leader_peer: Some(Peer::empty(3)),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
]),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let alter_table_task = AlterTableTask {
|
||||
alter_table: AlterExpr {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: table_name.to_string(),
|
||||
kind: Some(Kind::DropColumns(DropColumns {
|
||||
drop_columns: vec![DropColumn {
|
||||
name: "my_field_column".to_string(),
|
||||
}],
|
||||
})),
|
||||
},
|
||||
};
|
||||
let mut procedure =
|
||||
AlterTableProcedure::new(cluster_id, table_id, alter_table_task, ddl_context).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
procedure.submit_alter_region_requests().await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_update_metadata_rename() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let table_name = "foo";
|
||||
let new_table_name = "bar";
|
||||
let table_id = 1024;
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::physical(vec![]),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let task = test_rename_alter_table_task(table_name, new_table_name);
|
||||
let mut procedure =
|
||||
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context.clone()).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
procedure.on_update_metadata().await.unwrap();
|
||||
|
||||
let old_table_name_exists = ddl_context
|
||||
.table_metadata_manager
|
||||
.table_name_manager()
|
||||
.exists(TableNameKey::new(
|
||||
DEFAULT_CATALOG_NAME,
|
||||
DEFAULT_SCHEMA_NAME,
|
||||
table_name,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(!old_table_name_exists);
|
||||
let value = ddl_context
|
||||
.table_metadata_manager
|
||||
.table_name_manager()
|
||||
.get(TableNameKey::new(
|
||||
DEFAULT_CATALOG_NAME,
|
||||
DEFAULT_SCHEMA_NAME,
|
||||
new_table_name,
|
||||
))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(value.table_id(), table_id);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_update_metadata_add_columns() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let table_name = "foo";
|
||||
let table_id = 1024;
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::physical(vec![]),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let task = AlterTableTask {
|
||||
alter_table: AlterExpr {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: table_name.to_string(),
|
||||
kind: Some(Kind::AddColumns(AddColumns {
|
||||
add_columns: vec![AddColumn {
|
||||
column_def: Some(PbColumnDef {
|
||||
name: "my_tag3".to_string(),
|
||||
data_type: ColumnDataType::String as i32,
|
||||
semantic_type: SemanticType::Tag as i32,
|
||||
is_nullable: true,
|
||||
..Default::default()
|
||||
}),
|
||||
location: None,
|
||||
}],
|
||||
})),
|
||||
},
|
||||
};
|
||||
let mut procedure =
|
||||
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context.clone()).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
procedure.on_update_metadata().await.unwrap();
|
||||
|
||||
let table_info = ddl_context
|
||||
.table_metadata_manager
|
||||
.table_info_manager()
|
||||
.get(table_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.into_inner()
|
||||
.table_info;
|
||||
|
||||
assert_eq!(
|
||||
table_info.meta.schema.column_schemas.len() as u32,
|
||||
table_info.meta.next_column_id
|
||||
);
|
||||
}
|
||||
@@ -15,25 +15,21 @@
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::region::{QueryRequest, RegionRequest};
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_procedure::{Context as ProcedureContext, Procedure, ProcedureId, Status};
|
||||
use common_procedure_test::MockContextProvider;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use common_telemetry::debug;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::datanode_manager::HandleResponse;
|
||||
use crate::ddl::create_logical_tables::CreateLogicalTablesProcedure;
|
||||
use crate::ddl::test_util::datanode_handler::NaiveDatanodeHandler;
|
||||
use crate::ddl::test_util::{
|
||||
create_physical_table_metadata, test_create_logical_table_task, test_create_physical_table_task,
|
||||
};
|
||||
use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};
|
||||
use crate::error::{Error, Result};
|
||||
use crate::error::Error;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::peer::Peer;
|
||||
use crate::test_util::{new_ddl_context, MockDatanodeHandler, MockDatanodeManager};
|
||||
use crate::test_util::{new_ddl_context, MockDatanodeManager};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_prepare_physical_table_not_found() {
|
||||
@@ -229,25 +225,6 @@ async fn test_on_prepare_part_logical_tables_exist() {
|
||||
assert_matches!(status, Status::Executing { persist: true });
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct NaiveDatanodeHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MockDatanodeHandler for NaiveDatanodeHandler {
|
||||
async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<HandleResponse> {
|
||||
debug!("Returning Ok(0) for request: {request:?}, peer: {peer:?}");
|
||||
Ok(HandleResponse::new(0))
|
||||
}
|
||||
|
||||
async fn handle_query(
|
||||
&self,
|
||||
_peer: &Peer,
|
||||
_request: QueryRequest,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_create_metadata() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
|
||||
|
||||
@@ -17,42 +17,24 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::meta::Partition;
|
||||
use api::v1::region::{QueryRequest, RegionRequest};
|
||||
use api::v1::{ColumnDataType, SemanticType};
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_procedure::{Context as ProcedureContext, Procedure, ProcedureId, Status};
|
||||
use common_procedure_test::MockContextProvider;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use common_telemetry::debug;
|
||||
|
||||
use crate::datanode_manager::HandleResponse;
|
||||
use crate::ddl::create_table::CreateTableProcedure;
|
||||
use crate::ddl::test_util::columns::TestColumnDefBuilder;
|
||||
use crate::ddl::test_util::create_table::{
|
||||
build_raw_table_info_from_expr, TestCreateTableExprBuilder,
|
||||
};
|
||||
use crate::error;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::ddl::test_util::datanode_handler::{
|
||||
NaiveDatanodeHandler, RetryErrorDatanodeHandler, UnexpectedErrorDatanodeHandler,
|
||||
};
|
||||
use crate::error::Error;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::peer::Peer;
|
||||
use crate::rpc::ddl::CreateTableTask;
|
||||
use crate::test_util::{new_ddl_context, MockDatanodeHandler, MockDatanodeManager};
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MockDatanodeHandler for () {
|
||||
async fn handle(&self, _peer: &Peer, _request: RegionRequest) -> Result<HandleResponse> {
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
async fn handle_query(
|
||||
&self,
|
||||
_peer: &Peer,
|
||||
_request: QueryRequest,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
use crate::test_util::{new_ddl_context, MockDatanodeManager};
|
||||
|
||||
fn test_create_table_task(name: &str) -> CreateTableTask {
|
||||
let create_table = TestCreateTableExprBuilder::default()
|
||||
@@ -174,32 +156,6 @@ async fn test_on_prepare_with_no_partition_err() {
|
||||
.contains("The number of partitions must be greater than 0"),);
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RetryErrorDatanodeHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MockDatanodeHandler for RetryErrorDatanodeHandler {
|
||||
async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<HandleResponse> {
|
||||
debug!("Returning retry later for request: {request:?}, peer: {peer:?}");
|
||||
Err(Error::RetryLater {
|
||||
source: BoxedError::new(
|
||||
error::UnexpectedSnafu {
|
||||
err_msg: "retry later",
|
||||
}
|
||||
.build(),
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
async fn handle_query(
|
||||
&self,
|
||||
_peer: &Peer,
|
||||
_request: QueryRequest,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_datanode_create_regions_should_retry() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
@@ -218,28 +174,6 @@ async fn test_on_datanode_create_regions_should_retry() {
|
||||
assert!(error.is_retry_later());
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct UnexpectedErrorDatanodeHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MockDatanodeHandler for UnexpectedErrorDatanodeHandler {
|
||||
async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<HandleResponse> {
|
||||
debug!("Returning mock error for request: {request:?}, peer: {peer:?}");
|
||||
error::UnexpectedSnafu {
|
||||
err_msg: "mock error",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
async fn handle_query(
|
||||
&self,
|
||||
_peer: &Peer,
|
||||
_request: QueryRequest,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_datanode_create_regions_should_not_retry() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
@@ -258,25 +192,6 @@ async fn test_on_datanode_create_regions_should_not_retry() {
|
||||
assert!(!error.is_retry_later());
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct NaiveDatanodeHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MockDatanodeHandler for NaiveDatanodeHandler {
|
||||
async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<HandleResponse> {
|
||||
debug!("Returning Ok(0) for request: {request:?}, peer: {peer:?}");
|
||||
Ok(HandleResponse::new(0))
|
||||
}
|
||||
|
||||
async fn handle_query(
|
||||
&self,
|
||||
_peer: &Peer,
|
||||
_request: QueryRequest,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_create_metadata_error() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
@@ -20,8 +20,8 @@ use common_procedure_test::MockContextProvider;
|
||||
use futures::TryStreamExt;
|
||||
|
||||
use crate::ddl::drop_database::DropDatabaseProcedure;
|
||||
use crate::ddl::test_util::datanode_handler::{NaiveDatanodeHandler, RetryErrorDatanodeHandler};
|
||||
use crate::ddl::test_util::{create_logical_table, create_physical_table};
|
||||
use crate::ddl::tests::create_table::{NaiveDatanodeHandler, RetryErrorDatanodeHandler};
|
||||
use crate::key::schema_name::SchemaNameKey;
|
||||
use crate::test_util::{new_ddl_context, MockDatanodeManager};
|
||||
|
||||
|
||||
src/common/meta/src/ddl/tests/drop_table.rs (new file, 291 lines)
@@ -0,0 +1,291 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::region::{region_request, RegionRequest};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_procedure::{Context as ProcedureContext, Procedure, ProcedureId};
|
||||
use common_procedure_test::MockContextProvider;
|
||||
use store_api::storage::RegionId;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use crate::ddl::create_logical_tables::CreateLogicalTablesProcedure;
|
||||
use crate::ddl::drop_table::DropTableProcedure;
|
||||
use crate::ddl::test_util::create_table::test_create_table_task;
|
||||
use crate::ddl::test_util::datanode_handler::{DatanodeWatcher, NaiveDatanodeHandler};
|
||||
use crate::ddl::test_util::{
|
||||
create_physical_table_metadata, test_create_logical_table_task, test_create_physical_table_task,
|
||||
};
|
||||
use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
use crate::peer::Peer;
|
||||
use crate::rpc::ddl::DropTableTask;
|
||||
use crate::rpc::router::{Region, RegionRoute};
|
||||
use crate::test_util::{new_ddl_context, new_ddl_context_with_kv_backend, MockDatanodeManager};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_prepare_table_not_exists_err() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let table_name = "foo";
|
||||
let table_id = 1024;
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::physical(vec![]),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let task = DropTableTask {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table: "bar".to_string(),
|
||||
table_id,
|
||||
drop_if_exists: false,
|
||||
};
|
||||
|
||||
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context);
|
||||
let err = procedure.on_prepare().await.unwrap_err();
|
||||
assert_eq!(err.status_code(), StatusCode::TableNotFound);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_prepare_table() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let table_name = "foo";
|
||||
let table_id = 1024;
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::physical(vec![]),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let task = DropTableTask {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table: "bar".to_string(),
|
||||
table_id,
|
||||
drop_if_exists: true,
|
||||
};
|
||||
|
||||
// Drop if exists
|
||||
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
|
||||
procedure.on_prepare().await.unwrap();
|
||||
|
||||
let task = DropTableTask {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table: table_name.to_string(),
|
||||
table_id,
|
||||
drop_if_exists: false,
|
||||
};
|
||||
|
||||
// Drop table
|
||||
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context);
|
||||
procedure.on_prepare().await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_datanode_drop_regions() {
|
||||
let (tx, mut rx) = mpsc::channel(8);
|
||||
let datanode_handler = DatanodeWatcher(tx);
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(datanode_handler));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let table_id = 1024;
|
||||
let table_name = "foo";
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::physical(vec![
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 1)),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![Peer::empty(5)],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 2)),
|
||||
leader_peer: Some(Peer::empty(2)),
|
||||
follower_peers: vec![Peer::empty(4)],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 3)),
|
||||
leader_peer: Some(Peer::empty(3)),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
]),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let task = DropTableTask {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table: table_name.to_string(),
|
||||
table_id,
|
||||
drop_if_exists: false,
|
||||
};
|
||||
// Drop table
|
||||
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context);
|
||||
procedure.on_prepare().await.unwrap();
|
||||
procedure.on_datanode_drop_regions().await.unwrap();
|
||||
|
||||
let check = |peer: Peer,
|
||||
request: RegionRequest,
|
||||
expected_peer_id: u64,
|
||||
expected_region_id: RegionId| {
|
||||
assert_eq!(peer.id, expected_peer_id);
|
||||
let Some(region_request::Body::Drop(req)) = request.body else {
|
||||
unreachable!();
|
||||
};
|
||||
assert_eq!(req.region_id, expected_region_id);
|
||||
};
|
||||
|
||||
let mut results = Vec::new();
|
||||
for _ in 0..3 {
|
||||
let result = rx.try_recv().unwrap();
|
||||
results.push(result);
|
||||
}
|
||||
results.sort_unstable_by(|(a, _), (b, _)| a.id.cmp(&b.id));
|
||||
|
||||
let (peer, request) = results.remove(0);
|
||||
check(peer, request, 1, RegionId::new(table_id, 1));
|
||||
let (peer, request) = results.remove(0);
|
||||
check(peer, request, 2, RegionId::new(table_id, 2));
|
||||
let (peer, request) = results.remove(0);
|
||||
check(peer, request, 3, RegionId::new(table_id, 3));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_rollback() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
|
||||
let kv_backend = Arc::new(MemoryKvBackend::new());
|
||||
let ddl_context = new_ddl_context_with_kv_backend(datanode_manager, kv_backend.clone());
|
||||
let cluster_id = 1;
|
||||
// Prepares physical table metadata.
|
||||
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
|
||||
let TableMetadata {
|
||||
table_id,
|
||||
table_route,
|
||||
..
|
||||
} = ddl_context
|
||||
.table_metadata_allocator
|
||||
.create(
|
||||
&TableMetadataAllocatorContext { cluster_id },
|
||||
&create_physical_table_task,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
create_physical_table_task.set_table_id(table_id);
|
||||
create_physical_table_metadata(
|
||||
&ddl_context,
|
||||
create_physical_table_task.table_info.clone(),
|
||||
TableRouteValue::Physical(table_route),
|
||||
)
|
||||
.await;
|
||||
// The create logical table procedure.
|
||||
let physical_table_id = table_id;
|
||||
// Creates the logical table metadata.
|
||||
let task = test_create_logical_table_task("foo");
|
||||
let mut procedure = CreateLogicalTablesProcedure::new(
|
||||
cluster_id,
|
||||
vec![task],
|
||||
physical_table_id,
|
||||
ddl_context.clone(),
|
||||
);
|
||||
procedure.on_prepare().await.unwrap();
|
||||
let ctx = ProcedureContext {
|
||||
procedure_id: ProcedureId::random(),
|
||||
provider: Arc::new(MockContextProvider::default()),
|
||||
};
|
||||
procedure.execute(&ctx).await.unwrap();
|
||||
// Triggers procedure to create table metadata
|
||||
let status = procedure.execute(&ctx).await.unwrap();
|
||||
let table_ids = status.downcast_output_ref::<Vec<u32>>().unwrap();
|
||||
assert_eq!(*table_ids, vec![1025]);
|
||||
|
||||
let expected_kvs = kv_backend.dump();
|
||||
// Drops the physical table
|
||||
{
|
||||
let task = DropTableTask {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table: "phy_table".to_string(),
|
||||
table_id: physical_table_id,
|
||||
drop_if_exists: false,
|
||||
};
|
||||
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
|
||||
procedure.on_prepare().await.unwrap();
|
||||
procedure.on_delete_metadata().await.unwrap();
|
||||
let ctx = ProcedureContext {
|
||||
procedure_id: ProcedureId::random(),
|
||||
provider: Arc::new(MockContextProvider::default()),
|
||||
};
|
||||
procedure.rollback(&ctx).await.unwrap();
|
||||
// Rollback again
|
||||
procedure.rollback(&ctx).await.unwrap();
|
||||
let kvs = kv_backend.dump();
|
||||
assert_eq!(kvs, expected_kvs);
|
||||
}
|
||||
|
||||
// Drops the logical table
|
||||
let task = DropTableTask {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table: "foo".to_string(),
|
||||
table_id: table_ids[0],
|
||||
drop_if_exists: false,
|
||||
};
|
||||
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
|
||||
procedure.on_prepare().await.unwrap();
|
||||
procedure.on_delete_metadata().await.unwrap();
|
||||
let ctx = ProcedureContext {
|
||||
procedure_id: ProcedureId::random(),
|
||||
provider: Arc::new(MockContextProvider::default()),
|
||||
};
|
||||
procedure.rollback(&ctx).await.unwrap();
|
||||
// Rollback again
|
||||
procedure.rollback(&ctx).await.unwrap();
|
||||
let kvs = kv_backend.dump();
|
||||
assert_eq!(kvs, expected_kvs);
|
||||
}
|
||||
@@ -42,7 +42,6 @@ use crate::error::{
|
||||
};
|
||||
use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
|
||||
use crate::region_keeper::MemoryRegionKeeperRef;
|
||||
use crate::rpc::ddl::DdlTask::{
|
||||
@@ -206,13 +205,12 @@ impl DdlManager {
|
||||
pub async fn submit_alter_table_task(
|
||||
&self,
|
||||
cluster_id: ClusterId,
|
||||
table_id: TableId,
|
||||
alter_table_task: AlterTableTask,
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
) -> Result<(ProcedureId, Option<Output>)> {
|
||||
let context = self.create_context();
|
||||
|
||||
let procedure =
|
||||
AlterTableProcedure::new(cluster_id, alter_table_task, table_info_value, context)?;
|
||||
let procedure = AlterTableProcedure::new(cluster_id, table_id, alter_table_task, context)?;
|
||||
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
@@ -285,18 +283,10 @@ impl DdlManager {
|
||||
&self,
|
||||
cluster_id: ClusterId,
|
||||
drop_table_task: DropTableTask,
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
table_route_value: DeserializedValueWithBytes<TableRouteValue>,
|
||||
) -> Result<(ProcedureId, Option<Output>)> {
|
||||
let context = self.create_context();
|
||||
|
||||
let procedure = DropTableProcedure::new(
|
||||
cluster_id,
|
||||
drop_table_task,
|
||||
table_route_value,
|
||||
table_info_value,
|
||||
context,
|
||||
);
|
||||
let procedure = DropTableProcedure::new(cluster_id, drop_table_task, context);
|
||||
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
@@ -442,12 +432,12 @@ async fn handle_alter_table_task(
|
||||
})?
|
||||
.table_id();
|
||||
|
||||
let (table_info_value, table_route_value) = ddl_manager
|
||||
let table_route_value = ddl_manager
|
||||
.table_metadata_manager()
|
||||
.get_full_table_info(table_id)
|
||||
.await?;
|
||||
|
||||
let table_route_value = table_route_value
|
||||
.table_route_manager()
|
||||
.table_route_storage()
|
||||
.get_raw(table_id)
|
||||
.await?
|
||||
.context(TableRouteNotFoundSnafu { table_id })?
|
||||
.into_inner();
|
||||
|
||||
@@ -458,12 +448,8 @@ async fn handle_alter_table_task(
|
||||
}
|
||||
);
|
||||
|
||||
let table_info_value = table_info_value.with_context(|| TableInfoNotFoundSnafu {
|
||||
table: table_ref.to_string(),
|
||||
})?;
|
||||
|
||||
let (id, _) = ddl_manager
|
||||
.submit_alter_table_task(cluster_id, alter_table_task, table_info_value)
|
||||
.submit_alter_table_task(cluster_id, table_id, alter_table_task)
|
||||
.await?;
|
||||
|
||||
info!("Table: {table_id} is altered via procedure_id {id:?}");
|
||||
@@ -480,32 +466,8 @@ async fn handle_drop_table_task(
|
||||
drop_table_task: DropTableTask,
|
||||
) -> Result<SubmitDdlTaskResponse> {
|
||||
let table_id = drop_table_task.table_id;
|
||||
let table_metadata_manager = &ddl_manager.table_metadata_manager();
|
||||
let table_ref = drop_table_task.table_ref();
|
||||
|
||||
let table_info_value = table_metadata_manager
|
||||
.table_info_manager()
|
||||
.get(table_id)
|
||||
.await?;
|
||||
let (_, table_route_value) = table_metadata_manager
|
||||
.table_route_manager()
|
||||
.get_physical_table_route(table_id)
|
||||
.await?;
|
||||
|
||||
let table_info_value = table_info_value.with_context(|| TableInfoNotFoundSnafu {
|
||||
table: table_ref.to_string(),
|
||||
})?;
|
||||
|
||||
let table_route_value =
|
||||
DeserializedValueWithBytes::from_inner(TableRouteValue::Physical(table_route_value));
|
||||
|
||||
let (id, _) = ddl_manager
|
||||
.submit_drop_table_task(
|
||||
cluster_id,
|
||||
drop_table_task,
|
||||
table_info_value,
|
||||
table_route_value,
|
||||
)
|
||||
.submit_drop_table_task(cluster_id, drop_table_task)
|
||||
.await?;
|
||||
|
||||
info!("Table: {table_id} is dropped via procedure_id {id:?}");
|
||||
|
||||
@@ -421,6 +421,9 @@ pub enum Error {
|
||||
#[snafu(display("Invalid role: {}", role))]
|
||||
InvalidRole { role: i32, location: Location },
|
||||
|
||||
#[snafu(display("Atomic key changed: {err_msg}"))]
|
||||
CasKeyChanged { err_msg: String, location: Location },
|
||||
|
||||
#[snafu(display("Failed to parse {} from utf8", name))]
|
||||
FromUtf8 {
|
||||
name: String,
|
||||
@@ -440,7 +443,8 @@ impl ErrorExt for Error {
|
||||
| EtcdTxnOpResponse { .. }
|
||||
| EtcdFailed { .. }
|
||||
| EtcdTxnFailed { .. }
|
||||
| ConnectEtcd { .. } => StatusCode::Internal,
|
||||
| ConnectEtcd { .. }
|
||||
| CasKeyChanged { .. } => StatusCode::Internal,
|
||||
|
||||
SerdeJson { .. }
|
||||
| ParseOption { .. }
|
||||
|
||||
@@ -56,9 +56,12 @@ pub mod table_region;
|
||||
pub mod table_route;
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
pub mod test_utils;
|
||||
// TODO(weny): remove it.
|
||||
#[allow(dead_code)]
|
||||
mod tombstone;
|
||||
mod txn_helper;
|
||||
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use std::fmt::Debug;
|
||||
use std::ops::Deref;
|
||||
use std::sync::Arc;
|
||||
@@ -83,9 +86,13 @@ use self::catalog_name::{CatalogManager, CatalogNameKey, CatalogNameValue};
|
||||
use self::datanode_table::RegionInfo;
|
||||
use self::schema_name::{SchemaManager, SchemaNameKey, SchemaNameValue};
|
||||
use self::table_route::{TableRouteManager, TableRouteValue};
|
||||
use self::tombstone::TombstoneManager;
|
||||
use crate::ddl::utils::region_storage_path;
|
||||
use crate::error::{self, Result, SerdeJsonSnafu};
|
||||
use crate::kv_backend::txn::{Txn, TxnOpResponse};
|
||||
use crate::error::{self, Result, SerdeJsonSnafu, UnexpectedSnafu};
|
||||
use crate::key::table_route::TableRouteKey;
|
||||
use crate::key::tombstone::Key;
|
||||
use crate::key::txn_helper::TxnOpGetResponseSet;
|
||||
use crate::kv_backend::txn::{Txn, TxnOp, TxnOpResponse};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::rpc::router::{region_distribution, RegionRoute, RegionStatus};
|
||||
use crate::table_name::TableName;
|
||||
@@ -97,7 +104,6 @@ pub const MAINTENANCE_KEY: &str = "maintenance";
|
||||
const DATANODE_TABLE_KEY_PREFIX: &str = "__dn_table";
|
||||
const TABLE_REGION_KEY_PREFIX: &str = "__table_region";
|
||||
|
||||
pub const REMOVED_PREFIX: &str = "__removed";
|
||||
pub const TABLE_INFO_KEY_PREFIX: &str = "__table_info";
|
||||
pub const TABLE_NAME_KEY_PREFIX: &str = "__table_name";
|
||||
pub const CATALOG_NAME_KEY_PREFIX: &str = "__catalog_name";
|
||||
@@ -145,6 +151,33 @@ pub trait TableMetaKey {
    fn as_raw_key(&self) -> Vec<u8>;
}

pub(crate) trait TableMetaKeyGetTxnOp {
    fn build_get_op(
        &self,
    ) -> (
        TxnOp,
        impl for<'a> FnMut(&'a mut TxnOpGetResponseSet) -> Option<Vec<u8>>,
    );
}

impl TableMetaKey for String {
    fn as_raw_key(&self) -> Vec<u8> {
        self.as_bytes().to_vec()
    }
}

impl TableMetaKeyGetTxnOp for String {
    fn build_get_op(
        &self,
    ) -> (
        TxnOp,
        impl for<'a> FnMut(&'a mut TxnOpGetResponseSet) -> Option<Vec<u8>>,
    ) {
        let key = self.as_raw_key();
        (TxnOp::Get(key.clone()), TxnOpGetResponseSet::filter(key))
    }
}

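For orientation (a sketch, not part of the diff): build_get_op pairs a TxnOp::Get with a closure that later pulls this key's raw value out of a batched transaction response. A single-key version of the call pattern, mirroring the reworked get_full_table_info below, looks roughly like this; the typed behaviour of decode_with is assumed from that usage.

// Sketch only: assumes decode_with's typed decoding matches its use in
// get_full_table_info; error handling is left to `?`.
async fn get_table_info_raw(
    kv_backend: &KvBackendRef,
    table_id: TableId,
) -> Result<Option<DeserializedValueWithBytes<TableInfoValue>>> {
    // One get op plus the filter that recognizes this key in the response set.
    let (get_op, filter) = TableInfoKey::new(table_id).build_get_op();
    // Any number of such ops can be batched into a single txn.
    let txn = Txn::new().and_then(vec![get_op]);
    let mut resp = kv_backend.txn(txn).await?;
    let mut set = TxnOpGetResponseSet::from(&mut resp.responses);
    // decode_with wraps the raw-bytes filter into a typed decoder for the value.
    TxnOpGetResponseSet::decode_with(filter)(&mut set)
}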
pub trait TableMetaValue {
|
||||
fn try_from_raw_value(raw_value: &[u8]) -> Result<Self>
|
||||
where
|
||||
@@ -162,6 +195,7 @@ pub struct TableMetadataManager {
|
||||
catalog_manager: CatalogManager,
|
||||
schema_manager: SchemaManager,
|
||||
table_route_manager: TableRouteManager,
|
||||
tombstone_manager: TombstoneManager,
|
||||
kv_backend: KvBackendRef,
|
||||
}
|
||||
|
||||
@@ -283,7 +317,7 @@ impl<T: Serialize + DeserializeOwned + TableMetaValue> DeserializedValueWithByte
|
||||
self.bytes.to_vec()
|
||||
}
|
||||
|
||||
/// Notes: used for test purpose.
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
pub fn from_inner(inner: T) -> Self {
|
||||
let bytes = serde_json::to_vec(&inner).unwrap();
|
||||
|
||||
@@ -303,6 +337,7 @@ impl TableMetadataManager {
|
||||
catalog_manager: CatalogManager::new(kv_backend.clone()),
|
||||
schema_manager: SchemaManager::new(kv_backend.clone()),
|
||||
table_route_manager: TableRouteManager::new(kv_backend.clone()),
|
||||
tombstone_manager: TombstoneManager::new(kv_backend.clone()),
|
||||
kv_backend,
|
||||
}
|
||||
}
|
||||
@@ -363,19 +398,16 @@ impl TableMetadataManager {
|
||||
Option<DeserializedValueWithBytes<TableInfoValue>>,
|
||||
Option<DeserializedValueWithBytes<TableRouteValue>>,
|
||||
)> {
|
||||
let (get_table_route_txn, table_route_decoder) = self
|
||||
.table_route_manager
|
||||
.table_route_storage()
|
||||
.build_get_txn(table_id);
|
||||
let (get_table_info_txn, table_info_decoder) =
|
||||
self.table_info_manager.build_get_txn(table_id);
|
||||
|
||||
let txn = Txn::merge_all(vec![get_table_route_txn, get_table_info_txn]);
|
||||
let res = self.kv_backend.txn(txn).await?;
|
||||
|
||||
let table_info_value = table_info_decoder(&res.responses)?;
|
||||
let table_route_value = table_route_decoder(&res.responses)?;
|
||||
let table_info_key = TableInfoKey::new(table_id);
|
||||
let table_route_key = TableRouteKey::new(table_id);
|
||||
let (table_info_txn, table_info_filter) = table_info_key.build_get_op();
|
||||
let (table_route_txn, table_route_filter) = table_route_key.build_get_op();
|
||||
|
||||
let txn = Txn::new().and_then(vec![table_info_txn, table_route_txn]);
|
||||
let mut res = self.kv_backend.txn(txn).await?;
|
||||
let mut set = TxnOpGetResponseSet::from(&mut res.responses);
|
||||
let table_info_value = TxnOpGetResponseSet::decode_with(table_info_filter)(&mut set)?;
|
||||
let table_route_value = TxnOpGetResponseSet::decode_with(table_route_filter)(&mut set)?;
|
||||
Ok((table_info_value, table_route_value))
|
||||
}
|
||||
|
||||
@@ -545,47 +577,106 @@ impl TableMetadataManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Deletes metadata for table.
|
||||
/// The caller MUST ensure it has the exclusive access to `TableNameKey`.
|
||||
pub async fn delete_table_metadata(
|
||||
fn table_metadata_keys(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
table_name: &TableName,
|
||||
region_routes: &[RegionRoute],
|
||||
) -> Result<()> {
|
||||
// Deletes table name.
|
||||
table_route_value: &TableRouteValue,
|
||||
) -> Result<Vec<Key>> {
|
||||
// Builds keys
|
||||
let datanode_ids = if table_route_value.is_physical() {
|
||||
region_distribution(table_route_value.region_routes()?)
|
||||
.into_keys()
|
||||
.collect()
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
let mut keys = Vec::with_capacity(3 + datanode_ids.len());
|
||||
let table_name = TableNameKey::new(
|
||||
&table_name.catalog_name,
|
||||
&table_name.schema_name,
|
||||
&table_name.table_name,
|
||||
);
|
||||
let table_info_key = TableInfoKey::new(table_id);
|
||||
let table_route_key = TableRouteKey::new(table_id);
|
||||
let datanode_table_keys = datanode_ids
|
||||
.into_iter()
|
||||
.map(|datanode_id| DatanodeTableKey::new(datanode_id, table_id))
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
let delete_table_name_txn = self.table_name_manager().build_delete_txn(&table_name)?;
|
||||
keys.push(Key::compare_and_swap(table_name.as_raw_key()));
|
||||
keys.push(Key::new(table_info_key.as_raw_key()));
|
||||
keys.push(Key::new(table_route_key.as_raw_key()));
|
||||
for key in &datanode_table_keys {
|
||||
keys.push(Key::new(key.as_raw_key()));
|
||||
}
|
||||
Ok(keys)
|
||||
}
|
||||
|
||||
// Deletes table info.
|
||||
let delete_table_info_txn = self.table_info_manager().build_delete_txn(table_id)?;
|
||||
/// Deletes metadata for table **logically**.
|
||||
/// The caller MUST ensure it has the exclusive access to `TableNameKey`.
|
||||
pub async fn delete_table_metadata(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
table_name: &TableName,
|
||||
table_route_value: &TableRouteValue,
|
||||
) -> Result<()> {
|
||||
let keys = self.table_metadata_keys(table_id, table_name, table_route_value)?;
|
||||
self.tombstone_manager.create(keys).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Deletes datanode table key value pairs.
|
||||
let distribution = region_distribution(region_routes);
|
||||
let delete_datanode_txn = self
|
||||
.datanode_table_manager()
|
||||
.build_delete_txn(table_id, distribution)?;
|
||||
/// Deletes metadata tombstone for table **permanently**.
|
||||
/// The caller MUST ensure it has the exclusive access to `TableNameKey`.
|
||||
pub async fn delete_table_metadata_tombstone(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
table_name: &TableName,
|
||||
table_route_value: &TableRouteValue,
|
||||
) -> Result<()> {
|
||||
let keys = self
|
||||
.table_metadata_keys(table_id, table_name, table_route_value)?
|
||||
.into_iter()
|
||||
.map(|key| key.into_bytes())
|
||||
.collect::<Vec<_>>();
|
||||
self.tombstone_manager.delete(keys).await
|
||||
}
|
||||
|
||||
// Deletes table route.
|
||||
let delete_table_route_txn = self
|
||||
.table_route_manager()
|
||||
.table_route_storage()
|
||||
.build_delete_txn(table_id)?;
|
||||
/// Restores metadata for table.
|
||||
/// The caller MUST ensure it has the exclusive access to `TableNameKey`.
|
||||
pub async fn restore_table_metadata(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
table_name: &TableName,
|
||||
table_route_value: &TableRouteValue,
|
||||
) -> Result<()> {
|
||||
let keys = self.table_metadata_keys(table_id, table_name, table_route_value)?;
|
||||
self.tombstone_manager.restore(keys).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
let txn = Txn::merge_all(vec![
|
||||
delete_table_name_txn,
|
||||
delete_table_info_txn,
|
||||
delete_datanode_txn,
|
||||
delete_table_route_txn,
|
||||
]);
|
||||
/// Deletes metadata for table **permanently**.
|
||||
/// The caller MUST ensure it has the exclusive access to `TableNameKey`.
|
||||
pub async fn destroy_table_metadata(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
table_name: &TableName,
|
||||
table_route_value: &TableRouteValue,
|
||||
) -> Result<()> {
|
||||
let operations = self
|
||||
.table_metadata_keys(table_id, table_name, table_route_value)?
|
||||
.into_iter()
|
||||
.map(|key| TxnOp::Delete(key.into_bytes()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// It's always successes.
|
||||
let _ = self.kv_backend.txn(txn).await?;
|
||||
let txn = Txn::new().and_then(operations);
|
||||
let resp = self.kv_backend.txn(txn).await?;
|
||||
ensure!(
|
||||
resp.succeeded,
|
||||
UnexpectedSnafu {
|
||||
err_msg: format!("Failed to destroy table metadata: {table_id}")
|
||||
}
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
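Taken together, the methods above split dropping a table into a logical phase and a permanent phase: delete_table_metadata moves the keys into tombstones (presumably under REMOVED_PREFIX), restore_table_metadata moves them back, and delete_table_metadata_tombstone or destroy_table_metadata makes the removal final. A rough sketch of the intended call order, illustrative only and not code from this diff:

// Hypothetical helper, only to show the call order; the real drop-table
// procedure drives these steps across its own states.
async fn drop_table_metadata_two_phase(
    manager: &TableMetadataManager,
    table_id: TableId,
    table_name: &TableName,
    route: &TableRouteValue,
    commit: bool,
) -> Result<()> {
    // Phase 1: logical delete, the metadata keys become tombstones.
    manager.delete_table_metadata(table_id, table_name, route).await?;
    if commit {
        // Phase 2a: the drop succeeded, purge the tombstoned keys permanently.
        manager
            .delete_table_metadata_tombstone(table_id, table_name, route)
            .await?;
    } else {
        // Phase 2b: rollback, restore the keys from their tombstones.
        manager.restore_table_metadata(table_id, table_name, route).await?;
    }
    Ok(())
}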
|
||||
@@ -595,7 +686,7 @@ impl TableMetadataManager {
|
||||
/// and the new `TableNameKey` MUST be empty.
|
||||
pub async fn rename_table(
|
||||
&self,
|
||||
current_table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
current_table_info_value: &DeserializedValueWithBytes<TableInfoValue>,
|
||||
new_table_name: String,
|
||||
) -> Result<()> {
|
||||
let current_table_info = ¤t_table_info_value.table_info;
|
||||
@@ -629,7 +720,7 @@ impl TableMetadataManager {
|
||||
// Updates table info.
|
||||
let (update_table_info_txn, on_update_table_info_failure) = self
|
||||
.table_info_manager()
|
||||
.build_update_txn(table_id, ¤t_table_info_value, &new_table_info_value)?;
|
||||
.build_update_txn(table_id, current_table_info_value, &new_table_info_value)?;
|
||||
|
||||
let txn = Txn::merge_all(vec![update_table_name_txn, update_table_info_txn]);
|
||||
|
||||
@@ -653,7 +744,7 @@ impl TableMetadataManager {
|
||||
/// Updates table info and returns an error if different metadata exists.
|
||||
pub async fn update_table_info(
|
||||
&self,
|
||||
current_table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
current_table_info_value: &DeserializedValueWithBytes<TableInfoValue>,
|
||||
new_table_info: RawTableInfo,
|
||||
) -> Result<()> {
|
||||
let table_id = current_table_info_value.table_info.ident.table_id;
|
||||
@@ -663,7 +754,7 @@ impl TableMetadataManager {
|
||||
// Updates table info.
|
||||
let (update_table_info_txn, on_update_table_info_failure) = self
|
||||
.table_info_manager()
|
||||
.build_update_txn(table_id, ¤t_table_info_value, &new_table_info_value)?;
|
||||
.build_update_txn(table_id, current_table_info_value, &new_table_info_value)?;
|
||||
|
||||
let r = self.kv_backend.txn(update_table_info_txn).await?;
|
||||
|
||||
@@ -687,7 +778,7 @@ impl TableMetadataManager {
|
||||
|
||||
pub async fn batch_update_table_info_values(
|
||||
&self,
|
||||
table_info_value_pairs: Vec<(TableInfoValue, RawTableInfo)>,
|
||||
table_info_value_pairs: Vec<(DeserializedValueWithBytes<TableInfoValue>, RawTableInfo)>,
|
||||
) -> Result<()> {
|
||||
let len = table_info_value_pairs.len();
|
||||
let mut txns = Vec::with_capacity(len);
|
||||
@@ -708,7 +799,7 @@ impl TableMetadataManager {
|
||||
let (update_table_info_txn, on_update_table_info_failure) =
|
||||
self.table_info_manager().build_update_txn(
|
||||
table_id,
|
||||
&DeserializedValueWithBytes::from_inner(table_info_value),
|
||||
&table_info_value,
|
||||
&new_table_info_value,
|
||||
)?;
|
||||
|
||||
@@ -873,6 +964,38 @@ macro_rules! impl_table_meta_value {
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_table_meta_key_get_txn_op {
|
||||
($($key: ty), *) => {
|
||||
$(
|
||||
impl $crate::key::TableMetaKeyGetTxnOp for $key {
|
||||
/// Returns a [TxnOp] to retrieve the corresponding value
|
||||
/// and a filter to retrieve the value from the [TxnOpGetResponseSet]
|
||||
fn build_get_op(
|
||||
&self,
|
||||
) -> (
|
||||
TxnOp,
|
||||
impl for<'a> FnMut(
|
||||
&'a mut TxnOpGetResponseSet,
|
||||
) -> Option<Vec<u8>>,
|
||||
) {
|
||||
let raw_key = self.as_raw_key();
|
||||
(
|
||||
TxnOp::Get(raw_key.clone()),
|
||||
TxnOpGetResponseSet::filter(raw_key),
|
||||
)
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
impl_table_meta_key_get_txn_op! {
|
||||
TableNameKey<'_>,
|
||||
TableInfoKey,
|
||||
TableRouteKey,
|
||||
DatanodeTableKey
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! impl_optional_meta_value {
|
||||
($($val_ty: ty), *) => {
|
||||
@@ -907,6 +1030,7 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use bytes::Bytes;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_time::util::current_time_millis;
|
||||
use futures::TryStreamExt;
|
||||
use store_api::storage::RegionId;
|
||||
@@ -914,6 +1038,7 @@ mod tests {
|
||||
|
||||
use super::datanode_table::DatanodeTableKey;
|
||||
use super::test_utils;
|
||||
use crate::ddl::test_util::create_table::test_create_table_task;
|
||||
use crate::ddl::utils::region_storage_path;
|
||||
use crate::error::Result;
|
||||
use crate::key::datanode_table::RegionInfo;
|
||||
@@ -1155,15 +1280,10 @@ mod tests {
|
||||
table_info.schema_name,
|
||||
table_info.name,
|
||||
);
|
||||
let table_route_value = &TableRouteValue::physical(region_routes.clone());
|
||||
// deletes metadata.
|
||||
table_metadata_manager
|
||||
.delete_table_metadata(table_id, &table_name, region_routes)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// if metadata was already deleted, it should be ok.
|
||||
table_metadata_manager
|
||||
.delete_table_metadata(table_id, &table_name, region_routes)
|
||||
.delete_table_metadata(table_id, &table_name, table_route_value)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -1229,12 +1349,12 @@ mod tests {
|
||||
DeserializedValueWithBytes::from_inner(TableInfoValue::new(table_info.clone()));
|
||||
|
||||
table_metadata_manager
|
||||
.rename_table(table_info_value.clone(), new_table_name.clone())
|
||||
.rename_table(&table_info_value, new_table_name.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
// if remote metadata was updated, it should be ok.
|
||||
table_metadata_manager
|
||||
.rename_table(table_info_value.clone(), new_table_name.clone())
|
||||
.rename_table(&table_info_value, new_table_name.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
let mut modified_table_info = table_info.clone();
|
||||
@@ -1244,7 +1364,7 @@ mod tests {
|
||||
// if the table_info_value is wrong, it should return an error.
|
||||
// The ABA problem.
|
||||
assert!(table_metadata_manager
|
||||
.rename_table(modified_table_info_value.clone(), new_table_name.clone())
|
||||
.rename_table(&modified_table_info_value, new_table_name.clone())
|
||||
.await
|
||||
.is_err());
|
||||
|
||||
@@ -1302,12 +1422,12 @@ mod tests {
|
||||
DeserializedValueWithBytes::from_inner(TableInfoValue::new(table_info.clone()));
|
||||
// should be ok.
|
||||
table_metadata_manager
|
||||
.update_table_info(current_table_info_value.clone(), new_table_info.clone())
|
||||
.update_table_info(¤t_table_info_value, new_table_info.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
// if table info was updated, it should be ok.
|
||||
table_metadata_manager
|
||||
.update_table_info(current_table_info_value.clone(), new_table_info.clone())
|
||||
.update_table_info(¤t_table_info_value, new_table_info.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -1329,7 +1449,7 @@ mod tests {
|
||||
// if the current_table_info_value is wrong, it should return an error.
|
||||
// The ABA problem.
|
||||
assert!(table_metadata_manager
|
||||
.update_table_info(wrong_table_info_value, new_table_info)
|
||||
.update_table_info(&wrong_table_info_value, new_table_info)
|
||||
.await
|
||||
.is_err())
|
||||
}
|
||||
@@ -1559,4 +1679,118 @@ mod tests {
|
||||
.await
|
||||
.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_destroy_table_metadata() {
|
||||
let mem_kv = Arc::new(MemoryKvBackend::default());
|
||||
let table_metadata_manager = TableMetadataManager::new(mem_kv.clone());
|
||||
let table_id = 1025;
|
||||
let table_name = "foo";
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
let options = [(0, "test".to_string())].into();
|
||||
table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info,
|
||||
TableRouteValue::physical(vec![
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 1)),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![Peer::empty(5)],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 2)),
|
||||
leader_peer: Some(Peer::empty(2)),
|
||||
follower_peers: vec![Peer::empty(4)],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 3)),
|
||||
leader_peer: Some(Peer::empty(3)),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
]),
|
||||
options,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let table_name = TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name);
|
||||
let table_route_value = table_metadata_manager
|
||||
.table_route_manager
|
||||
.table_route_storage()
|
||||
.get_raw(table_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
table_metadata_manager
|
||||
.destroy_table_metadata(table_id, &table_name, &table_route_value)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(mem_kv.is_empty());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_restore_table_metadata() {
|
||||
let mem_kv = Arc::new(MemoryKvBackend::default());
|
||||
let table_metadata_manager = TableMetadataManager::new(mem_kv.clone());
|
||||
let table_id = 1025;
|
||||
let table_name = "foo";
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
let options = [(0, "test".to_string())].into();
|
||||
table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info,
|
||||
TableRouteValue::physical(vec![
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 1)),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![Peer::empty(5)],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 2)),
|
||||
leader_peer: Some(Peer::empty(2)),
|
||||
follower_peers: vec![Peer::empty(4)],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 3)),
|
||||
leader_peer: Some(Peer::empty(3)),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
]),
|
||||
options,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let expected_result = mem_kv.dump();
|
||||
let table_route_value = table_metadata_manager
|
||||
.table_route_manager
|
||||
.table_route_storage()
|
||||
.get_raw(table_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
let region_routes = table_route_value.region_routes().unwrap();
|
||||
let table_name = TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name);
|
||||
let table_route_value = TableRouteValue::physical(region_routes.clone());
|
||||
table_metadata_manager
|
||||
.delete_table_metadata(table_id, &table_name, &table_route_value)
|
||||
.await
|
||||
.unwrap();
|
||||
table_metadata_manager
|
||||
.restore_table_metadata(table_id, &table_name, &table_route_value)
|
||||
.await
|
||||
.unwrap();
|
||||
let kvs = mem_kv.dump();
|
||||
assert_eq!(kvs, expected_result);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -55,6 +55,7 @@ pub struct RegionInfo {
|
||||
pub region_wal_options: HashMap<RegionNumber, String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||
pub struct DatanodeTableKey {
|
||||
pub datanode_id: DatanodeId,
|
||||
pub table_id: TableId,
|
||||
|
||||
@@ -18,10 +18,11 @@ use serde::{Deserialize, Serialize};
|
||||
use table::metadata::{RawTableInfo, TableId};
|
||||
use table::table_reference::TableReference;
|
||||
|
||||
use super::{txn_helper, DeserializedValueWithBytes, TableMetaValue, TABLE_INFO_KEY_PREFIX};
|
||||
use crate::error::Result;
|
||||
use crate::key::TableMetaKey;
|
||||
use crate::kv_backend::txn::{Txn, TxnOp, TxnOpResponse};
|
||||
use crate::key::{
|
||||
txn_helper, DeserializedValueWithBytes, TableMetaKey, TableMetaValue, TABLE_INFO_KEY_PREFIX,
|
||||
};
|
||||
use crate::kv_backend::txn::{Txn, TxnOpResponse};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::rpc::store::BatchGetRequest;
|
||||
use crate::table_name::TableName;
|
||||
@@ -101,20 +102,6 @@ impl TableInfoManager {
|
||||
Self { kv_backend }
|
||||
}
|
||||
|
||||
pub(crate) fn build_get_txn(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
) -> (
|
||||
Txn,
|
||||
impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<DeserializedValueWithBytes<TableInfoValue>>>,
|
||||
) {
|
||||
let key = TableInfoKey::new(table_id);
|
||||
let raw_key = key.as_raw_key();
|
||||
let txn = Txn::new().and_then(vec![TxnOp::Get(raw_key.clone())]);
|
||||
|
||||
(txn, txn_helper::build_txn_response_decoder_fn(raw_key))
|
||||
}
|
||||
|
||||
/// Builds a create table info transaction. It expects that the `__table_info/{table_id}` key is not occupied.
pub(crate) fn build_create_txn(
|
||||
&self,
|
||||
@@ -156,16 +143,6 @@ impl TableInfoManager {
|
||||
Ok((txn, txn_helper::build_txn_response_decoder_fn(raw_key)))
|
||||
}
|
||||
|
||||
/// Builds a delete table info transaction.
|
||||
pub(crate) fn build_delete_txn(&self, table_id: TableId) -> Result<Txn> {
|
||||
let key = TableInfoKey::new(table_id);
|
||||
let raw_key = key.as_raw_key();
|
||||
|
||||
let txn = Txn::new().and_then(vec![TxnOp::Delete(raw_key)]);
|
||||
|
||||
Ok(txn)
|
||||
}
|
||||
|
||||
pub async fn get(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
@@ -209,6 +186,38 @@ impl TableInfoManager {
|
||||
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
/// Returns a batch of `DeserializedValueWithBytes<TableInfoValue>`.
pub async fn batch_get_raw(
|
||||
&self,
|
||||
table_ids: &[TableId],
|
||||
) -> Result<HashMap<TableId, DeserializedValueWithBytes<TableInfoValue>>> {
|
||||
let lookup_table = table_ids
|
||||
.iter()
|
||||
.map(|id| (TableInfoKey::new(*id).as_raw_key(), id))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
let resp = self
|
||||
.kv_backend
|
||||
.batch_get(BatchGetRequest {
|
||||
keys: lookup_table.keys().cloned().collect::<Vec<_>>(),
|
||||
})
|
||||
.await?;
|
||||
|
||||
let values = resp
|
||||
.kvs
|
||||
.iter()
|
||||
.map(|kv| {
|
||||
Ok((
|
||||
// Safety: must exist.
|
||||
**lookup_table.get(kv.key()).unwrap(),
|
||||
DeserializedValueWithBytes::from_inner_slice(&kv.value)?,
|
||||
))
|
||||
})
|
||||
.collect::<Result<HashMap<_, _>>>()?;
|
||||
|
||||
Ok(values)
|
||||
}
|
||||
}
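// Illustrative sketch (not part of this change): typical use of `batch_get_raw`
// when a caller needs both the decoded `TableInfoValue`s and their raw bytes
// (e.g. to build later compare-and-swap operations). Names are hypothetical.
async fn example_batch_get_raw(table_info_manager: &TableInfoManager) -> Result<()> {
    let table_ids = [1025, 1026, 1027];
    let values = table_info_manager.batch_get_raw(&table_ids).await?;
    for (table_id, value) in &values {
        // `get_inner_ref()` exposes the decoded value, while the wrapper keeps
        // the original bytes around for CAS-style transactions.
        let _info: &TableInfoValue = value.get_inner_ref();
        let _ = table_id;
    }
    Ok(())
}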
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -194,14 +194,6 @@ impl TableNameManager {
|
||||
Ok(txn)
|
||||
}
|
||||
|
||||
/// Builds a delete table name transaction. It executes only if the primary key comparisons succeed.
pub(crate) fn build_delete_txn(&self, key: &TableNameKey<'_>) -> Result<Txn> {
|
||||
let raw_key = key.as_raw_key();
|
||||
let txn = Txn::new().and_then(vec![TxnOp::Delete(raw_key)]);
|
||||
|
||||
Ok(txn)
|
||||
}
|
||||
|
||||
pub async fn get(&self, key: TableNameKey<'_>) -> Result<Option<TableNameValue>> {
|
||||
let raw_key = key.as_raw_key();
|
||||
self.kv_backend
|
||||
|
||||
@@ -26,7 +26,7 @@ use crate::error::{
|
||||
UnexpectedLogicalRouteTableSnafu,
|
||||
};
|
||||
use crate::key::{RegionDistribution, TableMetaKey, TABLE_ROUTE_PREFIX};
|
||||
use crate::kv_backend::txn::{Txn, TxnOp, TxnOpResponse};
|
||||
use crate::kv_backend::txn::{Txn, TxnOpResponse};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::rpc::router::{region_distribution, RegionRoute};
|
||||
use crate::rpc::store::BatchGetRequest;
|
||||
@@ -61,6 +61,27 @@ pub struct LogicalTableRouteValue {
|
||||
}
|
||||
|
||||
impl TableRouteValue {
|
||||
/// Returns a [TableRouteValue::Physical] if `table_id` equals `physical_table_id`.
|
||||
/// Otherwise returns a [TableRouteValue::Logical].
|
||||
pub(crate) fn new(
|
||||
table_id: TableId,
|
||||
physical_table_id: TableId,
|
||||
region_routes: Vec<RegionRoute>,
|
||||
) -> Self {
|
||||
if table_id == physical_table_id {
|
||||
TableRouteValue::physical(region_routes)
|
||||
} else {
|
||||
let region_routes = region_routes
|
||||
.into_iter()
|
||||
.map(|region| {
|
||||
debug_assert_eq!(region.region.id.table_id(), physical_table_id);
|
||||
RegionId::new(table_id, region.region.id.region_number())
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
TableRouteValue::logical(physical_table_id, region_routes)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn physical(region_routes: Vec<RegionRoute>) -> Self {
|
||||
Self::Physical(PhysicalTableRouteValue::new(region_routes))
|
||||
}
|
||||
@@ -425,21 +446,6 @@ impl TableRouteStorage {
|
||||
Self { kv_backend }
|
||||
}
|
||||
|
||||
/// Builds a get table route transaction(readonly).
|
||||
pub(crate) fn build_get_txn(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
) -> (
|
||||
Txn,
|
||||
impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<DeserializedValueWithBytes<TableRouteValue>>>,
|
||||
) {
|
||||
let key = TableRouteKey::new(table_id);
|
||||
let raw_key = key.as_raw_key();
|
||||
let txn = Txn::new().and_then(vec![TxnOp::Get(raw_key.clone())]);
|
||||
|
||||
(txn, txn_helper::build_txn_response_decoder_fn(raw_key))
|
||||
}
|
||||
|
||||
/// Builds a create table route transaction;
/// it expects that the `__table_route/{table_id}` key is not occupied.
pub fn build_create_txn(
|
||||
@@ -483,17 +489,6 @@ impl TableRouteStorage {
|
||||
Ok((txn, txn_helper::build_txn_response_decoder_fn(raw_key)))
|
||||
}
|
||||
|
||||
/// Builds a delete table route transaction;
/// it expects that the remote value equals the `table_route_value`.
pub(crate) fn build_delete_txn(&self, table_id: TableId) -> Result<Txn> {
|
||||
let key = TableRouteKey::new(table_id);
|
||||
let raw_key = key.as_raw_key();
|
||||
|
||||
let txn = Txn::new().and_then(vec![TxnOp::Delete(raw_key)]);
|
||||
|
||||
Ok(txn)
|
||||
}
|
||||
|
||||
/// Returns the [`TableRouteValue`].
|
||||
pub async fn get(&self, table_id: TableId) -> Result<Option<TableRouteValue>> {
|
||||
let key = TableRouteKey::new(table_id);
|
||||
@@ -517,6 +512,37 @@ impl TableRouteStorage {
|
||||
.transpose()
|
||||
}
|
||||
|
||||
/// Returns the physical `DeserializedValueWithBytes<TableRouteValue>`, resolving a logical table to its physical table route.
///
/// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) error if:
/// - the physical table (`logical_or_physical_table_id`) does not exist;
/// - the corresponding physical table of the logical table (`logical_or_physical_table_id`) does not exist.
pub async fn get_raw_physical_table_route(
|
||||
&self,
|
||||
logical_or_physical_table_id: TableId,
|
||||
) -> Result<(TableId, DeserializedValueWithBytes<TableRouteValue>)> {
|
||||
let table_route =
|
||||
self.get_raw(logical_or_physical_table_id)
|
||||
.await?
|
||||
.context(TableRouteNotFoundSnafu {
|
||||
table_id: logical_or_physical_table_id,
|
||||
})?;
|
||||
|
||||
match table_route.get_inner_ref() {
|
||||
TableRouteValue::Physical(_) => Ok((logical_or_physical_table_id, table_route)),
|
||||
TableRouteValue::Logical(x) => {
|
||||
let physical_table_id = x.physical_table_id();
|
||||
let physical_table_route =
|
||||
self.get_raw(physical_table_id)
|
||||
.await?
|
||||
.context(TableRouteNotFoundSnafu {
|
||||
table_id: physical_table_id,
|
||||
})?;
|
||||
Ok((physical_table_id, physical_table_route))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a batch of [`TableRouteValue`]s in the same order as `table_ids`.
pub async fn batch_get(&self, table_ids: &[TableId]) -> Result<Vec<Option<TableRouteValue>>> {
|
||||
let keys = table_ids
|
||||
|
||||
544
src/common/meta/src/key/tombstone.rs
Normal file
@@ -0,0 +1,544 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use snafu::{ensure, OptionExt};
|
||||
|
||||
use super::TableMetaKeyGetTxnOp;
|
||||
use crate::error::{self, Result};
|
||||
use crate::key::txn_helper::TxnOpGetResponseSet;
|
||||
use crate::kv_backend::txn::{Compare, CompareOp, Txn, TxnOp};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
|
||||
/// [TombstoneManager] provides the ability to:
|
||||
/// - logically delete values
|
||||
/// - restore the deleted values
|
||||
pub(crate) struct TombstoneManager {
|
||||
kv_backend: KvBackendRef,
|
||||
}
|
||||
|
||||
const TOMBSTONE_PREFIX: &str = "__tombstone/";
|
||||
|
||||
pub(crate) struct TombstoneKey<T>(T);
|
||||
|
||||
fn to_tombstone(key: &[u8]) -> Vec<u8> {
|
||||
[TOMBSTONE_PREFIX.as_bytes(), key].concat()
|
||||
}
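// Illustrative sketch (not part of this change): a tombstone key is just the
// original key behind the `__tombstone/` prefix, so "delete" and "restore" are
// both simple key moves. The sample key below is arbitrary.
fn example_to_tombstone_prefix() {
    assert_eq!(
        to_tombstone(b"__table_info/1024"),
        b"__tombstone/__table_info/1024".to_vec()
    );
}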
|
||||
|
||||
impl TombstoneKey<&Vec<u8>> {
|
||||
/// Returns the origin key and tombstone key.
|
||||
fn to_keys(&self) -> (Vec<u8>, Vec<u8>) {
|
||||
let key = self.0;
|
||||
let tombstone_key = to_tombstone(key);
|
||||
(key.clone(), tombstone_key)
|
||||
}
|
||||
|
||||
/// Returns the origin key and tombstone key.
|
||||
fn into_keys(self) -> (Vec<u8>, Vec<u8>) {
|
||||
self.to_keys()
|
||||
}
|
||||
|
||||
/// Returns the tombstone key.
|
||||
fn to_tombstone_key(&self) -> Vec<u8> {
|
||||
let key = self.0;
|
||||
to_tombstone(key)
|
||||
}
|
||||
}
|
||||
|
||||
impl TableMetaKeyGetTxnOp for TombstoneKey<&Vec<u8>> {
|
||||
fn build_get_op(
|
||||
&self,
|
||||
) -> (
|
||||
TxnOp,
|
||||
impl FnMut(&'_ mut TxnOpGetResponseSet) -> Option<Vec<u8>>,
|
||||
) {
|
||||
TxnOpGetResponseSet::build_get_op(to_tombstone(self.0))
|
||||
}
|
||||
}
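// Illustrative sketch (not part of this change): unlike `Key::build_get_op`
// below, which reads the original key (what `create` needs), this impl reads
// the tombstone key (what `restore` needs). The sample key is arbitrary.
fn example_tombstone_get_op_targets_tombstone_key() {
    let origin = b"__table_info/1024".to_vec();
    let (op, _filter) = TombstoneKey(&origin).build_get_op();
    assert!(matches!(op, TxnOp::Get(key) if key == to_tombstone(&origin)));
}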
|
||||
|
||||
/// The key used in the [TombstoneManager].
|
||||
pub(crate) struct Key {
|
||||
bytes: Vec<u8>,
|
||||
// Atomic key:
// the value of this key must not change between the two transactions; it is
// guarded with compare operations in the tombstone transactions below.
atomic: bool,
|
||||
}
|
||||
|
||||
impl Key {
|
||||
/// Returns a new atomic key.
|
||||
pub(crate) fn compare_and_swap<T: Into<Vec<u8>>>(key: T) -> Self {
|
||||
Self {
|
||||
bytes: key.into(),
|
||||
atomic: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a new normal key.
|
||||
pub(crate) fn new<T: Into<Vec<u8>>>(key: T) -> Self {
|
||||
Self {
|
||||
bytes: key.into(),
|
||||
atomic: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the key into bytes.
pub(crate) fn into_bytes(self) -> Vec<u8> {
|
||||
self.bytes
|
||||
}
|
||||
|
||||
fn get_inner(&self) -> &Vec<u8> {
|
||||
&self.bytes
|
||||
}
|
||||
|
||||
fn is_atomic(&self) -> bool {
|
||||
self.atomic
|
||||
}
|
||||
}
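// Illustrative sketch (not part of this change): the two constructors above
// differ only in the `atomic` flag; atomic keys are additionally guarded with
// compare operations in the tombstone transactions below. Keys are arbitrary.
fn example_key_constructors() {
    let guarded = Key::compare_and_swap("__table_route/1024");
    let plain = Key::new("__table_name/greptime/public/foo");
    assert!(guarded.is_atomic());
    assert!(!plain.is_atomic());
}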
|
||||
|
||||
impl TableMetaKeyGetTxnOp for Key {
|
||||
fn build_get_op(
|
||||
&self,
|
||||
) -> (
|
||||
TxnOp,
|
||||
impl FnMut(&'_ mut TxnOpGetResponseSet) -> Option<Vec<u8>>,
|
||||
) {
|
||||
let key = self.get_inner().clone();
|
||||
(TxnOp::Get(key.clone()), TxnOpGetResponseSet::filter(key))
|
||||
}
|
||||
}
|
||||
|
||||
fn format_on_failure_error_message<F: FnMut(&mut TxnOpGetResponseSet) -> Option<Vec<u8>>>(
|
||||
mut set: TxnOpGetResponseSet,
|
||||
on_failure_kv_and_filters: Vec<(Vec<u8>, Vec<u8>, F)>,
|
||||
) -> String {
|
||||
on_failure_kv_and_filters
|
||||
.into_iter()
|
||||
.flat_map(|(key, value, mut filter)| {
|
||||
let got = filter(&mut set);
|
||||
let Some(got) = got else {
|
||||
return Some(format!(
|
||||
"For key: {} was expected: {}, but value does not exists",
|
||||
String::from_utf8_lossy(&key),
|
||||
String::from_utf8_lossy(&value),
|
||||
));
|
||||
};
|
||||
|
||||
if got != value {
|
||||
Some(format!(
|
||||
"For key: {} was expected: {}, but got: {}",
|
||||
String::from_utf8_lossy(&key),
|
||||
String::from_utf8_lossy(&value),
|
||||
String::from_utf8_lossy(&got),
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("; ")
|
||||
}
|
||||
|
||||
fn format_keys(keys: &[Key]) -> String {
|
||||
keys.iter()
|
||||
.map(|key| String::from_utf8_lossy(&key.bytes))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
}
|
||||
|
||||
impl TombstoneManager {
|
||||
/// Returns [TombstoneManager].
|
||||
pub fn new(kv_backend: KvBackendRef) -> Self {
|
||||
Self { kv_backend }
|
||||
}
|
||||
|
||||
/// Creates tombstones for `keys`.
///
/// Performs the following steps:
/// - retrieves all values corresponding to `keys`;
/// - moves each value to its tombstone key.
pub(crate) async fn create(&self, keys: Vec<Key>) -> Result<()> {
|
||||
// Builds transaction to retrieve all values
|
||||
let (operations, mut filters): (Vec<_>, Vec<_>) =
|
||||
keys.iter().map(|key| key.build_get_op()).unzip();
|
||||
|
||||
let txn = Txn::new().and_then(operations);
|
||||
let mut resp = self.kv_backend.txn(txn).await?;
|
||||
ensure!(
|
||||
resp.succeeded,
|
||||
error::UnexpectedSnafu {
|
||||
err_msg: format!(
|
||||
"Failed to retrieves the metadata, keys: {}",
|
||||
format_keys(&keys)
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
let mut set = TxnOpGetResponseSet::from(&mut resp.responses);
|
||||
// Builds the create tombstone transaction.
|
||||
let mut tombstone_operations = Vec::with_capacity(keys.len() * 2);
|
||||
let mut tombstone_comparison = vec![];
|
||||
let mut on_failure_operations = vec![];
|
||||
let mut on_failure_kv_and_filters = vec![];
|
||||
for (idx, key) in keys.iter().enumerate() {
|
||||
let filter = &mut filters[idx];
|
||||
let value = filter(&mut set).with_context(|| error::UnexpectedSnafu {
|
||||
err_msg: format!(
|
||||
"Missing value, key: {}",
|
||||
String::from_utf8_lossy(key.get_inner())
|
||||
),
|
||||
})?;
|
||||
let (origin_key, tombstone_key) = TombstoneKey(key.get_inner()).into_keys();
|
||||
// Compares the atomic key.
|
||||
if key.is_atomic() {
|
||||
tombstone_comparison.push(Compare::with_not_exist_value(
|
||||
tombstone_key.clone(),
|
||||
CompareOp::Equal,
|
||||
));
|
||||
tombstone_comparison.push(Compare::with_value(
|
||||
origin_key.clone(),
|
||||
CompareOp::Equal,
|
||||
value.clone(),
|
||||
));
|
||||
let (op, filter) = TxnOpGetResponseSet::build_get_op(origin_key.clone());
|
||||
on_failure_operations.push(op);
|
||||
on_failure_kv_and_filters.push((origin_key.clone(), value.clone(), filter));
|
||||
}
|
||||
tombstone_operations.push(TxnOp::Delete(origin_key));
|
||||
tombstone_operations.push(TxnOp::Put(tombstone_key, value));
|
||||
}
|
||||
|
||||
let txn = if !tombstone_comparison.is_empty() {
|
||||
Txn::new().when(tombstone_comparison)
|
||||
} else {
|
||||
Txn::new()
|
||||
}
|
||||
.and_then(tombstone_operations);
|
||||
|
||||
let txn = if !on_failure_operations.is_empty() {
|
||||
txn.or_else(on_failure_operations)
|
||||
} else {
|
||||
txn
|
||||
};
|
||||
|
||||
let mut resp = self.kv_backend.txn(txn).await?;
|
||||
// TODO(weny): add tests for atomic key changed.
|
||||
if !resp.succeeded {
|
||||
let set = TxnOpGetResponseSet::from(&mut resp.responses);
|
||||
let err_msg = format_on_failure_error_message(set, on_failure_kv_and_filters);
|
||||
return error::CasKeyChangedSnafu { err_msg }.fail();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Restores the values previously tombstoned for `keys`.
///
/// Performs the following steps:
/// - retrieves all tombstone values corresponding to `keys`;
/// - moves each value back to its origin key.
pub(crate) async fn restore(&self, keys: Vec<Key>) -> Result<()> {
|
||||
// Builds transaction to retrieve all tombstone values
|
||||
let tombstone_keys = keys
|
||||
.iter()
|
||||
.map(|key| TombstoneKey(key.get_inner()))
|
||||
.collect::<Vec<_>>();
|
||||
let (operations, mut filters): (Vec<_>, Vec<_>) =
|
||||
tombstone_keys.iter().map(|key| key.build_get_op()).unzip();
|
||||
|
||||
let txn = Txn::new().and_then(operations);
|
||||
let mut resp = self.kv_backend.txn(txn).await?;
|
||||
ensure!(
|
||||
resp.succeeded,
|
||||
error::UnexpectedSnafu {
|
||||
err_msg: format!(
|
||||
"Failed to retrieves the metadata, keys: {}",
|
||||
format_keys(&keys)
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
let mut set = TxnOpGetResponseSet::from(&mut resp.responses);
|
||||
|
||||
// Builds the restore tombstone transaction.
|
||||
let mut tombstone_operations = Vec::with_capacity(keys.len() * 2);
|
||||
let mut tombstone_comparison = vec![];
|
||||
let mut on_failure_operations = vec![];
|
||||
let mut on_failure_kv_and_filters = vec![];
|
||||
for (idx, key) in keys.iter().enumerate() {
|
||||
let filter = &mut filters[idx];
|
||||
let value = filter(&mut set).with_context(|| error::UnexpectedSnafu {
|
||||
err_msg: format!(
|
||||
"Missing value, key: {}",
|
||||
String::from_utf8_lossy(key.get_inner())
|
||||
),
|
||||
})?;
|
||||
let (origin_key, tombstone_key) = tombstone_keys[idx].to_keys();
|
||||
// Compares the atomic key.
|
||||
if key.is_atomic() {
|
||||
tombstone_comparison.push(Compare::with_not_exist_value(
|
||||
origin_key.clone(),
|
||||
CompareOp::Equal,
|
||||
));
|
||||
tombstone_comparison.push(Compare::with_value(
|
||||
tombstone_key.clone(),
|
||||
CompareOp::Equal,
|
||||
value.clone(),
|
||||
));
|
||||
let (op, filter) = tombstone_keys[idx].build_get_op();
|
||||
on_failure_operations.push(op);
|
||||
on_failure_kv_and_filters.push((tombstone_key.clone(), value.clone(), filter));
|
||||
}
|
||||
tombstone_operations.push(TxnOp::Delete(tombstone_key));
|
||||
tombstone_operations.push(TxnOp::Put(origin_key, value));
|
||||
}
|
||||
|
||||
let txn = if !tombstone_comparison.is_empty() {
|
||||
Txn::new().when(tombstone_comparison)
|
||||
} else {
|
||||
Txn::new()
|
||||
}
|
||||
.and_then(tombstone_operations);
|
||||
|
||||
let txn = if !on_failure_operations.is_empty() {
|
||||
txn.or_else(on_failure_operations)
|
||||
} else {
|
||||
txn
|
||||
};
|
||||
|
||||
let mut resp = self.kv_backend.txn(txn).await?;
|
||||
// TODO(weny): add tests for atomic key changed.
|
||||
if !resp.succeeded {
|
||||
let set = TxnOpGetResponseSet::from(&mut resp.responses);
|
||||
let err_msg = format_on_failure_error_message(set, on_failure_kv_and_filters);
|
||||
return error::CasKeyChangedSnafu { err_msg }.fail();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Deletes tombstones for keys.
|
||||
pub(crate) async fn delete(&self, keys: Vec<Vec<u8>>) -> Result<()> {
|
||||
let operations = keys
|
||||
.iter()
|
||||
.map(|key| TxnOp::Delete(TombstoneKey(key).to_tombstone_key()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let txn = Txn::new().and_then(operations);
|
||||
// Always succeeds.
let _ = self.kv_backend.txn(txn).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
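// Illustrative sketch (not part of this change): the intended lifecycle when a
// higher-level procedure (e.g. a table drop) uses the manager. The helper name
// `logically_delete_then_purge` and the choice of which keys are atomic are
// hypothetical.
async fn logically_delete_then_purge(manager: &TombstoneManager, keys: Vec<Vec<u8>>) -> Result<()> {
    // Move the values under the tombstone prefix (logical delete); keys that
    // must not change concurrently are wrapped with `Key::compare_and_swap`.
    let guarded = keys.iter().cloned().map(Key::compare_and_swap).collect::<Vec<_>>();
    manager.create(guarded).await?;

    // On rollback one would call `manager.restore(...)` instead; on commit the
    // tombstones are removed for good.
    manager.delete(keys).await?;
    Ok(())
}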
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::key::tombstone::{Key, TombstoneKey, TombstoneManager};
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
use crate::kv_backend::KvBackend;
|
||||
use crate::rpc::store::PutRequest;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_tombstone() {
|
||||
let kv_backend = Arc::new(MemoryKvBackend::default());
|
||||
let tombstone_manager = TombstoneManager::new(kv_backend.clone());
|
||||
kv_backend
|
||||
.put(PutRequest::new().with_key("bar").with_value("baz"))
|
||||
.await
|
||||
.unwrap();
|
||||
kv_backend
|
||||
.put(PutRequest::new().with_key("foo").with_value("hi"))
|
||||
.await
|
||||
.unwrap();
|
||||
tombstone_manager
|
||||
.create(vec![Key::compare_and_swap("bar"), Key::new("foo")])
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(!kv_backend.exists(b"bar").await.unwrap());
|
||||
assert!(!kv_backend.exists(b"foo").await.unwrap());
|
||||
assert_eq!(
|
||||
kv_backend
|
||||
.get(&TombstoneKey(&"bar".into()).to_tombstone_key())
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.value,
|
||||
b"baz"
|
||||
);
|
||||
assert_eq!(
|
||||
kv_backend
|
||||
.get(&TombstoneKey(&"foo".into()).to_tombstone_key())
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.value,
|
||||
b"hi"
|
||||
);
|
||||
assert_eq!(kv_backend.len(), 2);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_tombstone_without_atomic_key() {
|
||||
let kv_backend = Arc::new(MemoryKvBackend::default());
|
||||
let tombstone_manager = TombstoneManager::new(kv_backend.clone());
|
||||
kv_backend
|
||||
.put(PutRequest::new().with_key("bar").with_value("baz"))
|
||||
.await
|
||||
.unwrap();
|
||||
kv_backend
|
||||
.put(PutRequest::new().with_key("foo").with_value("hi"))
|
||||
.await
|
||||
.unwrap();
|
||||
tombstone_manager
|
||||
.create(vec![Key::new("bar"), Key::new("foo")])
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(!kv_backend.exists(b"bar").await.unwrap());
|
||||
assert!(!kv_backend.exists(b"foo").await.unwrap());
|
||||
assert_eq!(
|
||||
kv_backend
|
||||
.get(&TombstoneKey(&"bar".into()).to_tombstone_key())
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.value,
|
||||
b"baz"
|
||||
);
|
||||
assert_eq!(
|
||||
kv_backend
|
||||
.get(&TombstoneKey(&"foo".into()).to_tombstone_key())
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.value,
|
||||
b"hi"
|
||||
);
|
||||
assert_eq!(kv_backend.len(), 2);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_tombstone_origin_value_not_found_err() {
|
||||
let kv_backend = Arc::new(MemoryKvBackend::default());
|
||||
let tombstone_manager = TombstoneManager::new(kv_backend.clone());
|
||||
|
||||
kv_backend
|
||||
.put(PutRequest::new().with_key("bar").with_value("baz"))
|
||||
.await
|
||||
.unwrap();
|
||||
kv_backend
|
||||
.put(PutRequest::new().with_key("foo").with_value("hi"))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let err = tombstone_manager
|
||||
.create(vec![Key::compare_and_swap("bar"), Key::new("baz")])
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(err.to_string().contains("Missing value"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_restore_tombstone() {
|
||||
let kv_backend = Arc::new(MemoryKvBackend::default());
|
||||
let tombstone_manager = TombstoneManager::new(kv_backend.clone());
|
||||
kv_backend
|
||||
.put(PutRequest::new().with_key("bar").with_value("baz"))
|
||||
.await
|
||||
.unwrap();
|
||||
kv_backend
|
||||
.put(PutRequest::new().with_key("foo").with_value("hi"))
|
||||
.await
|
||||
.unwrap();
|
||||
let expected_kvs = kv_backend.dump();
|
||||
tombstone_manager
|
||||
.create(vec![Key::compare_and_swap("bar"), Key::new("foo")])
|
||||
.await
|
||||
.unwrap();
|
||||
tombstone_manager
|
||||
.restore(vec![Key::compare_and_swap("bar"), Key::new("foo")])
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(expected_kvs, kv_backend.dump());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_restore_tombstone_without_atomic_key() {
|
||||
let kv_backend = Arc::new(MemoryKvBackend::default());
|
||||
let tombstone_manager = TombstoneManager::new(kv_backend.clone());
|
||||
kv_backend
|
||||
.put(PutRequest::new().with_key("bar").with_value("baz"))
|
||||
.await
|
||||
.unwrap();
|
||||
kv_backend
|
||||
.put(PutRequest::new().with_key("foo").with_value("hi"))
|
||||
.await
|
||||
.unwrap();
|
||||
let expected_kvs = kv_backend.dump();
|
||||
tombstone_manager
|
||||
.create(vec![Key::compare_and_swap("bar"), Key::new("foo")])
|
||||
.await
|
||||
.unwrap();
|
||||
tombstone_manager
|
||||
.restore(vec![Key::new("bar"), Key::new("foo")])
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(expected_kvs, kv_backend.dump());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_restore_tombstone_origin_value_not_found_err() {
|
||||
let kv_backend = Arc::new(MemoryKvBackend::default());
|
||||
let tombstone_manager = TombstoneManager::new(kv_backend.clone());
|
||||
kv_backend
|
||||
.put(PutRequest::new().with_key("bar").with_value("baz"))
|
||||
.await
|
||||
.unwrap();
|
||||
kv_backend
|
||||
.put(PutRequest::new().with_key("foo").with_value("hi"))
|
||||
.await
|
||||
.unwrap();
|
||||
tombstone_manager
|
||||
.create(vec![Key::compare_and_swap("bar"), Key::new("foo")])
|
||||
.await
|
||||
.unwrap();
|
||||
let err = tombstone_manager
|
||||
.restore(vec![Key::new("bar"), Key::new("baz")])
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(err.to_string().contains("Missing value"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_delete_tombstone() {
|
||||
let kv_backend = Arc::new(MemoryKvBackend::default());
|
||||
let tombstone_manager = TombstoneManager::new(kv_backend.clone());
|
||||
kv_backend
|
||||
.put(PutRequest::new().with_key("bar").with_value("baz"))
|
||||
.await
|
||||
.unwrap();
|
||||
kv_backend
|
||||
.put(PutRequest::new().with_key("foo").with_value("hi"))
|
||||
.await
|
||||
.unwrap();
|
||||
tombstone_manager
|
||||
.create(vec![Key::compare_and_swap("bar"), Key::new("foo")])
|
||||
.await
|
||||
.unwrap();
|
||||
tombstone_manager
|
||||
.delete(vec![b"bar".to_vec(), b"foo".to_vec()])
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(kv_backend.is_empty());
|
||||
}
|
||||
}
|
||||
@@ -18,7 +18,69 @@ use serde::Serialize;
|
||||
use crate::error::Result;
|
||||
use crate::key::{DeserializedValueWithBytes, TableMetaValue};
|
||||
use crate::kv_backend::txn::{Compare, CompareOp, Txn, TxnOp, TxnOpResponse};
|
||||
use crate::rpc::KeyValue;
|
||||
|
||||
/// The response set of [TxnOpResponse::ResponseGet]
|
||||
pub(crate) struct TxnOpGetResponseSet(Vec<KeyValue>);
|
||||
|
||||
impl TxnOpGetResponseSet {
|
||||
/// Returns a [TxnOp] to retrieve the value corresponding to `key`, and
/// a filter to consume the corresponding [KeyValue] from a [TxnOpGetResponseSet].
pub(crate) fn build_get_op<T: Into<Vec<u8>>>(
|
||||
key: T,
|
||||
) -> (
|
||||
TxnOp,
|
||||
impl FnMut(&'_ mut TxnOpGetResponseSet) -> Option<Vec<u8>>,
|
||||
) {
|
||||
let key = key.into();
|
||||
(TxnOp::Get(key.clone()), TxnOpGetResponseSet::filter(key))
|
||||
}
|
||||
|
||||
/// Returns a filter to consume a [KeyValue] where the key equals `key`.
|
||||
pub(crate) fn filter(key: Vec<u8>) -> impl FnMut(&mut TxnOpGetResponseSet) -> Option<Vec<u8>> {
|
||||
move |set| {
|
||||
let pos = set.0.iter().position(|kv| kv.key == key);
|
||||
match pos {
|
||||
Some(pos) => Some(set.0.remove(pos).value),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a decoder to decode bytes to `DeserializedValueWithBytes<T>`.
|
||||
pub(crate) fn decode_with<F, T>(
|
||||
mut f: F,
|
||||
) -> impl FnMut(&mut TxnOpGetResponseSet) -> Result<Option<DeserializedValueWithBytes<T>>>
|
||||
where
|
||||
F: FnMut(&mut TxnOpGetResponseSet) -> Option<Vec<u8>>,
|
||||
T: Serialize + DeserializeOwned + TableMetaValue,
|
||||
{
|
||||
move |set| {
|
||||
f(set)
|
||||
.map(|value| DeserializedValueWithBytes::from_inner_slice(&value))
|
||||
.transpose()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&mut Vec<TxnOpResponse>> for TxnOpGetResponseSet {
|
||||
fn from(value: &mut Vec<TxnOpResponse>) -> Self {
|
||||
let value = value
|
||||
.extract_if(|resp| matches!(resp, TxnOpResponse::ResponseGet(_)))
|
||||
.flat_map(|resp| {
|
||||
// Safety: checked
|
||||
let TxnOpResponse::ResponseGet(r) = resp else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
r.kvs
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
TxnOpGetResponseSet(value)
|
||||
}
|
||||
}
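// Illustrative sketch (not part of this change): how `build_get_op`, `filter`
// and `decode_with` are meant to compose — issue the get in a txn, wrap the
// responses, then let the decoder consume its own key. The helper name
// `read_one` is hypothetical; `KvBackendRef` is assumed to be in scope via its
// full path below.
async fn read_one<T>(
    kv_backend: &crate::kv_backend::KvBackendRef,
    raw_key: Vec<u8>,
) -> Result<Option<DeserializedValueWithBytes<T>>>
where
    T: Serialize + DeserializeOwned + TableMetaValue,
{
    let (op, filter) = TxnOpGetResponseSet::build_get_op(raw_key);
    let mut decode = TxnOpGetResponseSet::decode_with(filter);

    let mut resp = kv_backend.txn(Txn::new().and_then(vec![op])).await?;
    let mut set = TxnOpGetResponseSet::from(&mut resp.responses);
    decode(&mut set)
}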
|
||||
|
||||
// TODO(weny): use `TxnOpGetResponseSet`.
pub(crate) fn build_txn_response_decoder_fn<T>(
|
||||
raw_key: Vec<u8>,
|
||||
) -> impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<DeserializedValueWithBytes<T>>>
|
||||
|
||||
@@ -626,4 +626,95 @@ mod tests {
|
||||
assert_eq!(b"test_key".to_vec(), delete.key);
|
||||
let _ = delete.options.unwrap();
|
||||
}
|
||||
|
||||
use crate::kv_backend::test::{
|
||||
prepare_kv_with_prefix, test_kv_batch_delete_with_prefix, test_kv_batch_get_with_prefix,
|
||||
test_kv_compare_and_put_with_prefix, test_kv_delete_range_with_prefix,
|
||||
test_kv_put_with_prefix, test_kv_range_2_with_prefix, test_kv_range_with_prefix,
|
||||
unprepare_kv,
|
||||
};
|
||||
|
||||
async fn build_kv_backend() -> Option<EtcdStore> {
|
||||
let endpoints = std::env::var("GT_ETCD_ENDPOINTS").unwrap_or_default();
|
||||
if endpoints.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let endpoints = endpoints
|
||||
.split(',')
|
||||
.map(|s| s.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
let client = Client::connect(endpoints, None)
|
||||
.await
|
||||
.expect("malformed endpoints");
|
||||
|
||||
Some(EtcdStore {
|
||||
client,
|
||||
max_txn_ops: 128,
|
||||
})
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_put() {
|
||||
if let Some(kv_backend) = build_kv_backend().await {
|
||||
let prefix = b"put/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_put_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
unprepare_kv(&kv_backend, prefix).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_range() {
|
||||
if let Some(kv_backend) = build_kv_backend().await {
|
||||
let prefix = b"range/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_range_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
unprepare_kv(&kv_backend, prefix).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_range_2() {
|
||||
if let Some(kv_backend) = build_kv_backend().await {
|
||||
test_kv_range_2_with_prefix(kv_backend, b"range2/".to_vec()).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_batch_get() {
|
||||
if let Some(kv_backend) = build_kv_backend().await {
|
||||
let prefix = b"batchGet/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_batch_get_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
unprepare_kv(&kv_backend, prefix).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_compare_and_put() {
|
||||
if let Some(kv_backend) = build_kv_backend().await {
|
||||
let kv_backend = Arc::new(kv_backend);
|
||||
test_kv_compare_and_put_with_prefix(kv_backend, b"compareAndPut/".to_vec()).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_delete_range() {
|
||||
if let Some(kv_backend) = build_kv_backend().await {
|
||||
let prefix = b"deleteRange/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_delete_range_with_prefix(kv_backend, prefix.to_vec()).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_batch_delete() {
|
||||
if let Some(kv_backend) = build_kv_backend().await {
|
||||
let prefix = b"batchDelete/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_batch_delete_with_prefix(kv_backend, prefix.to_vec()).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,6 +70,25 @@ impl<T> MemoryKvBackend<T> {
|
||||
let mut kvs = self.kvs.write().unwrap();
|
||||
kvs.clear();
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
/// Returns true if the `kvs` is empty.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.kvs.read().unwrap().is_empty()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
/// Returns the `kvs`.
|
||||
pub fn dump(&self) -> BTreeMap<Vec<u8>, Vec<u8>> {
|
||||
let kvs = self.kvs.read().unwrap();
|
||||
kvs.clone()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
/// Returns the length of `kvs`.
pub fn len(&self) -> usize {
|
||||
self.kvs.read().unwrap().len()
|
||||
}
|
||||
}
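// Illustrative sketch (not part of this change): the test-only helpers above
// enable "snapshot and compare" assertions like the ones used by the metadata
// restore tests in this change set. The function below is a hypothetical
// test-side helper.
#[cfg(test)]
fn example_snapshot_roundtrip<T>(kv_backend: &MemoryKvBackend<T>) {
    let snapshot = kv_backend.dump();
    assert_eq!(kv_backend.len(), snapshot.len());
    assert_eq!(kv_backend.is_empty(), snapshot.is_empty());
    // ... a test would mutate the backend here and later compare ...
    assert_eq!(snapshot, kv_backend.dump());
}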
|
||||
|
||||
#[async_trait]
|
||||
@@ -357,14 +376,14 @@ mod tests {
|
||||
async fn test_put() {
|
||||
let kv_backend = mock_mem_store_with_data().await;
|
||||
|
||||
test_kv_put(kv_backend).await;
|
||||
test_kv_put(&kv_backend).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_range() {
|
||||
let kv_backend = mock_mem_store_with_data().await;
|
||||
|
||||
test_kv_range(kv_backend).await;
|
||||
test_kv_range(&kv_backend).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -378,7 +397,7 @@ mod tests {
|
||||
async fn test_batch_get() {
|
||||
let kv_backend = mock_mem_store_with_data().await;
|
||||
|
||||
test_kv_batch_get(kv_backend).await;
|
||||
test_kv_batch_get(&kv_backend).await;
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
|
||||
@@ -21,25 +21,33 @@ use crate::rpc::store::{BatchGetRequest, PutRequest};
|
||||
use crate::rpc::KeyValue;
|
||||
use crate::util;
|
||||
|
||||
pub fn mock_kvs() -> Vec<KeyValue> {
|
||||
pub fn mock_kvs(prefix: Vec<u8>) -> Vec<KeyValue> {
|
||||
vec![
|
||||
KeyValue {
|
||||
key: b"key1".to_vec(),
|
||||
key: [prefix.clone(), b"key1".to_vec()].concat(),
|
||||
value: b"val1".to_vec(),
|
||||
},
|
||||
KeyValue {
|
||||
key: b"key2".to_vec(),
|
||||
key: [prefix.clone(), b"key2".to_vec()].concat(),
|
||||
value: b"val2".to_vec(),
|
||||
},
|
||||
KeyValue {
|
||||
key: b"key3".to_vec(),
|
||||
key: [prefix.clone(), b"key3".to_vec()].concat(),
|
||||
value: b"val3".to_vec(),
|
||||
},
|
||||
KeyValue {
|
||||
key: [prefix.clone(), b"key11".to_vec()].concat(),
|
||||
value: b"val11".to_vec(),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
pub async fn prepare_kv(kv_backend: &impl KvBackend) {
|
||||
let kvs = mock_kvs();
|
||||
prepare_kv_with_prefix(kv_backend, vec![]).await;
|
||||
}
|
||||
|
||||
pub async fn prepare_kv_with_prefix(kv_backend: &impl KvBackend, prefix: Vec<u8>) {
|
||||
let kvs = mock_kvs(prefix);
|
||||
assert!(kv_backend
|
||||
.batch_put(BatchPutRequest {
|
||||
kvs,
|
||||
@@ -47,21 +55,29 @@ pub async fn prepare_kv(kv_backend: &impl KvBackend) {
|
||||
})
|
||||
.await
|
||||
.is_ok());
|
||||
}
|
||||
|
||||
pub async fn unprepare_kv(kv_backend: &impl KvBackend, prefix: &[u8]) {
|
||||
let range_end = util::get_prefix_end_key(prefix);
|
||||
assert!(kv_backend
|
||||
.put(PutRequest {
|
||||
key: b"key11".to_vec(),
|
||||
value: b"val11".to_vec(),
|
||||
.delete_range(DeleteRangeRequest {
|
||||
key: prefix.to_vec(),
|
||||
range_end,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.is_ok());
|
||||
}
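// Illustrative sketch (not part of this change): the prefix-aware helpers above
// let several tests share one external store (as the etcd tests in this change
// set do) without interfering, as long as each test picks a unique prefix.
// The prefix and helper name below are arbitrary examples.
pub async fn example_prefixed_round(kv_backend: &impl KvBackend) {
    let prefix = b"example/";
    prepare_kv_with_prefix(kv_backend, prefix.to_vec()).await;
    test_kv_put_with_prefix(kv_backend, prefix.to_vec()).await;
    unprepare_kv(kv_backend, prefix).await;
}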
|
||||
|
||||
pub async fn test_kv_put(kv_backend: impl KvBackend) {
|
||||
pub async fn test_kv_put(kv_backend: &impl KvBackend) {
|
||||
test_kv_put_with_prefix(kv_backend, vec![]).await;
|
||||
}
|
||||
|
||||
pub async fn test_kv_put_with_prefix(kv_backend: &impl KvBackend, prefix: Vec<u8>) {
|
||||
let put_key = [prefix.clone(), b"key11".to_vec()].concat();
|
||||
let resp = kv_backend
|
||||
.put(PutRequest {
|
||||
key: b"key11".to_vec(),
|
||||
key: put_key.clone(),
|
||||
value: b"val12".to_vec(),
|
||||
prev_kv: false,
|
||||
})
|
||||
@@ -71,20 +87,25 @@ pub async fn test_kv_put(kv_backend: impl KvBackend) {
|
||||
|
||||
let resp = kv_backend
|
||||
.put(PutRequest {
|
||||
key: b"key11".to_vec(),
|
||||
key: put_key.clone(),
|
||||
value: b"val13".to_vec(),
|
||||
prev_kv: true,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
let prev_kv = resp.prev_kv.unwrap();
|
||||
assert_eq!(b"key11", prev_kv.key());
|
||||
assert_eq!(put_key, prev_kv.key());
|
||||
assert_eq!(b"val12", prev_kv.value());
|
||||
}
|
||||
|
||||
pub async fn test_kv_range(kv_backend: impl KvBackend) {
|
||||
let key = b"key1".to_vec();
|
||||
let range_end = util::get_prefix_end_key(b"key1");
|
||||
pub async fn test_kv_range(kv_backend: &impl KvBackend) {
|
||||
test_kv_range_with_prefix(kv_backend, vec![]).await;
|
||||
}
|
||||
|
||||
pub async fn test_kv_range_with_prefix(kv_backend: &impl KvBackend, prefix: Vec<u8>) {
|
||||
let key = [prefix.clone(), b"key1".to_vec()].concat();
|
||||
let key11 = [prefix.clone(), b"key11".to_vec()].concat();
|
||||
let range_end = util::get_prefix_end_key(&key);
|
||||
|
||||
let resp = kv_backend
|
||||
.range(RangeRequest {
|
||||
@@ -97,9 +118,9 @@ pub async fn test_kv_range(kv_backend: impl KvBackend) {
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(2, resp.kvs.len());
|
||||
assert_eq!(b"key1", resp.kvs[0].key());
|
||||
assert_eq!(key, resp.kvs[0].key);
|
||||
assert_eq!(b"val1", resp.kvs[0].value());
|
||||
assert_eq!(b"key11", resp.kvs[1].key());
|
||||
assert_eq!(key11, resp.kvs[1].key);
|
||||
assert_eq!(b"val11", resp.kvs[1].value());
|
||||
|
||||
let resp = kv_backend
|
||||
@@ -113,9 +134,9 @@ pub async fn test_kv_range(kv_backend: impl KvBackend) {
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(2, resp.kvs.len());
|
||||
assert_eq!(b"key1", resp.kvs[0].key());
|
||||
assert_eq!(key, resp.kvs[0].key);
|
||||
assert_eq!(b"", resp.kvs[0].value());
|
||||
assert_eq!(b"key11", resp.kvs[1].key());
|
||||
assert_eq!(key11, resp.kvs[1].key);
|
||||
assert_eq!(b"", resp.kvs[1].value());
|
||||
|
||||
let resp = kv_backend
|
||||
@@ -129,12 +150,12 @@ pub async fn test_kv_range(kv_backend: impl KvBackend) {
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(1, resp.kvs.len());
|
||||
assert_eq!(b"key1", resp.kvs[0].key());
|
||||
assert_eq!(key, resp.kvs[0].key);
|
||||
assert_eq!(b"val1", resp.kvs[0].value());
|
||||
|
||||
let resp = kv_backend
|
||||
.range(RangeRequest {
|
||||
key,
|
||||
key: key.clone(),
|
||||
range_end,
|
||||
limit: 1,
|
||||
keys_only: false,
|
||||
@@ -143,24 +164,41 @@ pub async fn test_kv_range(kv_backend: impl KvBackend) {
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(1, resp.kvs.len());
|
||||
assert_eq!(b"key1", resp.kvs[0].key());
|
||||
assert_eq!(key, resp.kvs[0].key);
|
||||
assert_eq!(b"val1", resp.kvs[0].value());
|
||||
}
|
||||
|
||||
pub async fn test_kv_range_2(kv_backend: impl KvBackend) {
|
||||
test_kv_range_2_with_prefix(kv_backend, vec![]).await;
|
||||
}
|
||||
|
||||
pub async fn test_kv_range_2_with_prefix(kv_backend: impl KvBackend, prefix: Vec<u8>) {
|
||||
let atest = [prefix.clone(), b"atest".to_vec()].concat();
|
||||
let test = [prefix.clone(), b"test".to_vec()].concat();
|
||||
|
||||
kv_backend
|
||||
.put(PutRequest::new().with_key("atest").with_value("value"))
|
||||
.put(
|
||||
PutRequest::new()
|
||||
.with_key(atest.clone())
|
||||
.with_value("value"),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
kv_backend
|
||||
.put(PutRequest::new().with_key("test").with_value("value"))
|
||||
.put(PutRequest::new().with_key(test.clone()).with_value("value"))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// If both key and range_end are ‘\0’, then range represents all keys.
|
||||
let all_start = [prefix.clone(), b"\0".to_vec()].concat();
|
||||
let all_end = if prefix.is_empty() {
|
||||
b"\0".to_vec()
|
||||
} else {
|
||||
util::get_prefix_end_key(&prefix)
|
||||
};
|
||||
let result = kv_backend
|
||||
.range(RangeRequest::new().with_range(b"\0".to_vec(), b"\0".to_vec()))
|
||||
.range(RangeRequest::new().with_range(all_start, all_end.clone()))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -168,26 +206,28 @@ pub async fn test_kv_range_2(kv_backend: impl KvBackend) {
|
||||
assert!(!result.more);
|
||||
|
||||
// If range_end is ‘\0’, the range is all keys greater than or equal to the key argument.
|
||||
let a_start = [prefix.clone(), b"a".to_vec()].concat();
|
||||
let result = kv_backend
|
||||
.range(RangeRequest::new().with_range(b"a".to_vec(), b"\0".to_vec()))
|
||||
.range(RangeRequest::new().with_range(a_start.clone(), all_end.clone()))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.kvs.len(), 2);
|
||||
|
||||
let b_start = [prefix.clone(), b"b".to_vec()].concat();
|
||||
let result = kv_backend
|
||||
.range(RangeRequest::new().with_range(b"b".to_vec(), b"\0".to_vec()))
|
||||
.range(RangeRequest::new().with_range(b_start, all_end.clone()))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.kvs.len(), 1);
|
||||
assert_eq!(result.kvs[0].key, b"test");
|
||||
assert_eq!(result.kvs[0].key, test);
|
||||
|
||||
// Fetches the keys >= "a", set limit to 1, the `more` should be true.
|
||||
let result = kv_backend
|
||||
.range(
|
||||
RangeRequest::new()
|
||||
.with_range(b"a".to_vec(), b"\0".to_vec())
|
||||
.with_range(a_start.clone(), all_end.clone())
|
||||
.with_limit(1),
|
||||
)
|
||||
.await
|
||||
@@ -199,7 +239,7 @@ pub async fn test_kv_range_2(kv_backend: impl KvBackend) {
|
||||
let result = kv_backend
|
||||
.range(
|
||||
RangeRequest::new()
|
||||
.with_range(b"a".to_vec(), b"\0".to_vec())
|
||||
.with_range(a_start.clone(), all_end.clone())
|
||||
.with_limit(2),
|
||||
)
|
||||
.await
|
||||
@@ -211,16 +251,27 @@ pub async fn test_kv_range_2(kv_backend: impl KvBackend) {
|
||||
let result = kv_backend
|
||||
.range(
|
||||
RangeRequest::new()
|
||||
.with_range(b"a".to_vec(), b"\0".to_vec())
|
||||
.with_range(a_start.clone(), all_end.clone())
|
||||
.with_limit(3),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(result.kvs.len(), 2);
|
||||
assert!(!result.more);
|
||||
|
||||
let req = BatchDeleteRequest {
|
||||
keys: vec![atest, test],
|
||||
prev_kv: false,
|
||||
};
|
||||
let resp = kv_backend.batch_delete(req).await.unwrap();
|
||||
assert!(resp.prev_kvs.is_empty());
|
||||
}
|
||||
|
||||
pub async fn test_kv_batch_get(kv_backend: impl KvBackend) {
|
||||
pub async fn test_kv_batch_get(kv_backend: &impl KvBackend) {
|
||||
test_kv_batch_get_with_prefix(kv_backend, vec![]).await;
|
||||
}
|
||||
|
||||
pub async fn test_kv_batch_get_with_prefix(kv_backend: &impl KvBackend, prefix: Vec<u8>) {
|
||||
let keys = vec![];
|
||||
let resp = kv_backend
|
||||
.batch_get(BatchGetRequest { keys })
|
||||
@@ -229,7 +280,8 @@ pub async fn test_kv_batch_get(kv_backend: impl KvBackend) {
|
||||
|
||||
assert!(resp.kvs.is_empty());
|
||||
|
||||
let keys = vec![b"key10".to_vec()];
|
||||
let key10 = [prefix.clone(), b"key10".to_vec()].concat();
|
||||
let keys = vec![key10];
|
||||
let resp = kv_backend
|
||||
.batch_get(BatchGetRequest { keys })
|
||||
.await
|
||||
@@ -237,29 +289,42 @@ pub async fn test_kv_batch_get(kv_backend: impl KvBackend) {
|
||||
|
||||
assert!(resp.kvs.is_empty());
|
||||
|
||||
let keys = vec![b"key1".to_vec(), b"key3".to_vec(), b"key4".to_vec()];
|
||||
let key1 = [prefix.clone(), b"key1".to_vec()].concat();
|
||||
let key3 = [prefix.clone(), b"key3".to_vec()].concat();
|
||||
let key4 = [prefix.clone(), b"key4".to_vec()].concat();
|
||||
let keys = vec![key1.clone(), key3.clone(), key4];
|
||||
let resp = kv_backend
|
||||
.batch_get(BatchGetRequest { keys })
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(2, resp.kvs.len());
|
||||
assert_eq!(b"key1", resp.kvs[0].key());
|
||||
assert_eq!(key1, resp.kvs[0].key);
|
||||
assert_eq!(b"val1", resp.kvs[0].value());
|
||||
assert_eq!(b"key3", resp.kvs[1].key());
|
||||
assert_eq!(key3, resp.kvs[1].key);
|
||||
assert_eq!(b"val3", resp.kvs[1].value());
|
||||
}
|
||||
|
||||
pub async fn test_kv_compare_and_put(kv_backend: Arc<dyn KvBackend<Error = Error>>) {
|
||||
test_kv_compare_and_put_with_prefix(kv_backend, vec![]).await;
|
||||
}
|
||||
|
||||
pub async fn test_kv_compare_and_put_with_prefix(
|
||||
kv_backend: Arc<dyn KvBackend<Error = Error>>,
|
||||
prefix: Vec<u8>,
|
||||
) {
|
||||
let success = Arc::new(AtomicU8::new(0));
|
||||
let key = [prefix.clone(), b"key".to_vec()].concat();
|
||||
|
||||
let mut joins = vec![];
|
||||
for _ in 0..20 {
|
||||
let kv_backend_clone = kv_backend.clone();
|
||||
let success_clone = success.clone();
|
||||
let key_clone = key.clone();
|
||||
|
||||
let join = tokio::spawn(async move {
|
||||
let req = CompareAndPutRequest {
|
||||
key: b"key".to_vec(),
|
||||
key: key_clone,
|
||||
expect: vec![],
|
||||
value: b"val_new".to_vec(),
|
||||
};
|
||||
@@ -276,11 +341,19 @@ pub async fn test_kv_compare_and_put(kv_backend: Arc<dyn KvBackend<Error = Error
|
||||
}
|
||||
|
||||
assert_eq!(1, success.load(Ordering::SeqCst));
|
||||
|
||||
let resp = kv_backend.delete(&key, false).await.unwrap();
|
||||
assert!(resp.is_none());
|
||||
}
|
||||
|
||||
pub async fn test_kv_delete_range(kv_backend: impl KvBackend) {
|
||||
test_kv_delete_range_with_prefix(kv_backend, vec![]).await;
|
||||
}
|
||||
|
||||
pub async fn test_kv_delete_range_with_prefix(kv_backend: impl KvBackend, prefix: Vec<u8>) {
|
||||
let key3 = [prefix.clone(), b"key3".to_vec()].concat();
|
||||
let req = DeleteRangeRequest {
|
||||
key: b"key3".to_vec(),
|
||||
key: key3.clone(),
|
||||
range_end: vec![],
|
||||
prev_kv: true,
|
||||
};
|
||||
@@ -288,14 +361,15 @@ pub async fn test_kv_delete_range(kv_backend: impl KvBackend) {
|
||||
let resp = kv_backend.delete_range(req).await.unwrap();
|
||||
assert_eq!(1, resp.prev_kvs.len());
|
||||
assert_eq!(1, resp.deleted);
|
||||
assert_eq!(b"key3", resp.prev_kvs[0].key());
|
||||
assert_eq!(key3, resp.prev_kvs[0].key);
|
||||
assert_eq!(b"val3", resp.prev_kvs[0].value());
|
||||
|
||||
let resp = kv_backend.get(b"key3").await.unwrap();
|
||||
let resp = kv_backend.get(&key3).await.unwrap();
|
||||
assert!(resp.is_none());
|
||||
|
||||
let key2 = [prefix.clone(), b"key2".to_vec()].concat();
|
||||
let req = DeleteRangeRequest {
|
||||
key: b"key2".to_vec(),
|
||||
key: key2.clone(),
|
||||
range_end: vec![],
|
||||
prev_kv: false,
|
||||
};
|
||||
@@ -304,11 +378,11 @@ pub async fn test_kv_delete_range(kv_backend: impl KvBackend) {
|
||||
assert_eq!(1, resp.deleted);
|
||||
assert!(resp.prev_kvs.is_empty());
|
||||
|
||||
let resp = kv_backend.get(b"key2").await.unwrap();
|
||||
let resp = kv_backend.get(&key2).await.unwrap();
|
||||
assert!(resp.is_none());
|
||||
|
||||
let key = b"key1".to_vec();
|
||||
let range_end = util::get_prefix_end_key(b"key1");
|
||||
let key = [prefix.clone(), b"key1".to_vec()].concat();
|
||||
let range_end = util::get_prefix_end_key(&key);
|
||||
|
||||
let req = DeleteRangeRequest {
|
||||
key: key.clone(),
|
||||
@@ -328,34 +402,45 @@ pub async fn test_kv_delete_range(kv_backend: impl KvBackend) {
|
||||
}
|
||||
|
||||
pub async fn test_kv_batch_delete(kv_backend: impl KvBackend) {
|
||||
assert!(kv_backend.get(b"key1").await.unwrap().is_some());
|
||||
assert!(kv_backend.get(b"key100").await.unwrap().is_none());
|
||||
test_kv_batch_delete_with_prefix(kv_backend, vec![]).await;
|
||||
}
|
||||
|
||||
pub async fn test_kv_batch_delete_with_prefix(kv_backend: impl KvBackend, prefix: Vec<u8>) {
|
||||
let key1 = [prefix.clone(), b"key1".to_vec()].concat();
|
||||
let key100 = [prefix.clone(), b"key100".to_vec()].concat();
|
||||
assert!(kv_backend.get(&key1).await.unwrap().is_some());
|
||||
assert!(kv_backend.get(&key100).await.unwrap().is_none());
|
||||
|
||||
let req = BatchDeleteRequest {
|
||||
keys: vec![b"key1".to_vec(), b"key100".to_vec()],
|
||||
keys: vec![key1.clone(), key100.clone()],
|
||||
prev_kv: true,
|
||||
};
|
||||
let resp = kv_backend.batch_delete(req).await.unwrap();
|
||||
assert_eq!(1, resp.prev_kvs.len());
|
||||
assert_eq!(
|
||||
vec![KeyValue {
|
||||
key: b"key1".to_vec(),
|
||||
key: key1.clone(),
|
||||
value: b"val1".to_vec()
|
||||
}],
|
||||
resp.prev_kvs
|
||||
);
|
||||
assert!(kv_backend.get(b"key1").await.unwrap().is_none());
|
||||
assert!(kv_backend.get(&key1).await.unwrap().is_none());
|
||||
|
||||
assert!(kv_backend.get(b"key2").await.unwrap().is_some());
|
||||
assert!(kv_backend.get(b"key3").await.unwrap().is_some());
|
||||
let key2 = [prefix.clone(), b"key2".to_vec()].concat();
|
||||
let key3 = [prefix.clone(), b"key3".to_vec()].concat();
|
||||
let key11 = [prefix.clone(), b"key11".to_vec()].concat();
|
||||
assert!(kv_backend.get(&key2).await.unwrap().is_some());
|
||||
assert!(kv_backend.get(&key3).await.unwrap().is_some());
|
||||
assert!(kv_backend.get(&key11).await.unwrap().is_some());
|
||||
|
||||
let req = BatchDeleteRequest {
|
||||
keys: vec![b"key2".to_vec(), b"key3".to_vec()],
|
||||
keys: vec![key2.clone(), key3.clone(), key11.clone()],
|
||||
prev_kv: false,
|
||||
};
|
||||
let resp = kv_backend.batch_delete(req).await.unwrap();
|
||||
assert!(resp.prev_kvs.is_empty());
|
||||
|
||||
assert!(kv_backend.get(b"key2").await.unwrap().is_none());
|
||||
assert!(kv_backend.get(b"key3").await.unwrap().is_none());
|
||||
assert!(kv_backend.get(&key2).await.unwrap().is_none());
|
||||
assert!(kv_backend.get(&key3).await.unwrap().is_none());
|
||||
assert!(kv_backend.get(&key11).await.unwrap().is_none());
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#![feature(btree_extract_if)]
|
||||
#![feature(async_closure)]
|
||||
#![feature(let_chains)]
|
||||
#![feature(extract_if)]
|
||||
|
||||
pub mod cache_invalidator;
|
||||
pub mod cluster;
|
||||
|
||||
@@ -474,6 +474,16 @@ pub struct AlterTableTask {
|
||||
}
|
||||
|
||||
impl AlterTableTask {
|
||||
pub fn validate(&self) -> Result<()> {
|
||||
self.alter_table
|
||||
.kind
|
||||
.as_ref()
|
||||
.context(error::UnexpectedSnafu {
|
||||
err_msg: "'kind' is absent",
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn table_ref(&self) -> TableReference {
|
||||
TableReference {
|
||||
catalog: &self.alter_table.catalog_name,
|
||||
@@ -625,7 +635,7 @@ impl TryFrom<PbCreateDatabaseTask> for CreateDatabaseTask {
|
||||
fn try_from(pb: PbCreateDatabaseTask) -> Result<Self> {
|
||||
let CreateDatabaseExpr {
|
||||
catalog_name,
|
||||
database_name,
|
||||
schema_name,
|
||||
create_if_not_exists,
|
||||
options,
|
||||
} = pb.create_database.context(error::InvalidProtoMsgSnafu {
|
||||
@@ -634,7 +644,7 @@ impl TryFrom<PbCreateDatabaseTask> for CreateDatabaseTask {
|
||||
|
||||
Ok(CreateDatabaseTask {
|
||||
catalog: catalog_name,
|
||||
schema: database_name,
|
||||
schema: schema_name,
|
||||
create_if_not_exists,
|
||||
options: Some(options),
|
||||
})
|
||||
@@ -655,7 +665,7 @@ impl TryFrom<CreateDatabaseTask> for PbCreateDatabaseTask {
|
||||
Ok(PbCreateDatabaseTask {
|
||||
create_database: Some(CreateDatabaseExpr {
|
||||
catalog_name: catalog,
|
||||
database_name: schema,
|
||||
schema_name: schema,
|
||||
create_if_not_exists,
|
||||
options: options.unwrap_or_default(),
|
||||
}),
|
||||
|
||||
@@ -56,6 +56,12 @@ pub fn procedure_state_to_pb_response(state: &ProcedureState) -> PbProcedureStat
|
||||
ProcedureState::Done { .. } => (PbProcedureStatus::Done, String::default()),
|
||||
ProcedureState::Retrying { error } => (PbProcedureStatus::Retrying, error.to_string()),
|
||||
ProcedureState::Failed { error } => (PbProcedureStatus::Failed, error.to_string()),
|
||||
ProcedureState::PrepareRollback { error } => {
|
||||
(PbProcedureStatus::PrepareRollback, error.to_string())
|
||||
}
|
||||
ProcedureState::RollingBack { error } => {
|
||||
(PbProcedureStatus::RollingBack, error.to_string())
|
||||
}
|
||||
};
|
||||
|
||||
PbProcedureStateResponse {
|
||||
|
||||
@@ -27,6 +27,7 @@ use crate::ddl::DdlContext;
|
||||
use crate::error::Result;
|
||||
use crate::key::TableMetadataManager;
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::peer::Peer;
|
||||
use crate::region_keeper::MemoryRegionKeeper;
|
||||
use crate::sequence::SequenceBuilder;
|
||||
@@ -86,6 +87,14 @@ impl<T: MockDatanodeHandler + 'static> DatanodeManager for MockDatanodeManager<T
|
||||
/// Returns a test purpose [DdlContext].
|
||||
pub fn new_ddl_context(datanode_manager: DatanodeManagerRef) -> DdlContext {
|
||||
let kv_backend = Arc::new(MemoryKvBackend::new());
|
||||
new_ddl_context_with_kv_backend(datanode_manager, kv_backend)
|
||||
}
|
||||
|
||||
/// Returns a test purpose [DdlContext] with a specified [KvBackendRef].
|
||||
pub fn new_ddl_context_with_kv_backend(
|
||||
datanode_manager: DatanodeManagerRef,
|
||||
kv_backend: KvBackendRef,
|
||||
) -> DdlContext {
|
||||
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend.clone()));
|
||||
|
||||
DdlContext {
|
||||
|
||||
@@ -17,7 +17,7 @@ pub mod kafka;
use std::collections::HashMap;
use std::sync::Arc;

use common_wal::config::MetaSrvWalConfig;
use common_wal::config::MetasrvWalConfig;
use common_wal::options::{KafkaWalOptions, WalOptions, WAL_OPTIONS_KEY};
use snafu::ResultExt;
use store_api::storage::{RegionId, RegionNumber};
@@ -39,10 +39,10 @@ pub type WalOptionsAllocatorRef = Arc<WalOptionsAllocator>;

impl WalOptionsAllocator {
    /// Creates a WalOptionsAllocator.
    pub fn new(config: MetaSrvWalConfig, kv_backend: KvBackendRef) -> Self {
    pub fn new(config: MetasrvWalConfig, kv_backend: KvBackendRef) -> Self {
        match config {
            MetaSrvWalConfig::RaftEngine => Self::RaftEngine,
            MetaSrvWalConfig::Kafka(kafka_config) => {
            MetasrvWalConfig::RaftEngine => Self::RaftEngine,
            MetasrvWalConfig::Kafka(kafka_config) => {
                Self::Kafka(KafkaTopicManager::new(kafka_config, kv_backend))
            }
        }
@@ -118,7 +118,7 @@ pub fn prepare_wal_options(

#[cfg(test)]
mod tests {
    use common_wal::config::kafka::MetaSrvKafkaConfig;
    use common_wal::config::kafka::MetasrvKafkaConfig;
    use common_wal::test_util::run_test_with_kafka_wal;

    use super::*;
@@ -129,7 +129,7 @@ mod tests {
    #[tokio::test]
    async fn test_allocator_with_raft_engine() {
        let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
        let wal_config = MetaSrvWalConfig::RaftEngine;
        let wal_config = MetasrvWalConfig::RaftEngine;
        let allocator = WalOptionsAllocator::new(wal_config, kv_backend);
        allocator.start().await.unwrap();

@@ -155,7 +155,7 @@ mod tests {
            .collect::<Vec<_>>();

        // Creates a topic manager.
        let config = MetaSrvKafkaConfig {
        let config = MetasrvKafkaConfig {
            replication_factor: broker_endpoints.len() as i16,
            broker_endpoints,
            ..Default::default()

@@ -16,7 +16,7 @@ use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::{error, info};
|
||||
use common_wal::config::kafka::MetaSrvKafkaConfig;
|
||||
use common_wal::config::kafka::MetasrvKafkaConfig;
|
||||
use common_wal::TopicSelectorType;
|
||||
use rskafka::client::controller::ControllerClient;
|
||||
use rskafka::client::error::Error as RsKafkaError;
|
||||
@@ -46,7 +46,7 @@ const DEFAULT_PARTITION: i32 = 0;
|
||||
|
||||
/// Manages topic initialization and selection.
|
||||
pub struct TopicManager {
|
||||
config: MetaSrvKafkaConfig,
|
||||
config: MetasrvKafkaConfig,
|
||||
pub(crate) topic_pool: Vec<String>,
|
||||
pub(crate) topic_selector: TopicSelectorRef,
|
||||
kv_backend: KvBackendRef,
|
||||
@@ -54,7 +54,7 @@ pub struct TopicManager {
|
||||
|
||||
impl TopicManager {
|
||||
/// Creates a new topic manager.
|
||||
pub fn new(config: MetaSrvKafkaConfig, kv_backend: KvBackendRef) -> Self {
|
||||
pub fn new(config: MetasrvKafkaConfig, kv_backend: KvBackendRef) -> Self {
|
||||
// Topics should be created.
|
||||
let topics = (0..config.num_topics)
|
||||
.map(|topic_id| format!("{}_{topic_id}", config.topic_name_prefix))
|
||||
@@ -283,7 +283,7 @@ mod tests {
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// Creates a topic manager.
|
||||
let config = MetaSrvKafkaConfig {
|
||||
let config = MetasrvKafkaConfig {
|
||||
replication_factor: broker_endpoints.len() as i16,
|
||||
broker_endpoints,
|
||||
..Default::default()
|
||||
|
||||
@@ -104,12 +104,24 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Rollback Procedure recovered: {error}"))]
|
||||
RollbackProcedureRecovered { error: String, location: Location },
|
||||
|
||||
#[snafu(display("Procedure retry exceeded max times, procedure_id: {}", procedure_id))]
|
||||
RetryTimesExceeded {
|
||||
source: Arc<Error>,
|
||||
procedure_id: ProcedureId,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Procedure rollback exceeded max times, procedure_id: {}",
|
||||
procedure_id
|
||||
))]
|
||||
RollbackTimesExceeded {
|
||||
source: Arc<Error>,
|
||||
procedure_id: ProcedureId,
|
||||
},
|
||||
|
||||
#[snafu(display("Corrupted data, error: "))]
|
||||
CorruptedData {
|
||||
#[snafu(source)]
|
||||
@@ -145,6 +157,9 @@ pub enum Error {
|
||||
|
||||
#[snafu(display("Unexpected: {err_msg}"))]
|
||||
Unexpected { location: Location, err_msg: String },
|
||||
|
||||
#[snafu(display("Not support to rollback the procedure"))]
|
||||
RollbackNotSupported { location: Location },
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -161,9 +176,12 @@ impl ErrorExt for Error {
|
||||
| Error::DeleteState { .. }
|
||||
| Error::FromJson { .. }
|
||||
| Error::RetryTimesExceeded { .. }
|
||||
| Error::RollbackTimesExceeded { .. }
|
||||
| Error::RetryLater { .. }
|
||||
| Error::WaitWatcher { .. }
|
||||
| Error::ManagerNotStart { .. } => StatusCode::Internal,
|
||||
| Error::ManagerNotStart { .. }
|
||||
| Error::RollbackProcedureRecovered { .. }
|
||||
| Error::RollbackNotSupported { .. } => StatusCode::Internal,
|
||||
Error::LoaderConflict { .. } | Error::DuplicateProcedure { .. } => {
|
||||
StatusCode::InvalidArguments
|
||||
}
|
||||
|
||||
@@ -31,11 +31,11 @@ use tokio::sync::{Mutex as TokioMutex, Notify};
|
||||
|
||||
use self::rwlock::KeyRwLock;
|
||||
use crate::error::{
|
||||
DuplicateProcedureSnafu, Error, LoaderConflictSnafu, ManagerNotStartSnafu, Result,
|
||||
self, DuplicateProcedureSnafu, Error, LoaderConflictSnafu, ManagerNotStartSnafu, Result,
|
||||
StartRemoveOutdatedMetaTaskSnafu, StopRemoveOutdatedMetaTaskSnafu,
|
||||
};
|
||||
use crate::local::runner::Runner;
|
||||
use crate::procedure::BoxedProcedureLoader;
|
||||
use crate::procedure::{BoxedProcedureLoader, InitProcedureState};
|
||||
use crate::store::{ProcedureMessage, ProcedureStore, StateStoreRef};
|
||||
use crate::{
|
||||
BoxedProcedure, ContextProvider, LockKey, ProcedureId, ProcedureManager, ProcedureState,
|
||||
@@ -72,8 +72,13 @@ pub(crate) struct ProcedureMeta {
|
||||
}
|
||||
|
||||
impl ProcedureMeta {
|
||||
fn new(id: ProcedureId, parent_id: Option<ProcedureId>, lock_key: LockKey) -> ProcedureMeta {
|
||||
let (state_sender, state_receiver) = watch::channel(ProcedureState::Running);
|
||||
fn new(
|
||||
id: ProcedureId,
|
||||
procedure_state: ProcedureState,
|
||||
parent_id: Option<ProcedureId>,
|
||||
lock_key: LockKey,
|
||||
) -> ProcedureMeta {
|
||||
let (state_sender, state_receiver) = watch::channel(procedure_state);
|
||||
ProcedureMeta {
|
||||
id,
|
||||
parent_id,
|
||||
@@ -424,12 +429,18 @@ impl LocalManager {
|
||||
fn submit_root(
|
||||
&self,
|
||||
procedure_id: ProcedureId,
|
||||
procedure_state: ProcedureState,
|
||||
step: u32,
|
||||
procedure: BoxedProcedure,
|
||||
) -> Result<Watcher> {
|
||||
ensure!(self.manager_ctx.running(), ManagerNotStartSnafu);
|
||||
|
||||
let meta = Arc::new(ProcedureMeta::new(procedure_id, None, procedure.lock_key()));
|
||||
let meta = Arc::new(ProcedureMeta::new(
|
||||
procedure_id,
|
||||
procedure_state,
|
||||
None,
|
||||
procedure.lock_key(),
|
||||
));
|
||||
let runner = Runner {
|
||||
meta: meta.clone(),
|
||||
procedure,
|
||||
@@ -468,13 +479,11 @@ impl LocalManager {
|
||||
Ok(watcher)
|
||||
}
|
||||
|
||||
/// Recovers unfinished procedures and reruns them.
|
||||
async fn recover(&self) -> Result<()> {
|
||||
logging::info!("LocalManager start to recover");
|
||||
let recover_start = Instant::now();
|
||||
|
||||
let (messages, finished_ids) = self.procedure_store.load_messages().await?;
|
||||
|
||||
fn submit_recovered_messages(
|
||||
&self,
|
||||
messages: HashMap<ProcedureId, ProcedureMessage>,
|
||||
init_state: InitProcedureState,
|
||||
) {
|
||||
for (procedure_id, message) in &messages {
|
||||
if message.parent_id.is_none() {
|
||||
// This is the root procedure. We only submit the root procedure as it will
|
||||
@@ -494,8 +503,21 @@ impl LocalManager {
|
||||
loaded_procedure.step
|
||||
);
|
||||
|
||||
let procedure_state = match init_state {
|
||||
InitProcedureState::RollingBack => ProcedureState::RollingBack {
|
||||
error: Arc::new(
|
||||
error::RollbackProcedureRecoveredSnafu {
|
||||
error: message.error.clone().unwrap_or("Unknown error".to_string()),
|
||||
}
|
||||
.build(),
|
||||
),
|
||||
},
|
||||
InitProcedureState::Running => ProcedureState::Running,
|
||||
};
|
||||
|
||||
if let Err(e) = self.submit_root(
|
||||
*procedure_id,
|
||||
procedure_state,
|
||||
loaded_procedure.step,
|
||||
loaded_procedure.procedure,
|
||||
) {
|
||||
@@ -503,6 +525,18 @@ impl LocalManager {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Recovers unfinished procedures and reruns them.
|
||||
async fn recover(&self) -> Result<()> {
|
||||
logging::info!("LocalManager start to recover");
|
||||
let recover_start = Instant::now();
|
||||
|
||||
let (messages, rollback_messages, finished_ids) =
|
||||
self.procedure_store.load_messages().await?;
|
||||
// Submits recovered messages first.
|
||||
self.submit_recovered_messages(rollback_messages, InitProcedureState::RollingBack);
|
||||
self.submit_recovered_messages(messages, InitProcedureState::Running);
|
||||
|
||||
if !finished_ids.is_empty() {
|
||||
logging::info!(
|
||||
@@ -587,7 +621,12 @@ impl ProcedureManager for LocalManager {
|
||||
DuplicateProcedureSnafu { procedure_id }
|
||||
);
|
||||
|
||||
self.submit_root(procedure.id, 0, procedure.procedure)
|
||||
self.submit_root(
|
||||
procedure.id,
|
||||
ProcedureState::Running,
|
||||
0,
|
||||
procedure.procedure,
|
||||
)
|
||||
}
|
||||
|
||||
async fn procedure_state(&self, procedure_id: ProcedureId) -> Result<Option<ProcedureState>> {
|
||||
@@ -626,7 +665,12 @@ pub(crate) mod test_util {
|
||||
use super::*;
|
||||
|
||||
pub(crate) fn procedure_meta_for_test() -> ProcedureMeta {
|
||||
ProcedureMeta::new(ProcedureId::random(), None, LockKey::default())
|
||||
ProcedureMeta::new(
|
||||
ProcedureId::random(),
|
||||
ProcedureState::Running,
|
||||
None,
|
||||
LockKey::default(),
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn new_object_store(dir: &TempDir) -> ObjectStore {
|
||||
@@ -914,6 +958,14 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
async fn rollback(&mut self, _: &Context) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn rollback_supported(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn dump(&self) -> Result<String> {
|
||||
Ok(String::new())
|
||||
}
|
||||
@@ -923,24 +975,29 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
let check_procedure = |procedure| {
|
||||
async {
|
||||
let procedure_id = ProcedureId::random();
|
||||
let mut watcher = manager
|
||||
.submit(ProcedureWithId {
|
||||
id: procedure_id,
|
||||
procedure: Box::new(procedure),
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
// Wait for the notification.
|
||||
watcher.changed().await.unwrap();
|
||||
assert!(watcher.borrow().is_failed());
|
||||
}
|
||||
let check_procedure = |procedure| async {
|
||||
let procedure_id = ProcedureId::random();
|
||||
manager
|
||||
.submit(ProcedureWithId {
|
||||
id: procedure_id,
|
||||
procedure: Box::new(procedure),
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
};
|
||||
|
||||
check_procedure(MockProcedure { panic: false }).await;
|
||||
check_procedure(MockProcedure { panic: true }).await;
|
||||
let mut watcher = check_procedure(MockProcedure { panic: false }).await;
|
||||
// Wait for the notification.
|
||||
watcher.changed().await.unwrap();
|
||||
assert!(watcher.borrow().is_prepare_rollback());
|
||||
watcher.changed().await.unwrap();
|
||||
assert!(watcher.borrow().is_rolling_back());
|
||||
watcher.changed().await.unwrap();
|
||||
assert!(watcher.borrow().is_failed());
|
||||
// The runner won't rollback a panicked procedure.
|
||||
let mut watcher = check_procedure(MockProcedure { panic: true }).await;
|
||||
watcher.changed().await.unwrap();
|
||||
assert!(watcher.borrow().is_failed());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -23,37 +23,9 @@ use super::rwlock::OwnedKeyRwLockGuard;
|
||||
use crate::error::{self, ProcedurePanicSnafu, Result};
|
||||
use crate::local::{ManagerContext, ProcedureMeta, ProcedureMetaRef};
|
||||
use crate::procedure::{Output, StringKey};
|
||||
use crate::store::ProcedureStore;
|
||||
use crate::ProcedureState::Retrying;
|
||||
use crate::store::{ProcedureMessage, ProcedureStore};
|
||||
use crate::{BoxedProcedure, Context, Error, ProcedureId, ProcedureState, ProcedureWithId, Status};
|
||||
|
||||
#[derive(Debug)]
|
||||
enum ExecResult {
|
||||
Continue,
|
||||
Done,
|
||||
RetryLater,
|
||||
Failed,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl ExecResult {
|
||||
fn is_continue(&self) -> bool {
|
||||
matches!(self, ExecResult::Continue)
|
||||
}
|
||||
|
||||
fn is_done(&self) -> bool {
|
||||
matches!(self, ExecResult::Done)
|
||||
}
|
||||
|
||||
fn is_retry_later(&self) -> bool {
|
||||
matches!(self, ExecResult::RetryLater)
|
||||
}
|
||||
|
||||
fn is_failed(&self) -> bool {
|
||||
matches!(self, ExecResult::Failed)
|
||||
}
|
||||
}
|
||||
|
||||
/// A guard to cleanup procedure state.
|
||||
struct ProcedureGuard {
|
||||
meta: ProcedureMetaRef,
|
||||
@@ -208,129 +180,164 @@ impl Runner {
|
||||
async fn execute_once_with_retry(&mut self, ctx: &Context) {
|
||||
let mut retry = self.exponential_builder.build();
|
||||
let mut retry_times = 0;
|
||||
|
||||
let mut rollback = self.exponential_builder.build();
|
||||
let mut rollback_times = 0;
|
||||
|
||||
loop {
|
||||
// Don't store state if `ProcedureManager` is stopped.
|
||||
if !self.running() {
|
||||
self.meta.set_state(ProcedureState::Failed {
|
||||
error: Arc::new(error::ManagerNotStartSnafu {}.build()),
|
||||
});
|
||||
self.meta.set_state(ProcedureState::failed(Arc::new(
|
||||
error::ManagerNotStartSnafu {}.build(),
|
||||
)));
|
||||
return;
|
||||
}
|
||||
match self.execute_once(ctx).await {
|
||||
ExecResult::Done | ExecResult::Failed => return,
|
||||
ExecResult::Continue => (),
|
||||
ExecResult::RetryLater => {
|
||||
let state = self.meta.state();
|
||||
match state {
|
||||
ProcedureState::Running => {}
|
||||
ProcedureState::Retrying { error } => {
|
||||
retry_times += 1;
|
||||
if let Some(d) = retry.next() {
|
||||
self.wait_on_err(d, retry_times).await;
|
||||
} else {
|
||||
assert!(self.meta.state().is_retrying());
|
||||
if let Retrying { error } = self.meta.state() {
|
||||
self.meta.set_state(ProcedureState::failed(Arc::new(
|
||||
self.meta
|
||||
.set_state(ProcedureState::prepare_rollback(Arc::new(
|
||||
Error::RetryTimesExceeded {
|
||||
source: error,
|
||||
source: error.clone(),
|
||||
procedure_id: self.meta.id,
|
||||
},
|
||||
)))
|
||||
}
|
||||
)));
|
||||
}
|
||||
}
|
||||
ProcedureState::PrepareRollback { error }
|
||||
| ProcedureState::RollingBack { error } => {
|
||||
rollback_times += 1;
|
||||
if let Some(d) = rollback.next() {
|
||||
self.wait_on_err(d, rollback_times).await;
|
||||
} else {
|
||||
self.meta.set_state(ProcedureState::failed(Arc::new(
|
||||
Error::RollbackTimesExceeded {
|
||||
source: error.clone(),
|
||||
procedure_id: self.meta.id,
|
||||
},
|
||||
)));
|
||||
return;
|
||||
}
|
||||
}
|
||||
ProcedureState::Done { .. } => return,
|
||||
ProcedureState::Failed { .. } => return,
|
||||
}
|
||||
self.execute_once(ctx).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn rollback(&mut self, ctx: &Context, err: Arc<Error>) {
|
||||
if self.procedure.rollback_supported() {
|
||||
if let Err(e) = self.procedure.rollback(ctx).await {
|
||||
self.meta
|
||||
.set_state(ProcedureState::rolling_back(Arc::new(e)));
|
||||
return;
|
||||
}
|
||||
}
|
||||
self.meta.set_state(ProcedureState::failed(err));
|
||||
}
|
||||
|
||||
async fn rollback(&mut self, error: Arc<Error>) -> ExecResult {
|
||||
if let Err(e) = self.rollback_procedure().await {
|
||||
self.rolling_back = true;
|
||||
self.meta.set_state(ProcedureState::retrying(Arc::new(e)));
|
||||
return ExecResult::RetryLater;
|
||||
async fn prepare_rollback(&mut self, err: Arc<Error>) {
|
||||
if let Err(e) = self.write_procedure_state(err.to_string()).await {
|
||||
self.meta
|
||||
.set_state(ProcedureState::prepare_rollback(Arc::new(e)));
|
||||
return;
|
||||
}
|
||||
if self.procedure.rollback_supported() {
|
||||
self.meta.set_state(ProcedureState::rolling_back(err));
|
||||
} else {
|
||||
self.meta.set_state(ProcedureState::failed(err));
|
||||
}
|
||||
self.meta.set_state(ProcedureState::failed(error));
|
||||
ExecResult::Failed
|
||||
}
|
||||
|
||||
async fn execute_once(&mut self, ctx: &Context) -> ExecResult {
|
||||
// if rolling_back, there is no need to execute again.
|
||||
if self.rolling_back {
|
||||
// We can definitely get the previous error here.
|
||||
let state = self.meta.state();
|
||||
let err = state.error().unwrap();
|
||||
return self.rollback(err.clone()).await;
|
||||
}
|
||||
match self.procedure.execute(ctx).await {
|
||||
Ok(status) => {
|
||||
logging::debug!(
|
||||
"Execute procedure {}-{} once, status: {:?}, need_persist: {}",
|
||||
self.procedure.type_name(),
|
||||
self.meta.id,
|
||||
status,
|
||||
status.need_persist(),
|
||||
);
|
||||
async fn execute_once(&mut self, ctx: &Context) {
|
||||
match self.meta.state() {
|
||||
ProcedureState::Running | ProcedureState::Retrying { .. } => {
|
||||
match self.procedure.execute(ctx).await {
|
||||
Ok(status) => {
|
||||
logging::debug!(
|
||||
"Execute procedure {}-{} once, status: {:?}, need_persist: {}",
|
||||
self.procedure.type_name(),
|
||||
self.meta.id,
|
||||
status,
|
||||
status.need_persist(),
|
||||
);
|
||||
|
||||
// Don't store state if `ProcedureManager` is stopped.
|
||||
if !self.running() {
|
||||
self.meta.set_state(ProcedureState::Failed {
|
||||
error: Arc::new(error::ManagerNotStartSnafu {}.build()),
|
||||
});
|
||||
return ExecResult::Failed;
|
||||
}
|
||||
|
||||
if status.need_persist() {
|
||||
if let Err(err) = self.persist_procedure().await {
|
||||
self.meta.set_state(ProcedureState::retrying(Arc::new(err)));
|
||||
return ExecResult::RetryLater;
|
||||
}
|
||||
}
|
||||
|
||||
match status {
|
||||
Status::Executing { .. } => (),
|
||||
Status::Suspended { subprocedures, .. } => {
|
||||
self.on_suspended(subprocedures).await;
|
||||
}
|
||||
Status::Done { output } => {
|
||||
if let Err(e) = self.commit_procedure().await {
|
||||
self.meta.set_state(ProcedureState::retrying(Arc::new(e)));
|
||||
return ExecResult::RetryLater;
|
||||
// Don't store state if `ProcedureManager` is stopped.
|
||||
if !self.running() {
|
||||
self.meta.set_state(ProcedureState::failed(Arc::new(
|
||||
error::ManagerNotStartSnafu {}.build(),
|
||||
)));
|
||||
return;
|
||||
}
|
||||
|
||||
self.done(output);
|
||||
return ExecResult::Done;
|
||||
if status.need_persist() {
|
||||
if let Err(err) = self.persist_procedure().await {
|
||||
self.meta.set_state(ProcedureState::retrying(Arc::new(err)));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
match status {
|
||||
Status::Executing { .. } => (),
|
||||
Status::Suspended { subprocedures, .. } => {
|
||||
self.on_suspended(subprocedures).await;
|
||||
}
|
||||
Status::Done { output } => {
|
||||
if let Err(e) = self.commit_procedure().await {
|
||||
self.meta.set_state(ProcedureState::retrying(Arc::new(e)));
|
||||
return;
|
||||
}
|
||||
|
||||
self.done(output);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
logging::error!(
|
||||
e;
|
||||
"Failed to execute procedure {}-{}, retry: {}",
|
||||
self.procedure.type_name(),
|
||||
self.meta.id,
|
||||
e.is_retry_later(),
|
||||
);
|
||||
|
||||
// Don't store state if `ProcedureManager` is stopped.
|
||||
if !self.running() {
|
||||
self.meta.set_state(ProcedureState::failed(Arc::new(
|
||||
error::ManagerNotStartSnafu {}.build(),
|
||||
)));
|
||||
return;
|
||||
}
|
||||
|
||||
if e.is_retry_later() {
|
||||
self.meta.set_state(ProcedureState::retrying(Arc::new(e)));
|
||||
return;
|
||||
}
|
||||
|
||||
self.meta
|
||||
.set_state(ProcedureState::prepare_rollback(Arc::new(e)));
|
||||
}
|
||||
}
|
||||
|
||||
ExecResult::Continue
|
||||
}
|
||||
Err(e) => {
|
||||
logging::error!(
|
||||
e;
|
||||
"Failed to execute procedure {}-{}, retry: {}",
|
||||
self.procedure.type_name(),
|
||||
self.meta.id,
|
||||
e.is_retry_later(),
|
||||
);
|
||||
|
||||
// Don't store state if `ProcedureManager` is stopped.
|
||||
if !self.running() {
|
||||
self.meta.set_state(ProcedureState::Failed {
|
||||
error: Arc::new(error::ManagerNotStartSnafu {}.build()),
|
||||
});
|
||||
return ExecResult::Failed;
|
||||
}
|
||||
|
||||
if e.is_retry_later() {
|
||||
self.meta.set_state(ProcedureState::retrying(Arc::new(e)));
|
||||
return ExecResult::RetryLater;
|
||||
}
|
||||
|
||||
// Write rollback key so we can skip this procedure while recovering procedures.
|
||||
self.rollback(Arc::new(e)).await
|
||||
}
|
||||
ProcedureState::PrepareRollback { error } => self.prepare_rollback(error).await,
|
||||
ProcedureState::RollingBack { error } => self.rollback(ctx, error).await,
|
||||
ProcedureState::Failed { .. } | ProcedureState::Done { .. } => (),
|
||||
}
|
||||
}
|
||||
|
||||
/// Submit a subprocedure with specific `procedure_id`.
|
||||
fn submit_subprocedure(&self, procedure_id: ProcedureId, mut procedure: BoxedProcedure) {
|
||||
fn submit_subprocedure(
|
||||
&self,
|
||||
procedure_id: ProcedureId,
|
||||
procedure_state: ProcedureState,
|
||||
mut procedure: BoxedProcedure,
|
||||
) {
|
||||
if self.manager_ctx.contains_procedure(procedure_id) {
|
||||
// If the parent has already submitted this procedure, don't submit it again.
|
||||
return;
|
||||
@@ -350,6 +357,7 @@ impl Runner {
|
||||
|
||||
let meta = Arc::new(ProcedureMeta::new(
|
||||
procedure_id,
|
||||
procedure_state,
|
||||
Some(self.meta.id),
|
||||
procedure.lock_key(),
|
||||
));
|
||||
@@ -407,7 +415,11 @@ impl Runner {
|
||||
subprocedure.id,
|
||||
);
|
||||
|
||||
self.submit_subprocedure(subprocedure.id, subprocedure.procedure);
|
||||
self.submit_subprocedure(
|
||||
subprocedure.id,
|
||||
ProcedureState::Running,
|
||||
subprocedure.procedure,
|
||||
);
|
||||
}
|
||||
|
||||
logging::info!(
|
||||
@@ -469,9 +481,19 @@ impl Runner {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn rollback_procedure(&mut self) -> Result<()> {
|
||||
async fn write_procedure_state(&mut self, error: String) -> Result<()> {
|
||||
// Persists procedure state
|
||||
let type_name = self.procedure.type_name().to_string();
|
||||
let data = self.procedure.dump()?;
|
||||
let message = ProcedureMessage {
|
||||
type_name,
|
||||
data,
|
||||
parent_id: self.meta.parent_id,
|
||||
step: self.step,
|
||||
error: Some(error),
|
||||
};
|
||||
self.store
|
||||
.rollback_procedure(self.meta.id, self.step)
|
||||
.rollback_procedure(self.meta.id, message)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
logging::error!(
|
||||
@@ -510,6 +532,7 @@ mod tests {
|
||||
use futures_util::future::BoxFuture;
|
||||
use futures_util::FutureExt;
|
||||
use object_store::ObjectStore;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use super::*;
|
||||
use crate::local::test_util;
|
||||
@@ -566,11 +589,13 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
type RollbackFn = Box<dyn FnMut(Context) -> BoxFuture<'static, Result<()>> + Send>;
|
||||
|
||||
struct ProcedureAdapter<F> {
|
||||
data: String,
|
||||
lock_key: LockKey,
|
||||
exec_fn: F,
|
||||
rollback_fn: Option<RollbackFn>,
|
||||
}
|
||||
|
||||
impl<F> ProcedureAdapter<F> {
|
||||
@@ -597,6 +622,17 @@ mod tests {
|
||||
f.await
|
||||
}
|
||||
|
||||
async fn rollback(&mut self, ctx: &Context) -> Result<()> {
|
||||
if let Some(f) = &mut self.rollback_fn {
|
||||
return (f)(ctx.clone()).await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn rollback_supported(&self) -> bool {
|
||||
self.rollback_fn.is_some()
|
||||
}
|
||||
|
||||
fn dump(&self) -> Result<String> {
|
||||
Ok(self.data.clone())
|
||||
}
|
||||
@@ -623,6 +659,7 @@ mod tests {
|
||||
data: "normal".to_string(),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
rollback_fn: None,
|
||||
};
|
||||
|
||||
let dir = create_temp_dir("normal");
|
||||
@@ -633,8 +670,9 @@ mod tests {
|
||||
let mut runner = new_runner(meta, Box::new(normal), procedure_store.clone());
|
||||
runner.manager_ctx.start();
|
||||
|
||||
let res = runner.execute_once(&ctx).await;
|
||||
assert!(res.is_continue(), "{res:?}");
|
||||
runner.execute_once(&ctx).await;
|
||||
let state = runner.meta.state();
|
||||
assert!(state.is_running(), "{state:?}");
|
||||
check_files(
|
||||
&object_store,
|
||||
&procedure_store,
|
||||
@@ -643,8 +681,9 @@ mod tests {
|
||||
)
|
||||
.await;
|
||||
|
||||
let res = runner.execute_once(&ctx).await;
|
||||
assert!(res.is_done(), "{res:?}");
|
||||
runner.execute_once(&ctx).await;
|
||||
let state = runner.meta.state();
|
||||
assert!(state.is_done(), "{state:?}");
|
||||
check_files(
|
||||
&object_store,
|
||||
&procedure_store,
|
||||
@@ -684,6 +723,7 @@ mod tests {
|
||||
data: "suspend".to_string(),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
rollback_fn: None,
|
||||
};
|
||||
|
||||
let dir = create_temp_dir("suspend");
|
||||
@@ -694,8 +734,9 @@ mod tests {
|
||||
let mut runner = new_runner(meta, Box::new(suspend), procedure_store);
|
||||
runner.manager_ctx.start();
|
||||
|
||||
let res = runner.execute_once(&ctx).await;
|
||||
assert!(res.is_continue(), "{res:?}");
|
||||
runner.execute_once(&ctx).await;
|
||||
let state = runner.meta.state();
|
||||
assert!(state.is_running(), "{state:?}");
|
||||
}
|
||||
|
||||
fn new_child_procedure(procedure_id: ProcedureId, keys: &[&str]) -> ProcedureWithId {
|
||||
@@ -716,6 +757,7 @@ mod tests {
|
||||
data: "child".to_string(),
|
||||
lock_key: LockKey::new_exclusive(keys.iter().map(|k| k.to_string())),
|
||||
exec_fn,
|
||||
rollback_fn: None,
|
||||
};
|
||||
|
||||
ProcedureWithId {
|
||||
@@ -784,6 +826,7 @@ mod tests {
|
||||
data: "parent".to_string(),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
rollback_fn: None,
|
||||
};
|
||||
|
||||
let dir = create_temp_dir("parent");
|
||||
@@ -830,6 +873,7 @@ mod tests {
|
||||
data: "normal".to_string(),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
rollback_fn: None,
|
||||
};
|
||||
|
||||
let dir = create_temp_dir("test_running_is_stopped");
|
||||
@@ -840,8 +884,9 @@ mod tests {
|
||||
let mut runner = new_runner(meta, Box::new(normal), procedure_store.clone());
|
||||
runner.manager_ctx.start();
|
||||
|
||||
let res = runner.execute_once(&ctx).await;
|
||||
assert!(res.is_continue(), "{res:?}");
|
||||
runner.execute_once(&ctx).await;
|
||||
let state = runner.meta.state();
|
||||
assert!(state.is_running(), "{state:?}");
|
||||
check_files(
|
||||
&object_store,
|
||||
&procedure_store,
|
||||
@@ -851,8 +896,9 @@ mod tests {
|
||||
.await;
|
||||
|
||||
runner.manager_ctx.stop();
|
||||
let res = runner.execute_once(&ctx).await;
|
||||
assert!(res.is_failed());
|
||||
runner.execute_once(&ctx).await;
|
||||
let state = runner.meta.state();
|
||||
assert!(state.is_failed(), "{state:?}");
|
||||
// Shouldn't write any files
|
||||
check_files(
|
||||
&object_store,
|
||||
@@ -871,6 +917,7 @@ mod tests {
|
||||
data: "fail".to_string(),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
rollback_fn: None,
|
||||
};
|
||||
|
||||
let dir = create_temp_dir("test_running_is_stopped_on_error");
|
||||
@@ -881,8 +928,9 @@ mod tests {
|
||||
let mut runner = new_runner(meta, Box::new(normal), procedure_store.clone());
|
||||
runner.manager_ctx.stop();
|
||||
|
||||
let res = runner.execute_once(&ctx).await;
|
||||
assert!(res.is_failed(), "{res:?}");
|
||||
runner.execute_once(&ctx).await;
|
||||
let state = runner.meta.state();
|
||||
assert!(state.is_failed(), "{state:?}");
|
||||
// Shouldn't write any files
|
||||
check_files(&object_store, &procedure_store, ctx.procedure_id, &[]).await;
|
||||
}
|
||||
@@ -895,6 +943,7 @@ mod tests {
|
||||
data: "fail".to_string(),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
rollback_fn: None,
|
||||
};
|
||||
|
||||
let dir = create_temp_dir("fail");
|
||||
@@ -905,9 +954,53 @@ mod tests {
|
||||
let mut runner = new_runner(meta.clone(), Box::new(fail), procedure_store.clone());
|
||||
runner.manager_ctx.start();
|
||||
|
||||
let res = runner.execute_once(&ctx).await;
|
||||
assert!(res.is_failed(), "{res:?}");
|
||||
assert!(meta.state().is_failed());
|
||||
runner.execute_once(&ctx).await;
|
||||
let state = runner.meta.state();
|
||||
assert!(state.is_prepare_rollback(), "{state:?}");
|
||||
|
||||
runner.execute_once(&ctx).await;
|
||||
let state = runner.meta.state();
|
||||
assert!(state.is_failed(), "{state:?}");
|
||||
check_files(
|
||||
&object_store,
|
||||
&procedure_store,
|
||||
ctx.procedure_id,
|
||||
&["0000000000.rollback"],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_execute_with_rollback_on_error() {
|
||||
let exec_fn =
|
||||
|_| async { Err(Error::external(MockError::new(StatusCode::Unexpected))) }.boxed();
|
||||
let rollback_fn = move |_| async move { Ok(()) }.boxed();
|
||||
let fail = ProcedureAdapter {
|
||||
data: "fail".to_string(),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
rollback_fn: Some(Box::new(rollback_fn)),
|
||||
};
|
||||
|
||||
let dir = create_temp_dir("fail");
|
||||
let meta = fail.new_meta(ROOT_ID);
|
||||
let ctx = context_without_provider(meta.id);
|
||||
let object_store = test_util::new_object_store(&dir);
|
||||
let procedure_store = Arc::new(ProcedureStore::from_object_store(object_store.clone()));
|
||||
let mut runner = new_runner(meta.clone(), Box::new(fail), procedure_store.clone());
|
||||
runner.manager_ctx.start();
|
||||
|
||||
runner.execute_once(&ctx).await;
|
||||
let state = runner.meta.state();
|
||||
assert!(state.is_prepare_rollback(), "{state:?}");
|
||||
|
||||
runner.execute_once(&ctx).await;
|
||||
let state = runner.meta.state();
|
||||
assert!(state.is_rolling_back(), "{state:?}");
|
||||
|
||||
runner.execute_once(&ctx).await;
|
||||
let state = runner.meta.state();
|
||||
assert!(state.is_failed(), "{state:?}");
|
||||
check_files(
|
||||
&object_store,
|
||||
&procedure_store,
|
||||
@@ -937,6 +1030,7 @@ mod tests {
|
||||
data: "retry_later".to_string(),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
rollback_fn: None,
|
||||
};
|
||||
|
||||
let dir = create_temp_dir("retry_later");
|
||||
@@ -946,13 +1040,13 @@ mod tests {
|
||||
let procedure_store = Arc::new(ProcedureStore::from_object_store(object_store.clone()));
|
||||
let mut runner = new_runner(meta.clone(), Box::new(retry_later), procedure_store.clone());
|
||||
runner.manager_ctx.start();
|
||||
runner.execute_once(&ctx).await;
|
||||
let state = runner.meta.state();
|
||||
assert!(state.is_retrying(), "{state:?}");
|
||||
|
||||
let res = runner.execute_once(&ctx).await;
|
||||
assert!(res.is_retry_later(), "{res:?}");
|
||||
assert!(meta.state().is_retrying());
|
||||
|
||||
let res = runner.execute_once(&ctx).await;
|
||||
assert!(res.is_done(), "{res:?}");
|
||||
runner.execute_once(&ctx).await;
|
||||
let state = runner.meta.state();
|
||||
assert!(state.is_done(), "{state:?}");
|
||||
assert!(meta.state().is_done());
|
||||
check_files(
|
||||
&object_store,
|
||||
@@ -972,6 +1066,7 @@ mod tests {
|
||||
data: "exceed_max_retry_later".to_string(),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
rollback_fn: None,
|
||||
};
|
||||
|
||||
let dir = create_temp_dir("exceed_max_retry_later");
|
||||
@@ -995,6 +1090,85 @@ mod tests {
|
||||
assert!(err.contains("Procedure retry exceeded max times"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_rollback_exceed_max_retry_later() {
|
||||
let exec_fn =
|
||||
|_| async { Err(Error::retry_later(MockError::new(StatusCode::Unexpected))) }.boxed();
|
||||
let rollback_fn = move |_| {
|
||||
async move { Err(Error::retry_later(MockError::new(StatusCode::Unexpected))) }.boxed()
|
||||
};
|
||||
let exceed_max_retry_later = ProcedureAdapter {
|
||||
data: "exceed_max_rollback".to_string(),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
rollback_fn: Some(Box::new(rollback_fn)),
|
||||
};
|
||||
|
||||
let dir = create_temp_dir("exceed_max_rollback");
|
||||
let meta = exceed_max_retry_later.new_meta(ROOT_ID);
|
||||
let object_store = test_util::new_object_store(&dir);
|
||||
let procedure_store = Arc::new(ProcedureStore::from_object_store(object_store.clone()));
|
||||
let mut runner = new_runner(
|
||||
meta.clone(),
|
||||
Box::new(exceed_max_retry_later),
|
||||
procedure_store,
|
||||
);
|
||||
runner.manager_ctx.start();
|
||||
runner.exponential_builder = ExponentialBuilder::default()
|
||||
.with_min_delay(Duration::from_millis(1))
|
||||
.with_max_times(3);
|
||||
|
||||
// Run the runner and execute the procedure.
|
||||
runner.execute_procedure_in_loop().await;
|
||||
let err = meta.state().error().unwrap().to_string();
|
||||
assert!(err.contains("Procedure rollback exceeded max times"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_rollback_after_retry_fail() {
|
||||
let exec_fn = move |_| {
|
||||
async move { Err(Error::retry_later(MockError::new(StatusCode::Unexpected))) }.boxed()
|
||||
};
|
||||
|
||||
let (tx, mut rx) = mpsc::channel(1);
|
||||
let rollback_fn = move |_| {
|
||||
let tx = tx.clone();
|
||||
async move {
|
||||
tx.send(()).await.unwrap();
|
||||
Ok(())
|
||||
}
|
||||
.boxed()
|
||||
};
|
||||
let retry_later = ProcedureAdapter {
|
||||
data: "rollback_after_retry_fail".to_string(),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
rollback_fn: Some(Box::new(rollback_fn)),
|
||||
};
|
||||
|
||||
let dir = create_temp_dir("retry_later");
|
||||
let meta = retry_later.new_meta(ROOT_ID);
|
||||
let ctx = context_without_provider(meta.id);
|
||||
let object_store = test_util::new_object_store(&dir);
|
||||
let procedure_store = Arc::new(ProcedureStore::from_object_store(object_store.clone()));
|
||||
let mut runner = new_runner(meta.clone(), Box::new(retry_later), procedure_store.clone());
|
||||
runner.manager_ctx.start();
|
||||
runner.exponential_builder = ExponentialBuilder::default()
|
||||
.with_min_delay(Duration::from_millis(1))
|
||||
.with_max_times(3);
|
||||
// Run the runner and execute the procedure.
|
||||
runner.execute_procedure_in_loop().await;
|
||||
rx.recv().await.unwrap();
|
||||
assert_eq!(rx.try_recv().unwrap_err(), mpsc::error::TryRecvError::Empty);
|
||||
check_files(
|
||||
&object_store,
|
||||
&procedure_store,
|
||||
ctx.procedure_id,
|
||||
&["0000000000.rollback"],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_child_error() {
|
||||
let mut times = 0;
|
||||
@@ -1013,6 +1187,7 @@ mod tests {
|
||||
data: "fail".to_string(),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table.region-0"),
|
||||
exec_fn,
|
||||
rollback_fn: None,
|
||||
};
|
||||
|
||||
Ok(Status::Suspended {
|
||||
@@ -1047,6 +1222,7 @@ mod tests {
|
||||
data: "parent".to_string(),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
rollback_fn: None,
|
||||
};
|
||||
|
||||
let dir = create_temp_dir("child_err");
|
||||
|
||||
@@ -23,7 +23,7 @@ use smallvec::{smallvec, SmallVec};
use snafu::{ResultExt, Snafu};
use uuid::Uuid;

use crate::error::{Error, Result};
use crate::error::{self, Error, Result};
use crate::watcher::Watcher;

pub type Output = Arc<dyn Any + Send + Sync>;
@@ -125,6 +125,18 @@ pub trait Procedure: Send {
    /// The implementation must be idempotent.
    async fn execute(&mut self, ctx: &Context) -> Result<Status>;

    /// Rollback the failed procedure.
    ///
    /// The implementation must be idempotent.
    async fn rollback(&mut self, _: &Context) -> Result<()> {
        error::RollbackNotSupportedSnafu {}.fail()
    }

    /// Indicates whether it supports rolling back the procedure.
    fn rollback_supported(&self) -> bool {
        false
    }

    /// Dump the state of the procedure to a string.
    fn dump(&self) -> Result<String>;

@@ -289,6 +301,10 @@ pub enum ProcedureState {
    Done { output: Option<Output> },
    /// The procedure is failed and can be retried.
    Retrying { error: Arc<Error> },
    /// The procedure is failed and commits state before rolling back the procedure.
    PrepareRollback { error: Arc<Error> },
    /// The procedure is failed and can be rollback.
    RollingBack { error: Arc<Error> },
    /// The procedure is failed and cannot proceed anymore.
    Failed { error: Arc<Error> },
}
@@ -299,6 +315,16 @@ impl ProcedureState {
        ProcedureState::Failed { error }
    }

    /// Returns a [ProcedureState] with prepare rollback state.
    pub fn prepare_rollback(error: Arc<Error>) -> ProcedureState {
        ProcedureState::PrepareRollback { error }
    }

    /// Returns a [ProcedureState] with rolling back state.
    pub fn rolling_back(error: Arc<Error>) -> ProcedureState {
        ProcedureState::RollingBack { error }
    }

    /// Returns a [ProcedureState] with retrying state.
    pub fn retrying(error: Arc<Error>) -> ProcedureState {
        ProcedureState::Retrying { error }
@@ -324,16 +350,34 @@ impl ProcedureState {
        matches!(self, ProcedureState::Retrying { .. })
    }

    /// Returns true if the procedure state is rolling back.
    pub fn is_rolling_back(&self) -> bool {
        matches!(self, ProcedureState::RollingBack { .. })
    }

    /// Returns true if the procedure state is prepare rollback.
    pub fn is_prepare_rollback(&self) -> bool {
        matches!(self, ProcedureState::PrepareRollback { .. })
    }

    /// Returns the error.
    pub fn error(&self) -> Option<&Arc<Error>> {
        match self {
            ProcedureState::Failed { error } => Some(error),
            ProcedureState::Retrying { error } => Some(error),
            ProcedureState::RollingBack { error } => Some(error),
            _ => None,
        }
    }
}

/// The initial procedure state.
#[derive(Debug, Clone)]
pub enum InitProcedureState {
    Running,
    RollingBack,
}

// TODO(yingwen): Shutdown
/// `ProcedureManager` executes [Procedure] submitted to it.
#[async_trait]

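Taken together with the runner changes above, these additions give failed procedures a rollback path: a non-retryable error moves the procedure to `PrepareRollback` (a rollback record is persisted), then to `RollingBack` (the procedure's `rollback` hook runs, if supported), and finally to `Failed`, which is also the order asserted by `test_execute_with_rollback_on_error` earlier in this diff. The sketch below is a simplified, self-contained model of those transitions; every type in it is a stand-in, not the crate's real `Runner` or `ProcedureState`.

```rust
// Simplified state machine mirroring the transitions added in this change.
// All types here are illustrative stand-ins, not the real crate API.
#[derive(Debug, Clone, PartialEq)]
enum State {
    Running,
    Retrying,
    PrepareRollback,
    RollingBack,
    Failed,
}

struct Step {
    exec_ok: bool,            // whether execute() succeeds
    retryable: bool,          // whether the error is marked retry-later
    rollback_supported: bool, // whether the procedure implements rollback()
}

fn next(state: State, step: &Step) -> State {
    match state {
        State::Running | State::Retrying => {
            if step.exec_ok {
                State::Running
            } else if step.retryable {
                State::Retrying
            } else {
                // Non-retryable error: persist a rollback record first.
                State::PrepareRollback
            }
        }
        // After the rollback key is written, roll back only if supported.
        State::PrepareRollback => {
            if step.rollback_supported {
                State::RollingBack
            } else {
                State::Failed
            }
        }
        // rollback() ran; once retries are exhausted the procedure ends up failed.
        State::RollingBack => State::Failed,
        State::Failed => State::Failed,
    }
}

fn main() {
    let step = Step { exec_ok: false, retryable: false, rollback_supported: true };
    let mut state = State::Running;
    let mut trace = vec![state.clone()];
    while state != State::Failed {
        state = next(state, &step);
        trace.push(state.clone());
    }
    // Running -> PrepareRollback -> RollingBack -> Failed
    println!("{trace:?}");
}
```

Retry and backoff bookkeeping (`RetryTimesExceeded`, `RollbackTimesExceeded`) is elided here; the real runner loops with an exponential backoff before giving up.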
@@ -50,6 +50,9 @@ pub struct ProcedureMessage {
|
||||
pub parent_id: Option<ProcedureId>,
|
||||
/// Current step.
|
||||
pub step: u32,
|
||||
/// Errors raised during the procedure.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
/// Procedure storage layer.
|
||||
@@ -85,6 +88,7 @@ impl ProcedureStore {
|
||||
data,
|
||||
parent_id,
|
||||
step,
|
||||
error: None,
|
||||
};
|
||||
let key = ParsedKey {
|
||||
prefix: &self.proc_path,
|
||||
@@ -122,16 +126,19 @@ impl ProcedureStore {
|
||||
pub(crate) async fn rollback_procedure(
|
||||
&self,
|
||||
procedure_id: ProcedureId,
|
||||
step: u32,
|
||||
message: ProcedureMessage,
|
||||
) -> Result<()> {
|
||||
let key = ParsedKey {
|
||||
prefix: &self.proc_path,
|
||||
procedure_id,
|
||||
step,
|
||||
step: message.step,
|
||||
key_type: KeyType::Rollback,
|
||||
}
|
||||
.to_string();
|
||||
self.store.put(&key, Vec::new()).await?;
|
||||
|
||||
self.store
|
||||
.put(&key, serde_json::to_vec(&message).context(ToJsonSnafu)?)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -176,11 +183,18 @@ impl ProcedureStore {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load procedures from the storage. Returns a map of uncommitted procedures and a list
|
||||
/// of finished procedures' ids.
|
||||
/// Load procedures from the storage.
|
||||
/// Returns:
|
||||
/// - a map of uncommitted procedures
|
||||
/// - a map of rolling back procedures
|
||||
/// - a list of finished procedures' ids
|
||||
pub(crate) async fn load_messages(
|
||||
&self,
|
||||
) -> Result<(HashMap<ProcedureId, ProcedureMessage>, Vec<ProcedureId>)> {
|
||||
) -> Result<(
|
||||
HashMap<ProcedureId, ProcedureMessage>,
|
||||
HashMap<ProcedureId, ProcedureMessage>,
|
||||
Vec<ProcedureId>,
|
||||
)> {
|
||||
// Track the key-value pair by procedure id.
|
||||
let mut procedure_key_values: HashMap<_, (ParsedKey, Vec<u8>)> = HashMap::new();
|
||||
|
||||
@@ -204,21 +218,33 @@ impl ProcedureStore {
|
||||
}
|
||||
|
||||
let mut messages = HashMap::with_capacity(procedure_key_values.len());
|
||||
let mut rollback_messages = HashMap::new();
|
||||
let mut finished_ids = Vec::new();
|
||||
for (procedure_id, (parsed_key, value)) in procedure_key_values {
|
||||
if parsed_key.key_type == KeyType::Step {
|
||||
let Some(message) = self.load_one_message(&parsed_key, &value) else {
|
||||
// We don't abort the loading process and just ignore errors to ensure all remaining
|
||||
// procedures are loaded.
|
||||
continue;
|
||||
};
|
||||
let _ = messages.insert(procedure_id, message);
|
||||
} else {
|
||||
finished_ids.push(procedure_id);
|
||||
match parsed_key.key_type {
|
||||
KeyType::Step => {
|
||||
let Some(message) = self.load_one_message(&parsed_key, &value) else {
|
||||
// We don't abort the loading process and just ignore errors to ensure all remaining
|
||||
// procedures are loaded.
|
||||
continue;
|
||||
};
|
||||
let _ = messages.insert(procedure_id, message);
|
||||
}
|
||||
KeyType::Commit => {
|
||||
finished_ids.push(procedure_id);
|
||||
}
|
||||
KeyType::Rollback => {
|
||||
let Some(message) = self.load_one_message(&parsed_key, &value) else {
|
||||
// We don't abort the loading process and just ignore errors to ensure all remaining
|
||||
// procedures are loaded.
|
||||
continue;
|
||||
};
|
||||
let _ = rollback_messages.insert(procedure_id, message);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((messages, finished_ids))
|
||||
Ok((messages, rollback_messages, finished_ids))
|
||||
}
|
||||
|
||||
fn load_one_message(&self, key: &ParsedKey, value: &[u8]) -> Option<ProcedureMessage> {
|
||||
@@ -430,6 +456,7 @@ mod tests {
|
||||
data: "no parent id".to_string(),
|
||||
parent_id: None,
|
||||
step: 4,
|
||||
error: None,
|
||||
};
|
||||
|
||||
let json = serde_json::to_string(&message).unwrap();
|
||||
@@ -490,8 +517,9 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let (messages, finished) = store.load_messages().await.unwrap();
|
||||
let (messages, rollback_messages, finished) = store.load_messages().await.unwrap();
|
||||
assert_eq!(1, messages.len());
|
||||
assert!(rollback_messages.is_empty());
|
||||
assert!(finished.is_empty());
|
||||
let msg = messages.get(&procedure_id).unwrap();
|
||||
let expect = ProcedureMessage {
|
||||
@@ -499,6 +527,7 @@ mod tests {
|
||||
data: "test store procedure".to_string(),
|
||||
parent_id: None,
|
||||
step: 0,
|
||||
error: None,
|
||||
};
|
||||
assert_eq!(expect, *msg);
|
||||
}
|
||||
@@ -518,8 +547,9 @@ mod tests {
|
||||
.unwrap();
|
||||
store.commit_procedure(procedure_id, 1).await.unwrap();
|
||||
|
||||
let (messages, finished) = store.load_messages().await.unwrap();
|
||||
let (messages, rollback_messages, finished) = store.load_messages().await.unwrap();
|
||||
assert!(messages.is_empty());
|
||||
assert!(rollback_messages.is_empty());
|
||||
assert_eq!(&[procedure_id], &finished[..]);
|
||||
}
|
||||
|
||||
@@ -533,14 +563,32 @@ mod tests {
|
||||
let type_name = procedure.type_name().to_string();
|
||||
let data = procedure.dump().unwrap();
|
||||
store
|
||||
.store_procedure(procedure_id, 0, type_name, data, None)
|
||||
.store_procedure(
|
||||
procedure_id,
|
||||
0,
|
||||
type_name.to_string(),
|
||||
data.to_string(),
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let message = ProcedureMessage {
|
||||
type_name,
|
||||
data,
|
||||
parent_id: None,
|
||||
step: 1,
|
||||
error: None,
|
||||
};
|
||||
store
|
||||
.rollback_procedure(procedure_id, message)
|
||||
.await
|
||||
.unwrap();
|
||||
store.rollback_procedure(procedure_id, 1).await.unwrap();
|
||||
|
||||
let (messages, finished) = store.load_messages().await.unwrap();
|
||||
let (messages, rollback_messages, finished) = store.load_messages().await.unwrap();
|
||||
assert!(messages.is_empty());
|
||||
assert_eq!(&[procedure_id], &finished[..]);
|
||||
assert_eq!(1, rollback_messages.len());
|
||||
assert!(finished.is_empty());
|
||||
assert!(rollback_messages.contains_key(&procedure_id));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -565,8 +613,9 @@ mod tests {
|
||||
|
||||
store.delete_procedure(procedure_id).await.unwrap();
|
||||
|
||||
let (messages, finished) = store.load_messages().await.unwrap();
|
||||
let (messages, rollback_messages, finished) = store.load_messages().await.unwrap();
|
||||
assert!(messages.is_empty());
|
||||
assert!(rollback_messages.is_empty());
|
||||
assert!(finished.is_empty());
|
||||
}
|
||||
|
||||
@@ -595,8 +644,9 @@ mod tests {
|
||||
|
||||
store.delete_procedure(procedure_id).await.unwrap();
|
||||
|
||||
let (messages, finished) = store.load_messages().await.unwrap();
|
||||
let (messages, rollback_messages, finished) = store.load_messages().await.unwrap();
|
||||
assert!(messages.is_empty());
|
||||
assert!(rollback_messages.is_empty());
|
||||
assert!(finished.is_empty());
|
||||
}
|
||||
|
||||
@@ -657,8 +707,9 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let (messages, finished) = store.load_messages().await.unwrap();
|
||||
let (messages, rollback_messages, finished) = store.load_messages().await.unwrap();
|
||||
assert_eq!(2, messages.len());
|
||||
assert!(rollback_messages.is_empty());
|
||||
assert_eq!(1, finished.len());
|
||||
|
||||
let msg = messages.get(&id0).unwrap();
|
||||
|
||||
@@ -37,6 +37,12 @@ pub async fn wait(watcher: &mut Watcher) -> Result<Option<Output>> {
|
||||
ProcedureState::Retrying { error } => {
|
||||
debug!("retrying, source: {}", error)
|
||||
}
|
||||
ProcedureState::RollingBack { error } => {
|
||||
debug!("rolling back, source: {:?}", error)
|
||||
}
|
||||
ProcedureState::PrepareRollback { error } => {
|
||||
debug!("commit rollback, source: {}", error)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,12 +17,12 @@
|
||||
mod df_substrait;
|
||||
pub mod error;
|
||||
pub mod extension_serializer;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bytes::{Buf, Bytes};
|
||||
use datafusion::catalog::CatalogList;
|
||||
pub use substrait_proto;
|
||||
|
||||
pub use crate::df_substrait::DFLogicalSubstraitConvertor;
|
||||
|
||||
|
||||
@@ -17,16 +17,16 @@ pub mod raft_engine;

use serde::{Deserialize, Serialize};

use crate::config::kafka::{DatanodeKafkaConfig, MetaSrvKafkaConfig, StandaloneKafkaConfig};
use crate::config::kafka::{DatanodeKafkaConfig, MetasrvKafkaConfig, StandaloneKafkaConfig};
use crate::config::raft_engine::RaftEngineConfig;

/// Wal configurations for metasrv.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
#[serde(tag = "provider", rename_all = "snake_case")]
pub enum MetaSrvWalConfig {
pub enum MetasrvWalConfig {
    #[default]
    RaftEngine,
    Kafka(MetaSrvKafkaConfig),
    Kafka(MetasrvKafkaConfig),
}

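The `#[serde(tag = "provider", rename_all = "snake_case")]` attribute is what lets the single `provider` key in the TOML select the WAL backend, as the deserialization tests later in this diff exercise with the real types. Here is a small stand-alone sketch of the same mechanism, assuming the `serde` (with `derive`) and `toml` crates and using an invented `WalConfig`/`KafkaConfig` pair rather than the real ones.

```rust
use serde::Deserialize;

#[derive(Debug, Deserialize, PartialEq)]
struct KafkaConfig {
    // Illustrative field only; the real config has many more options.
    broker_endpoints: Vec<String>,
}

// Internally tagged enum: the `provider` key picks the variant,
// and variant names are matched in snake_case.
#[derive(Debug, Deserialize, PartialEq)]
#[serde(tag = "provider", rename_all = "snake_case")]
enum WalConfig {
    RaftEngine,
    Kafka(KafkaConfig),
}

fn main() {
    let cfg: WalConfig = toml::from_str(r#"provider = "raft_engine""#).unwrap();
    assert_eq!(cfg, WalConfig::RaftEngine);

    let cfg: WalConfig = toml::from_str(
        r#"
        provider = "kafka"
        broker_endpoints = ["127.0.0.1:9092"]
        "#,
    )
    .unwrap();
    assert!(matches!(cfg, WalConfig::Kafka(_)));
}
```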
/// Wal configurations for datanode.
|
||||
@@ -57,11 +57,11 @@ impl Default for StandaloneWalConfig {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StandaloneWalConfig> for MetaSrvWalConfig {
|
||||
impl From<StandaloneWalConfig> for MetasrvWalConfig {
|
||||
fn from(config: StandaloneWalConfig) -> Self {
|
||||
match config {
|
||||
StandaloneWalConfig::RaftEngine(_) => Self::RaftEngine,
|
||||
StandaloneWalConfig::Kafka(config) => Self::Kafka(MetaSrvKafkaConfig {
|
||||
StandaloneWalConfig::Kafka(config) => Self::Kafka(MetasrvKafkaConfig {
|
||||
broker_endpoints: config.broker_endpoints,
|
||||
num_topics: config.num_topics,
|
||||
selector_type: config.selector_type,
|
||||
@@ -100,7 +100,7 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::config::kafka::common::BackoffConfig;
|
||||
use crate::config::{DatanodeKafkaConfig, MetaSrvKafkaConfig, StandaloneKafkaConfig};
|
||||
use crate::config::{DatanodeKafkaConfig, MetasrvKafkaConfig, StandaloneKafkaConfig};
|
||||
use crate::TopicSelectorType;
|
||||
|
||||
#[test]
|
||||
@@ -109,8 +109,8 @@ mod tests {
|
||||
let toml_str = r#"
|
||||
provider = "raft_engine"
|
||||
"#;
|
||||
let metasrv_wal_config: MetaSrvWalConfig = toml::from_str(toml_str).unwrap();
|
||||
assert_eq!(metasrv_wal_config, MetaSrvWalConfig::RaftEngine);
|
||||
let metasrv_wal_config: MetasrvWalConfig = toml::from_str(toml_str).unwrap();
|
||||
assert_eq!(metasrv_wal_config, MetasrvWalConfig::RaftEngine);
|
||||
|
||||
let datanode_wal_config: DatanodeWalConfig = toml::from_str(toml_str).unwrap();
|
||||
assert_eq!(
|
||||
@@ -166,9 +166,9 @@ mod tests {
|
||||
backoff_deadline = "5mins"
|
||||
"#;
|
||||
|
||||
// Deserialized to MetaSrvWalConfig.
|
||||
let metasrv_wal_config: MetaSrvWalConfig = toml::from_str(toml_str).unwrap();
|
||||
let expected = MetaSrvKafkaConfig {
|
||||
// Deserialized to MetasrvWalConfig.
|
||||
let metasrv_wal_config: MetasrvWalConfig = toml::from_str(toml_str).unwrap();
|
||||
let expected = MetasrvKafkaConfig {
|
||||
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
|
||||
num_topics: 32,
|
||||
selector_type: TopicSelectorType::RoundRobin,
|
||||
@@ -183,7 +183,7 @@ mod tests {
|
||||
deadline: Some(Duration::from_secs(60 * 5)),
|
||||
},
|
||||
};
|
||||
assert_eq!(metasrv_wal_config, MetaSrvWalConfig::Kafka(expected));
|
||||
assert_eq!(metasrv_wal_config, MetasrvWalConfig::Kafka(expected));
|
||||
|
||||
// Deserialized to DatanodeWalConfig.
|
||||
let datanode_wal_config: DatanodeWalConfig = toml::from_str(toml_str).unwrap();
|
||||
|
||||
@@ -18,5 +18,5 @@ pub mod metasrv;
|
||||
pub mod standalone;
|
||||
|
||||
pub use datanode::DatanodeKafkaConfig;
|
||||
pub use metasrv::MetaSrvKafkaConfig;
|
||||
pub use metasrv::MetasrvKafkaConfig;
|
||||
pub use standalone::StandaloneKafkaConfig;
|
||||
|
||||
@@ -22,7 +22,7 @@ use crate::{TopicSelectorType, BROKER_ENDPOINT, TOPIC_NAME_PREFIX};
|
||||
/// Kafka wal configurations for metasrv.
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct MetaSrvKafkaConfig {
|
||||
pub struct MetasrvKafkaConfig {
|
||||
/// The broker endpoints of the Kafka cluster.
|
||||
pub broker_endpoints: Vec<String>,
|
||||
/// The number of topics to be created upon start.
|
||||
@@ -43,7 +43,7 @@ pub struct MetaSrvKafkaConfig {
|
||||
pub backoff: BackoffConfig,
|
||||
}
|
||||
|
||||
impl Default for MetaSrvKafkaConfig {
|
||||
impl Default for MetasrvKafkaConfig {
|
||||
fn default() -> Self {
|
||||
let broker_endpoints = vec![BROKER_ENDPOINT.to_string()];
|
||||
let replication_factor = broker_endpoints.len() as i16;
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
|
||||
//! Datanode configurations
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_grpc::channel_manager::{
|
||||
DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE, DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE,
|
||||
@@ -65,13 +63,6 @@ impl ObjectStoreConfig {
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct StorageConfig {
|
||||
/// Retention period for all tables.
|
||||
///
|
||||
/// Default value is `None`, which means no TTL.
|
||||
///
|
||||
/// The precedence order is: ttl in table options > global ttl.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub global_ttl: Option<Duration>,
|
||||
/// The working directory of database
|
||||
pub data_home: String,
|
||||
#[serde(flatten)]
|
||||
@@ -82,7 +73,6 @@ pub struct StorageConfig {
|
||||
impl Default for StorageConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
global_ttl: None,
|
||||
data_home: DEFAULT_DATA_HOME.to_string(),
|
||||
store: ObjectStoreConfig::default(),
|
||||
providers: vec![],
|
||||
|
||||
@@ -545,9 +545,7 @@ impl RegionServerInner {
|
||||
match region_change {
|
||||
RegionChange::None => {}
|
||||
RegionChange::Register(_, _) | RegionChange::Deregisters => {
|
||||
self.region_map
|
||||
.remove(®ion_id)
|
||||
.map(|(id, engine)| engine.set_writable(id, false));
|
||||
self.region_map.remove(®ion_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,17 +14,30 @@ common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
common-time.workspace = true
|
||||
datafusion-substrait.workspace = true
|
||||
datatypes.workspace = true
|
||||
enum_dispatch = "0.3"
|
||||
hydroflow = "0.5.0"
# This fork simply keeps the dependency within our org and pins the version;
# it is the same as the upstream repo.
datafusion-common.workspace = true
datafusion-expr.workspace = true
hydroflow = { git = "https://github.com/GreptimeTeam/hydroflow.git", rev = "ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94" }
itertools.workspace = true
|
||||
num-traits = "0.2"
|
||||
serde.workspace = true
|
||||
servers.workspace = true
|
||||
smallvec.workspace = true
|
||||
snafu.workspace = true
|
||||
strum.workspace = true
|
||||
substrait.workspace = true
|
||||
tokio.workspace = true
|
||||
tonic.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
catalog.workspace = true
|
||||
common-catalog.workspace = true
|
||||
prost.workspace = true
|
||||
query.workspace = true
|
||||
serde_json = "1.0"
|
||||
session.workspace = true
|
||||
table.workspace = true
|
||||
|
||||
3
src/flow/clippy.toml
Normal file
@@ -0,0 +1,3 @@
# Whether to only check for missing documentation in items visible within the current crate. For example, pub(crate) items. (default: false)
# This is a config for clippy::missing_docs_in_private_items
missing-docs-in-crate-items = true
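For illustration only: with `missing-docs-in-crate-items = true`, the `clippy::missing_docs_in_private_items` lint named in the comment above is restricted to crate-visible items, so `pub(crate)` items are expected to carry doc comments. The snippet below is a hypothetical example, not code from this repository, and the exact set of items clippy skips may vary by version.

```rust
// Enable the lint that the clippy.toml key above configures.
#![warn(clippy::missing_docs_in_private_items)]

/// A crate-visible counter; with `missing-docs-in-crate-items = true`
/// the lint expects this doc comment (and the ones below) to exist.
pub(crate) struct FlowCounter {
    /// Current value of the counter.
    pub(crate) value: u64,
}

/// Increments the counter by one.
pub(crate) fn bump(counter: &mut FlowCounter) {
    counter.value += 1;
}

fn main() {
    let mut counter = FlowCounter { value: 0 };
    bump(&mut counter);
    assert_eq!(counter.value, 1);
}
```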
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Error definition for flow module
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use common_macro::stack_trace_debug;
|
||||
@@ -25,6 +27,7 @@ use snafu::{Location, Snafu};
|
||||
|
||||
use crate::expr::EvalError;
|
||||
|
||||
/// This error is used to represent all possible errors that can occur in the flow module.
|
||||
#[derive(Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
#[stack_trace_debug]
|
||||
@@ -54,18 +57,49 @@ pub enum Error {
|
||||
|
||||
#[snafu(display("No protobuf type for value: {value}"))]
|
||||
NoProtoType { value: Value, location: Location },
|
||||
|
||||
#[snafu(display("Not implement in flow: {reason}"))]
|
||||
NotImplemented { reason: String, location: Location },
|
||||
|
||||
#[snafu(display("Flow plan error: {reason}"))]
|
||||
Plan { reason: String, location: Location },
|
||||
|
||||
#[snafu(display("Unsupported temporal filter: {reason}"))]
|
||||
UnsupportedTemporalFilter { reason: String, location: Location },
|
||||
|
||||
#[snafu(display("Datatypes error: {source} with extra message: {extra}"))]
|
||||
Datatypes {
|
||||
source: datatypes::Error,
|
||||
extra: String,
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Datafusion error: {raw:?} in context: {context}"))]
|
||||
Datafusion {
|
||||
raw: datafusion_common::DataFusionError,
|
||||
context: String,
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
/// Result type for flow module
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
impl ErrorExt for Error {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
match self {
|
||||
Self::Eval { .. } | &Self::JoinTask { .. } => StatusCode::Internal,
|
||||
Self::Eval { .. } | &Self::JoinTask { .. } | &Self::Datafusion { .. } => {
|
||||
StatusCode::Internal
|
||||
}
|
||||
&Self::TableAlreadyExist { .. } => StatusCode::TableAlreadyExists,
|
||||
Self::TableNotFound { .. } => StatusCode::TableNotFound,
|
||||
&Self::InvalidQuery { .. } => StatusCode::PlanQuery,
|
||||
&Self::InvalidQuery { .. } | &Self::Plan { .. } | &Self::Datatypes { .. } => {
|
||||
StatusCode::PlanQuery
|
||||
}
|
||||
Self::NoProtoType { .. } => StatusCode::Unexpected,
|
||||
&Self::NotImplemented { .. } | Self::UnsupportedTemporalFilter { .. } => {
|
||||
StatusCode::Unsupported
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
src/flow/src/compute.rs (new file, 19 lines)
@@ -0,0 +1,19 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Build and Compute the dataflow
|
||||
|
||||
mod render;
|
||||
mod state;
|
||||
mod types;
|
||||
src/flow/src/compute/render.rs (new file, 536 lines)
@@ -0,0 +1,536 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! In this file, `render` means converting a static `Plan` into an executable dataflow
|
||||
//!
|
||||
//! The [`Context`] is the environment for the render process; it contains all the information needed for rendering
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::collections::{BTreeMap, VecDeque};
|
||||
use std::rc::Rc;
|
||||
|
||||
use hydroflow::lattices::cc_traits::Get;
|
||||
use hydroflow::scheduled::graph::Hydroflow;
|
||||
use hydroflow::scheduled::graph_ext::GraphExt;
|
||||
use hydroflow::scheduled::port::{PortCtx, SEND};
|
||||
use itertools::Itertools;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use super::state::Scheduler;
|
||||
use crate::adapter::error::{Error, EvalSnafu, InvalidQuerySnafu};
|
||||
use crate::compute::state::DataflowState;
|
||||
use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector, Toff};
|
||||
use crate::expr::{
|
||||
self, EvalError, GlobalId, LocalId, MapFilterProject, MfpPlan, SafeMfpPlan, ScalarExpr,
|
||||
};
|
||||
use crate::plan::Plan;
|
||||
use crate::repr::{self, DiffRow, KeyValDiffRow, Row};
|
||||
use crate::utils::{ArrangeHandler, Arrangement};
|
||||
|
||||
/// The Context for building an Operator identified by a `GlobalId`
|
||||
pub struct Context<'referred, 'df> {
|
||||
pub id: GlobalId,
|
||||
pub df: &'referred mut Hydroflow<'df>,
|
||||
pub compute_state: &'referred mut DataflowState,
|
||||
/// a list of all collections being used in the operator
|
||||
pub input_collection: BTreeMap<GlobalId, CollectionBundle>,
|
||||
/// used by `Get`/`Let` Plan for getting/setting local variables
|
||||
///
|
||||
/// TODO(discord9): consider using Vec<(LocalId, CollectionBundle)> instead
|
||||
local_scope: Vec<BTreeMap<LocalId, CollectionBundle>>,
|
||||
// Collect all errors in this operator's evaluation
|
||||
err_collector: ErrCollector,
|
||||
}
|
||||
|
||||
impl<'referred, 'df> Drop for Context<'referred, 'df> {
|
||||
fn drop(&mut self) {
|
||||
for bundle in std::mem::take(&mut self.input_collection)
|
||||
.into_values()
|
||||
.chain(
|
||||
std::mem::take(&mut self.local_scope)
|
||||
.into_iter()
|
||||
.flat_map(|v| v.into_iter())
|
||||
.map(|(_k, v)| v),
|
||||
)
|
||||
{
|
||||
bundle.collection.into_inner().drop(self.df);
|
||||
drop(bundle.arranged);
|
||||
}
|
||||
// The automatically generated "drop glue" which recursively calls the destructors of all the fields (including the now empty `input_collection`)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'referred, 'df> Context<'referred, 'df> {
|
||||
pub fn insert_global(&mut self, id: GlobalId, collection: CollectionBundle) {
|
||||
self.input_collection.insert(id, collection);
|
||||
}
|
||||
|
||||
pub fn insert_local(&mut self, id: LocalId, collection: CollectionBundle) {
|
||||
if let Some(last) = self.local_scope.last_mut() {
|
||||
last.insert(id, collection);
|
||||
} else {
|
||||
let first = BTreeMap::from([(id, collection)]);
|
||||
self.local_scope.push(first);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// There is a false positive in using `Vec<ScalarExpr>` as key
|
||||
#[allow(clippy::mutable_key_type)]
|
||||
impl<'referred, 'df> Context<'referred, 'df> {
|
||||
/// Interpret and execute plan
|
||||
///
|
||||
/// return the output of this plan
|
||||
pub fn render_plan(&mut self, plan: Plan) -> Result<CollectionBundle, Error> {
|
||||
match plan {
|
||||
Plan::Constant { rows } => Ok(self.render_constant(rows)),
|
||||
Plan::Get { id } => self.get_by_id(id),
|
||||
Plan::Let { id, value, body } => self.eval_let(id, value, body),
|
||||
Plan::Mfp { input, mfp } => {
|
||||
self.render_map_filter_project_into_executable_dataflow(input, mfp)
|
||||
}
|
||||
Plan::Reduce { .. } => todo!(),
|
||||
Plan::Join { .. } => todo!(),
|
||||
Plan::Union { .. } => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Render a `Constant` plan; it only emits the `rows` once.
|
||||
pub fn render_constant(&mut self, mut rows: Vec<DiffRow>) -> CollectionBundle {
|
||||
let (send_port, recv_port) = self.df.make_edge::<_, Toff>("constant");
|
||||
|
||||
self.df
|
||||
.add_subgraph_source("Constant", send_port, move |_ctx, send_port| {
|
||||
if rows.is_empty() {
|
||||
return;
|
||||
}
|
||||
send_port.give(std::mem::take(&mut rows));
|
||||
});
|
||||
|
||||
CollectionBundle::from_collection(Collection::from_port(recv_port))
|
||||
}
|
||||
|
||||
pub fn get_by_id(&mut self, id: expr::Id) -> Result<CollectionBundle, Error> {
|
||||
let ret = match id {
|
||||
expr::Id::Local(local) => {
|
||||
let bundle = self
|
||||
.local_scope
|
||||
.iter()
|
||||
.rev()
|
||||
.find_map(|scope| scope.get(&local))
|
||||
.with_context(|| InvalidQuerySnafu {
|
||||
reason: format!("Local variable {:?} not found", local),
|
||||
})?;
|
||||
bundle.clone(self.df)
|
||||
}
|
||||
expr::Id::Global(id) => {
|
||||
let bundle = self
|
||||
.input_collection
|
||||
.get(&id)
|
||||
.with_context(|| InvalidQuerySnafu {
|
||||
reason: format!("Collection {:?} not found", id),
|
||||
})?;
|
||||
bundle.clone(self.df)
|
||||
}
|
||||
};
|
||||
Ok(ret)
|
||||
}
|
||||
|
||||
/// Eval `Let` operator, useful for assigning a value to a local variable
|
||||
pub fn eval_let(
|
||||
&mut self,
|
||||
id: LocalId,
|
||||
value: Box<Plan>,
|
||||
body: Box<Plan>,
|
||||
) -> Result<CollectionBundle, Error> {
|
||||
let value = self.render_plan(*value)?;
|
||||
|
||||
self.local_scope.push(Default::default());
|
||||
self.insert_local(id, value);
|
||||
let ret = self.render_plan(*body)?;
|
||||
Ok(ret)
|
||||
}
|
||||
|
||||
/// Render a `MapFilterProject` operator. All incoming rows are assumed to have sys time `now`; each row's stated sys time is ignored
|
||||
/// TODO(discord9): schedule the mfp operator to run when the temporal filter needs it
|
||||
///
|
||||
/// `MapFilterProject` (`mfp` for short) is scheduled to run when there are enough input updates
/// ***or*** when a future update in its output buffer (an `Arrangement`) is supposed to emit now.
|
||||
pub fn render_map_filter_project_into_executable_dataflow(
|
||||
&mut self,
|
||||
input: Box<Plan>,
|
||||
mfp: MapFilterProject,
|
||||
) -> Result<CollectionBundle, Error> {
|
||||
let input = self.render_plan(*input)?;
|
||||
// TODO(discord9): consider checking whether the plan contains a temporal filter to decide
// whether an arrangement is needed at all, or whether that added complexity is worth it
|
||||
let (out_send_port, out_recv_port) = self.df.make_edge::<_, Toff>("mfp");
|
||||
let input_arity = mfp.input_arity;
|
||||
|
||||
// default to an arrangement with only future updates, so it can stay empty if no temporal filter is applied,
// since the stream only sends current updates
|
||||
let arrange = Arrangement::new();
|
||||
let arrange_handler = ArrangeHandler::from(arrange.clone());
|
||||
let arrange_handler_inner = ArrangeHandler::from(arrange);
|
||||
|
||||
// This closure captures the following variables:
|
||||
let mfp_plan = MfpPlan::create_from(mfp)?;
|
||||
let now = self.compute_state.current_time_ref();
|
||||
|
||||
let err_collector = self.err_collector.clone();
|
||||
|
||||
// TODO(discord9): better way to schedule future run
|
||||
let scheduler = self.compute_state.get_scheduler();
|
||||
let scheduler_inner = scheduler.clone();
|
||||
|
||||
let subgraph = self.df.add_subgraph_in_out(
|
||||
"mfp",
|
||||
input.collection.into_inner(),
|
||||
out_send_port,
|
||||
move |_ctx, recv, send| {
|
||||
// mfp only needs to passively receive updates from recv
|
||||
let data = recv.take_inner().into_iter().flat_map(|v| v.into_iter());
|
||||
|
||||
mfp_subgraph(
|
||||
&arrange_handler_inner,
|
||||
data,
|
||||
&mfp_plan,
|
||||
*now.borrow(),
|
||||
&err_collector,
|
||||
&scheduler_inner,
|
||||
send,
|
||||
);
|
||||
},
|
||||
);
|
||||
|
||||
// register current subgraph in scheduler for future scheduling
|
||||
scheduler.set_cur_subgraph(subgraph);
|
||||
|
||||
let arranged = BTreeMap::from([(
|
||||
(0..input_arity).map(ScalarExpr::Column).collect_vec(),
|
||||
Arranged::new(arrange_handler),
|
||||
)]);
|
||||
|
||||
let bundle = CollectionBundle {
|
||||
collection: Collection::from_port(out_recv_port),
|
||||
arranged,
|
||||
};
|
||||
Ok(bundle)
|
||||
}
|
||||
}
|
||||
|
||||
fn mfp_subgraph(
|
||||
arrange: &ArrangeHandler,
|
||||
input: impl IntoIterator<Item = DiffRow>,
|
||||
mfp_plan: &MfpPlan,
|
||||
now: repr::Timestamp,
|
||||
err_collector: &ErrCollector,
|
||||
scheduler: &Scheduler,
|
||||
send: &PortCtx<SEND, Toff>,
|
||||
) {
|
||||
let run_mfp = || {
|
||||
let all_updates = eval_mfp_core(input, mfp_plan, now, err_collector);
|
||||
arrange.write().apply_updates(now, all_updates)?;
|
||||
Ok(())
|
||||
};
|
||||
err_collector.run(run_mfp);
|
||||
|
||||
// Deal with output:
|
||||
// 1. Read all updates that were emitted between the last time this arrangement had updates and the current time.
|
||||
// 2. Output the updates.
|
||||
// 3. Truncate all updates within that range.
|
||||
|
||||
let from = arrange.read().last_compaction_time().map(|n| n + 1);
|
||||
let from = from.unwrap_or(repr::Timestamp::MIN);
|
||||
let output_kv = arrange.read().get_updates_in_range(from..=now);
|
||||
// the output is expected to be key -> empty val
|
||||
let output = output_kv
|
||||
.into_iter()
|
||||
.map(|((key, _v), ts, diff)| (key, ts, diff))
|
||||
.collect_vec();
|
||||
send.give(output);
|
||||
let run_compaction = || {
|
||||
arrange.write().compaction_to(now)?;
|
||||
Ok(())
|
||||
};
|
||||
err_collector.run(run_compaction);
|
||||
|
||||
// schedule the next time this operator should run
|
||||
if let Some(i) = arrange.read().get_next_update_time(&now) {
|
||||
scheduler.schedule_at(i)
|
||||
}
|
||||
}
|
||||
|
||||
/// The core of evaluating an MFP operator: given an MFP plan and an input, evaluate the operator and
/// return the output updates **and** any errors that occurred during the evaluation
|
||||
fn eval_mfp_core(
|
||||
input: impl IntoIterator<Item = DiffRow>,
|
||||
mfp_plan: &MfpPlan,
|
||||
now: repr::Timestamp,
|
||||
err_collector: &ErrCollector,
|
||||
) -> Vec<KeyValDiffRow> {
|
||||
let mut all_updates = Vec::new();
|
||||
for (mut row, _sys_time, diff) in input.into_iter() {
|
||||
// these updates are expected to be only zero to two rows
|
||||
let updates = mfp_plan.evaluate::<EvalError>(&mut row.inner, now, diff);
|
||||
// TODO(discord9): refactor error handling
|
||||
// An error in a single row is expected not to interrupt the whole evaluation
|
||||
let updates = updates
|
||||
.filter_map(|r| match r {
|
||||
Ok((key, ts, diff)) => Some(((key, Row::empty()), ts, diff)),
|
||||
Err((err, _ts, _diff)) => {
|
||||
err_collector.push_err(err);
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect_vec();
|
||||
|
||||
all_updates.extend(updates);
|
||||
}
|
||||
all_updates
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::cell::RefCell;
|
||||
use std::rc::Rc;
|
||||
|
||||
use common_time::DateTime;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use hydroflow::scheduled::graph::Hydroflow;
|
||||
use hydroflow::scheduled::graph_ext::GraphExt;
|
||||
use hydroflow::scheduled::handoff::VecHandoff;
|
||||
|
||||
use super::*;
|
||||
use crate::expr::BinaryFunc;
|
||||
use crate::repr::Row;
|
||||
|
||||
fn harness_test_ctx<'r, 'h>(
|
||||
df: &'r mut Hydroflow<'h>,
|
||||
state: &'r mut DataflowState,
|
||||
) -> Context<'r, 'h> {
|
||||
let err_collector = state.get_err_collector();
|
||||
Context {
|
||||
id: GlobalId::User(0),
|
||||
df,
|
||||
compute_state: state,
|
||||
input_collection: BTreeMap::new(),
|
||||
local_scope: Default::default(),
|
||||
err_collector,
|
||||
}
|
||||
}
|
||||
|
||||
/// test if the temporal filter works properly,
/// namely whether the mfp operator can schedule a delete at the correct time
|
||||
#[test]
|
||||
fn test_render_mfp_with_temporal() {
|
||||
let mut df = Hydroflow::new();
|
||||
let mut state = DataflowState::default();
|
||||
let mut ctx = harness_test_ctx(&mut df, &mut state);
|
||||
|
||||
let rows = vec![
|
||||
(Row::new(vec![1i64.into()]), 1, 1),
|
||||
(Row::new(vec![2i64.into()]), 2, 1),
|
||||
(Row::new(vec![3i64.into()]), 3, 1),
|
||||
];
|
||||
let collection = ctx.render_constant(rows);
|
||||
ctx.insert_global(GlobalId::User(1), collection);
|
||||
let input_plan = Plan::Get {
|
||||
id: expr::Id::Global(GlobalId::User(1)),
|
||||
};
|
||||
// temporal filter: now <= col(0) < now + 4
|
||||
let mfp = MapFilterProject::new(1)
|
||||
.filter(vec![
|
||||
ScalarExpr::Column(0)
|
||||
.call_unary(expr::UnaryFunc::Cast(ConcreteDataType::datetime_datatype()))
|
||||
.call_binary(
|
||||
ScalarExpr::CallUnmaterializable(expr::UnmaterializableFunc::Now),
|
||||
BinaryFunc::Gte,
|
||||
),
|
||||
ScalarExpr::Column(0)
|
||||
.call_binary(
|
||||
ScalarExpr::literal(4i64.into(), ConcreteDataType::int64_datatype()),
|
||||
BinaryFunc::SubInt64,
|
||||
)
|
||||
.call_unary(expr::UnaryFunc::Cast(ConcreteDataType::datetime_datatype()))
|
||||
.call_binary(
|
||||
ScalarExpr::CallUnmaterializable(expr::UnmaterializableFunc::Now),
|
||||
BinaryFunc::Lt,
|
||||
),
|
||||
])
|
||||
.unwrap();
|
||||
|
||||
let mut bundle = ctx
|
||||
.render_map_filter_project_into_executable_dataflow(Box::new(input_plan), mfp)
|
||||
.unwrap();
|
||||
let collection = bundle.collection;
|
||||
let _arranged = bundle.arranged.pop_first().unwrap().1;
|
||||
let output = Rc::new(RefCell::new(vec![]));
|
||||
let output_inner = output.clone();
|
||||
let _subgraph = ctx.df.add_subgraph_sink(
|
||||
"test_render_constant",
|
||||
collection.into_inner(),
|
||||
move |_ctx, recv| {
|
||||
let data = recv.take_inner();
|
||||
let res = data.into_iter().flat_map(|v| v.into_iter()).collect_vec();
|
||||
output_inner.borrow_mut().clear();
|
||||
output_inner.borrow_mut().extend(res);
|
||||
},
|
||||
);
|
||||
// drop ctx here to simulate the actual compile-first, run-later scenario
|
||||
drop(ctx);
|
||||
// expected output at a given time: with the filter `now <= col(0) < now + 4`, every value v passes
// the filter at now = 0 and is retracted (diff = -1) at now = v + 1, the first tick where `now <= v` no longer holds
|
||||
let expected_output = BTreeMap::from([
|
||||
(
|
||||
0, // time
|
||||
vec![
|
||||
(Row::new(vec![1i64.into()]), 0, 1),
|
||||
(Row::new(vec![2i64.into()]), 0, 1),
|
||||
(Row::new(vec![3i64.into()]), 0, 1),
|
||||
],
|
||||
),
|
||||
(
|
||||
2, // time
|
||||
vec![(Row::new(vec![1i64.into()]), 2, -1)],
|
||||
),
|
||||
(
|
||||
3, // time
|
||||
vec![(Row::new(vec![2i64.into()]), 3, -1)],
|
||||
),
|
||||
(
|
||||
4, // time
|
||||
vec![(Row::new(vec![3i64.into()]), 4, -1)],
|
||||
),
|
||||
]);
|
||||
|
||||
for now in 0i64..5 {
|
||||
state.set_current_ts(now);
|
||||
state.run_available_with_schedule(&mut df);
|
||||
assert!(state.get_err_collector().inner.borrow().is_empty());
|
||||
if let Some(expected) = expected_output.get(&now) {
|
||||
assert_eq!(*output.borrow(), *expected);
|
||||
} else {
|
||||
assert_eq!(*output.borrow(), vec![]);
|
||||
};
|
||||
output.borrow_mut().clear();
|
||||
}
|
||||
}
|
||||
|
||||
/// test if the mfp operator without a temporal filter works properly,
/// that is, whether it filters the rows correctly
|
||||
#[test]
|
||||
fn test_render_mfp() {
|
||||
let mut df = Hydroflow::new();
|
||||
let mut state = DataflowState::default();
|
||||
let mut ctx = harness_test_ctx(&mut df, &mut state);
|
||||
|
||||
let rows = vec![
|
||||
(Row::new(vec![1.into()]), 1, 1),
|
||||
(Row::new(vec![2.into()]), 2, 1),
|
||||
(Row::new(vec![3.into()]), 3, 1),
|
||||
];
|
||||
let collection = ctx.render_constant(rows);
|
||||
ctx.insert_global(GlobalId::User(1), collection);
|
||||
let input_plan = Plan::Get {
|
||||
id: expr::Id::Global(GlobalId::User(1)),
|
||||
};
|
||||
// filter: col(0)>1
|
||||
let mfp = MapFilterProject::new(1)
|
||||
.filter(vec![ScalarExpr::Column(0).call_binary(
|
||||
ScalarExpr::literal(1.into(), ConcreteDataType::int32_datatype()),
|
||||
BinaryFunc::Gt,
|
||||
)])
|
||||
.unwrap();
|
||||
let bundle = ctx
|
||||
.render_map_filter_project_into_executable_dataflow(Box::new(input_plan), mfp)
|
||||
.unwrap();
|
||||
let collection = bundle.collection.clone(ctx.df);
|
||||
|
||||
ctx.df.add_subgraph_sink(
|
||||
"test_render_constant",
|
||||
collection.into_inner(),
|
||||
move |_ctx, recv| {
|
||||
let data = recv.take_inner();
|
||||
let res = data.into_iter().flat_map(|v| v.into_iter()).collect_vec();
|
||||
assert_eq!(
|
||||
res,
|
||||
vec![
|
||||
(Row::new(vec![2.into()]), 0, 1),
|
||||
(Row::new(vec![3.into()]), 0, 1),
|
||||
]
|
||||
)
|
||||
},
|
||||
);
|
||||
drop(ctx);
|
||||
|
||||
df.run_available();
|
||||
}
|
||||
|
||||
/// test if the constant operator works properly,
/// that is, it only emits once, not multiple times
|
||||
#[test]
|
||||
fn test_render_constant() {
|
||||
let mut df = Hydroflow::new();
|
||||
let mut state = DataflowState::default();
|
||||
let mut ctx = harness_test_ctx(&mut df, &mut state);
|
||||
|
||||
let rows = vec![
|
||||
(Row::empty(), 1, 1),
|
||||
(Row::empty(), 2, 1),
|
||||
(Row::empty(), 3, 1),
|
||||
];
|
||||
let collection = ctx.render_constant(rows);
|
||||
let collection = collection.collection.clone(ctx.df);
|
||||
let cnt = Rc::new(RefCell::new(0));
|
||||
let cnt_inner = cnt.clone();
|
||||
ctx.df.add_subgraph_sink(
|
||||
"test_render_constant",
|
||||
collection.into_inner(),
|
||||
move |_ctx, recv| {
|
||||
let data = recv.take_inner();
|
||||
*cnt_inner.borrow_mut() += data.iter().map(|v| v.len()).sum::<usize>();
|
||||
},
|
||||
);
|
||||
ctx.df.run_available();
|
||||
assert_eq!(*cnt.borrow(), 3);
|
||||
ctx.df.run_available();
|
||||
assert_eq!(*cnt.borrow(), 3);
|
||||
}
|
||||
|
||||
/// a simple example to show how to use source and sink
|
||||
#[test]
|
||||
fn example_source_sink() {
|
||||
let mut df = Hydroflow::new();
|
||||
let (send_port, recv_port) = df.make_edge::<_, VecHandoff<i32>>("test_handoff");
|
||||
df.add_subgraph_source("test_handoff_source", send_port, move |_ctx, send| {
|
||||
for i in 0..10 {
|
||||
send.give(vec![i]);
|
||||
}
|
||||
});
|
||||
|
||||
let sum = Rc::new(RefCell::new(0));
|
||||
let sum_move = sum.clone();
|
||||
let sink = df.add_subgraph_sink("test_handoff_sink", recv_port, move |_ctx, recv| {
|
||||
let data = recv.take_inner();
|
||||
*sum_move.borrow_mut() += data.iter().sum::<i32>();
|
||||
});
|
||||
|
||||
df.run_available();
|
||||
assert_eq!(sum.borrow().to_owned(), 45);
|
||||
df.schedule_subgraph(sink);
|
||||
df.run_available();
|
||||
|
||||
assert_eq!(sum.borrow().to_owned(), 45);
|
||||
}
|
||||
}
|
||||
src/flow/src/compute/state.rs (new file, 106 lines)
@@ -0,0 +1,106 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::collections::{BTreeMap, BTreeSet, VecDeque};
|
||||
use std::rc::Rc;
|
||||
|
||||
use hydroflow::scheduled::graph::Hydroflow;
|
||||
use hydroflow::scheduled::SubgraphId;
|
||||
|
||||
use crate::compute::types::ErrCollector;
|
||||
use crate::repr::{self, Timestamp};
|
||||
|
||||
/// input/output of a dataflow
|
||||
/// One `DataflowState` manages the input/output/schedule of one `Hydroflow`
|
||||
#[derive(Default)]
|
||||
pub struct DataflowState {
|
||||
/// it is important to use a deque to maintain the order of subgraphs here
/// TODO(discord9): consider dedup? Not strictly necessary, since hydroflow itself also dedups when scheduling
|
||||
schedule_subgraph: Rc<RefCell<BTreeMap<Timestamp, VecDeque<SubgraphId>>>>,
|
||||
/// Frontier (in sys time) before which updates should not be emitted.
|
||||
///
|
||||
/// We *must* apply it to sinks, to ensure correct outputs.
|
||||
/// We *should* apply it to sources and imported shared state, because it improves performance.
|
||||
/// This also means it is the current time used by temporal filters to get the correct current result
|
||||
as_of: Rc<RefCell<Timestamp>>,
|
||||
/// error collector local to this `DataflowState`,
|
||||
/// useful for distinguishing errors from different `Hydroflow`
|
||||
err_collector: ErrCollector,
|
||||
}
|
||||
|
||||
impl DataflowState {
|
||||
/// schedule all subgraphs that need to run with time <= `as_of`, then call `run_available()`
|
||||
///
|
||||
/// return true if any subgraph actually executed
|
||||
pub fn run_available_with_schedule(&mut self, df: &mut Hydroflow) -> bool {
|
||||
// first split keys <= as_of into another map
|
||||
let mut before = self
|
||||
.schedule_subgraph
|
||||
.borrow_mut()
|
||||
.split_off(&(*self.as_of.borrow() + 1));
|
||||
std::mem::swap(&mut before, &mut self.schedule_subgraph.borrow_mut());
|
||||
for (_, v) in before {
|
||||
for subgraph in v {
|
||||
df.schedule_subgraph(subgraph);
|
||||
}
|
||||
}
|
||||
df.run_available()
|
||||
}
|
||||
pub fn get_scheduler(&self) -> Scheduler {
|
||||
Scheduler {
|
||||
schedule_subgraph: self.schedule_subgraph.clone(),
|
||||
cur_subgraph: Rc::new(RefCell::new(None)),
|
||||
}
|
||||
}
|
||||
|
||||
/// return a handle to the current time; it updates when `as_of` is updated,
///
/// so a closure that is called later can still keep track of the current time
|
||||
pub fn current_time_ref(&self) -> Rc<RefCell<Timestamp>> {
|
||||
self.as_of.clone()
|
||||
}
|
||||
|
||||
pub fn current_ts(&self) -> Timestamp {
|
||||
*self.as_of.borrow()
|
||||
}
|
||||
|
||||
pub fn set_current_ts(&mut self, ts: Timestamp) {
|
||||
self.as_of.replace(ts);
|
||||
}
|
||||
|
||||
pub fn get_err_collector(&self) -> ErrCollector {
|
||||
self.err_collector.clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Scheduler {
|
||||
schedule_subgraph: Rc<RefCell<BTreeMap<Timestamp, VecDeque<SubgraphId>>>>,
|
||||
cur_subgraph: Rc<RefCell<Option<SubgraphId>>>,
|
||||
}
|
||||
|
||||
impl Scheduler {
|
||||
pub fn schedule_at(&self, next_run_time: Timestamp) {
|
||||
let mut schedule_subgraph = self.schedule_subgraph.borrow_mut();
|
||||
let subgraph = self.cur_subgraph.borrow();
|
||||
let subgraph = subgraph.as_ref().expect("Set SubgraphId before schedule");
|
||||
let subgraph_queue = schedule_subgraph.entry(next_run_time).or_default();
|
||||
subgraph_queue.push_back(*subgraph);
|
||||
}
|
||||
|
||||
pub fn set_cur_subgraph(&self, subgraph: SubgraphId) {
|
||||
self.cur_subgraph.replace(Some(subgraph));
|
||||
}
|
||||
}
|
||||
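A minimal usage sketch of the scheduling pieces above, using only the APIs shown in this diff (the actual subgraph construction is elided and only hinted at in comments):

// Sketch: how an operator registers itself and how a driver advances time.
let mut df = Hydroflow::new();
let mut state = DataflowState::default();

// When building an operator (see `render_map_filter_project_into_executable_dataflow`):
let scheduler = state.get_scheduler();
// let subgraph = df.add_subgraph_in_out("op", input, output, /* operator closure */);
// scheduler.set_cur_subgraph(subgraph);
// ...and from inside the operator closure, request a future wake-up:
// scheduler.schedule_at(next_run_time);

// Driver loop: advance the frontier, then run everything that became due.
for now in 0i64..5 {
    state.set_current_ts(now);
    state.run_available_with_schedule(&mut df);
}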
src/flow/src/compute/types.rs (new file, 162 lines)
@@ -0,0 +1,162 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::collections::{BTreeMap, VecDeque};
|
||||
use std::rc::Rc;
|
||||
use std::sync::Arc;
|
||||
|
||||
use hydroflow::scheduled::graph::Hydroflow;
|
||||
use hydroflow::scheduled::handoff::TeeingHandoff;
|
||||
use hydroflow::scheduled::port::RecvPort;
|
||||
use hydroflow::scheduled::SubgraphId;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::compute::render::Context;
|
||||
use crate::expr::{EvalError, ScalarExpr};
|
||||
use crate::repr::DiffRow;
|
||||
use crate::utils::{ArrangeHandler, Arrangement};
|
||||
|
||||
pub type Toff = TeeingHandoff<DiffRow>;
|
||||
|
||||
/// A collection represents data that is received from a handoff.
|
||||
pub struct Collection<T: 'static> {
|
||||
/// represents a stream of updates received from this port
|
||||
stream: RecvPort<TeeingHandoff<T>>,
|
||||
}
|
||||
|
||||
impl<T: 'static + Clone> Collection<T> {
|
||||
pub fn from_port(port: RecvPort<TeeingHandoff<T>>) -> Self {
|
||||
Collection { stream: port }
|
||||
}
|
||||
|
||||
/// clone a collection; requires a mutable reference to the hydroflow instance
///
/// Note: it must be the same hydroflow instance that this collection was created from
|
||||
pub fn clone(&self, df: &mut Hydroflow) -> Self {
|
||||
Collection {
|
||||
stream: self.stream.tee(df),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> RecvPort<TeeingHandoff<T>> {
|
||||
self.stream
|
||||
}
|
||||
}
|
||||
|
||||
/// Arranged is a wrapper around `ArrangeHandler` that maintains a list of readers and a writer
|
||||
pub struct Arranged {
|
||||
pub arrangement: ArrangeHandler,
|
||||
pub writer: Rc<RefCell<Option<SubgraphId>>>,
|
||||
/// maintains a list of readers for the arrangement, for ease of scheduling
|
||||
pub readers: Rc<RefCell<Vec<SubgraphId>>>,
|
||||
}
|
||||
|
||||
impl Arranged {
|
||||
pub fn new(arr: ArrangeHandler) -> Self {
|
||||
Self {
|
||||
arrangement: arr,
|
||||
writer: Default::default(),
|
||||
readers: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Copy its future-only updates; internally `Rc`-ed so it's cheap to copy
|
||||
pub fn try_copy_future(&self) -> Option<Self> {
|
||||
self.arrangement
|
||||
.clone_future_only()
|
||||
.map(|arrangement| Arranged {
|
||||
arrangement,
|
||||
readers: self.readers.clone(),
|
||||
writer: self.writer.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Copy the full arrangement, including the future and the current updates.
|
||||
///
|
||||
/// Internally `Rc`-ed so it's cheap to copy
|
||||
pub fn try_copy_full(&self) -> Option<Self> {
|
||||
self.arrangement
|
||||
.clone_full_arrange()
|
||||
.map(|arrangement| Arranged {
|
||||
arrangement,
|
||||
readers: self.readers.clone(),
|
||||
writer: self.writer.clone(),
|
||||
})
|
||||
}
|
||||
pub fn add_reader(&self, id: SubgraphId) {
|
||||
self.readers.borrow_mut().push(id)
|
||||
}
|
||||
}
|
||||
|
||||
/// A bundle of the various ways a collection can be represented.
|
||||
///
|
||||
/// This type maintains the invariant that it contains at least one (possibly both) valid
/// source of data: either a collection or at least one arrangement. This is for convenience
/// of reading the data from the collection.
|
||||
pub struct CollectionBundle {
|
||||
/// This is useful for passively reading the new updates from the collection
|
||||
pub collection: Collection<DiffRow>,
|
||||
/// the key [`ScalarExpr`]s indicate how the keys (also a [`Row`]) used in `Arranged` are extracted from the collection's [`Row`],
/// so it is the "index" of the arrangement
|
||||
///
|
||||
/// The `Arranged` is the actual data source, it can be used to read the data from the collection by
|
||||
/// using the key indicated by the `Vec<ScalarExpr>`
|
||||
pub arranged: BTreeMap<Vec<ScalarExpr>, Arranged>,
|
||||
}
|
||||
|
||||
impl CollectionBundle {
|
||||
pub fn from_collection(collection: Collection<DiffRow>) -> Self {
|
||||
Self {
|
||||
collection,
|
||||
arranged: BTreeMap::default(),
|
||||
}
|
||||
}
|
||||
pub fn clone(&self, df: &mut Hydroflow) -> Self {
|
||||
Self {
|
||||
collection: self.collection.clone(df),
|
||||
arranged: self
|
||||
.arranged
|
||||
.iter()
|
||||
.map(|(k, v)| (k.clone(), v.try_copy_future().unwrap()))
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A thread-local error collector, used to collect errors during the evaluation of the plan
///
/// usually only the first error matters, but all of them are stored just in case
///
/// A `VecDeque` is used to preserve the order of errors,
/// which matters when running the dataflow continuously and errors are needed in order
|
||||
#[derive(Default, Clone)]
|
||||
pub struct ErrCollector {
|
||||
pub inner: Rc<RefCell<VecDeque<EvalError>>>,
|
||||
}
|
||||
|
||||
impl ErrCollector {
|
||||
pub fn push_err(&self, err: EvalError) {
|
||||
self.inner.borrow_mut().push_back(err)
|
||||
}
|
||||
|
||||
pub fn run<F>(&self, f: F)
|
||||
where
|
||||
F: FnOnce() -> Result<(), EvalError>,
|
||||
{
|
||||
if let Err(e) = f() {
|
||||
self.push_err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
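A small usage sketch of `ErrCollector`, mirroring how `mfp_subgraph` above uses it: fallible steps are wrapped in a closure so a failure is recorded instead of aborting the subgraph run (uses only the APIs shown in this diff):

// Sketch: collect errors instead of propagating them.
let errs = ErrCollector::default();
errs.run(|| {
    // any fallible step goes here, e.g.:
    // arrange.write().apply_updates(now, all_updates)?;
    Ok(())
});
assert!(errs.inner.borrow().is_empty());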
@@ -20,10 +20,11 @@ mod id;
|
||||
mod linear;
|
||||
mod relation;
|
||||
mod scalar;
|
||||
mod signature;
|
||||
|
||||
pub(crate) use error::{EvalError, InvalidArgumentSnafu, OptimizeSnafu};
|
||||
pub(crate) use func::{BinaryFunc, UnaryFunc, UnmaterializableFunc, VariadicFunc};
|
||||
pub(crate) use id::{GlobalId, Id, LocalId};
|
||||
pub(crate) use linear::{MapFilterProject, MfpPlan, SafeMfpPlan};
|
||||
pub(crate) use relation::{AggregateExpr, AggregateFunc};
|
||||
pub(crate) use scalar::ScalarExpr;
|
||||
pub(crate) use scalar::{ScalarExpr, TypedExpr};
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Error handling for expression evaluation.
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use common_macro::stack_trace_debug;
|
||||
@@ -59,9 +61,6 @@ pub enum EvalError {
|
||||
#[snafu(display("Optimize error: {reason}"))]
|
||||
Optimize { reason: String, location: Location },
|
||||
|
||||
#[snafu(display("Unsupported temporal filter: {reason}"))]
|
||||
UnsupportedTemporalFilter { reason: String, location: Location },
|
||||
|
||||
#[snafu(display("Overflowed during evaluation"))]
|
||||
Overflow { location: Location },
|
||||
}
|
||||
|
||||
@@ -12,21 +12,31 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! This module contains the definition of functions that can be used in expressions.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use common_time::DateTime;
|
||||
use datafusion_expr::Operator;
|
||||
use datafusion_substrait::logical_plan::consumer::name_to_op;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::types::cast;
|
||||
use datatypes::types::cast::CastOption;
|
||||
use datatypes::value::Value;
|
||||
use hydroflow::bincode::Error;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::ResultExt;
|
||||
use smallvec::smallvec;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use strum::{EnumIter, IntoEnumIterator};
|
||||
|
||||
use crate::adapter::error::{Error, InvalidQuerySnafu, PlanSnafu};
|
||||
use crate::expr::error::{
|
||||
CastValueSnafu, DivisionByZeroSnafu, EvalError, InternalSnafu, TryFromValueSnafu,
|
||||
TypeMismatchSnafu,
|
||||
};
|
||||
use crate::expr::signature::{GenericFn, Signature};
|
||||
use crate::expr::{InvalidArgumentSnafu, ScalarExpr};
|
||||
use crate::repr::Row;
|
||||
use crate::repr::{value_to_internal_ts, Row};
|
||||
|
||||
/// UnmaterializableFunc is a function that can't be evaluated independently
/// and requires special handling
|
||||
@@ -36,6 +46,38 @@ pub enum UnmaterializableFunc {
|
||||
CurrentSchema,
|
||||
}
|
||||
|
||||
impl UnmaterializableFunc {
|
||||
/// Return the signature of the function
|
||||
pub fn signature(&self) -> Signature {
|
||||
match self {
|
||||
Self::Now => Signature {
|
||||
input: smallvec![],
|
||||
output: ConcreteDataType::datetime_datatype(),
|
||||
generic_fn: GenericFn::Now,
|
||||
},
|
||||
Self::CurrentSchema => Signature {
|
||||
input: smallvec![],
|
||||
output: ConcreteDataType::string_datatype(),
|
||||
generic_fn: GenericFn::CurrentSchema,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a UnmaterializableFunc from a string of the function name
|
||||
pub fn from_str(name: &str) -> Result<Self, Error> {
|
||||
match name {
|
||||
"now" => Ok(Self::Now),
|
||||
"current_schema" => Ok(Self::CurrentSchema),
|
||||
_ => InvalidQuerySnafu {
|
||||
reason: format!("Unknown unmaterializable function: {}", name),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// UnaryFunc is a function that takes one argument. Also notice this enum doesn't contain function arguments,
|
||||
/// because the arguments are stored in the expression. (except `cast` function, which requires a type argument)
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize, Hash)]
|
||||
pub enum UnaryFunc {
|
||||
Not,
|
||||
@@ -47,6 +89,68 @@ pub enum UnaryFunc {
|
||||
}
|
||||
|
||||
impl UnaryFunc {
|
||||
/// Return the signature of the function
|
||||
pub fn signature(&self) -> Signature {
|
||||
match self {
|
||||
Self::IsNull => Signature {
|
||||
input: smallvec![ConcreteDataType::null_datatype()],
|
||||
output: ConcreteDataType::boolean_datatype(),
|
||||
generic_fn: GenericFn::IsNull,
|
||||
},
|
||||
Self::Not | Self::IsTrue | Self::IsFalse => Signature {
|
||||
input: smallvec![ConcreteDataType::boolean_datatype()],
|
||||
output: ConcreteDataType::boolean_datatype(),
|
||||
generic_fn: match self {
|
||||
Self::Not => GenericFn::Not,
|
||||
Self::IsTrue => GenericFn::IsTrue,
|
||||
Self::IsFalse => GenericFn::IsFalse,
|
||||
_ => unreachable!(),
|
||||
},
|
||||
},
|
||||
Self::StepTimestamp => Signature {
|
||||
input: smallvec![ConcreteDataType::datetime_datatype()],
|
||||
output: ConcreteDataType::datetime_datatype(),
|
||||
generic_fn: GenericFn::StepTimestamp,
|
||||
},
|
||||
Self::Cast(to) => Signature {
|
||||
input: smallvec![ConcreteDataType::null_datatype()],
|
||||
output: to.clone(),
|
||||
generic_fn: GenericFn::Cast,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a UnaryFunc from a string of the function name and given argument type(optional)
|
||||
pub fn from_str_and_type(
|
||||
name: &str,
|
||||
arg_type: Option<ConcreteDataType>,
|
||||
) -> Result<Self, Error> {
|
||||
match name {
|
||||
"not" => Ok(Self::Not),
|
||||
"is_null" => Ok(Self::IsNull),
|
||||
"is_true" => Ok(Self::IsTrue),
|
||||
"is_false" => Ok(Self::IsFalse),
|
||||
"step_timestamp" => Ok(Self::StepTimestamp),
|
||||
"cast" => {
|
||||
let arg_type = arg_type.with_context(|| InvalidQuerySnafu {
|
||||
reason: "cast function requires a type argument".to_string(),
|
||||
})?;
|
||||
Ok(UnaryFunc::Cast(arg_type))
|
||||
}
|
||||
_ => InvalidQuerySnafu {
|
||||
reason: format!("Unknown unary function: {}", name),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Evaluate the function with given values and expression
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// - `values`: The values to be used in the evaluation
|
||||
///
|
||||
/// - `expr`: The expression to be evaluated and use as argument, will extract the value from the `values` and evaluate the expression
|
||||
pub fn eval(&self, values: &[Value], expr: &ScalarExpr) -> Result<Value, EvalError> {
|
||||
let arg = expr.eval(values)?;
|
||||
match self {
|
||||
@@ -80,13 +184,17 @@ impl UnaryFunc {
|
||||
}
|
||||
}
|
||||
Self::StepTimestamp => {
|
||||
let ty = arg.data_type();
|
||||
if let Value::DateTime(datetime) = arg {
|
||||
let datetime = DateTime::from(datetime.val() + 1);
|
||||
Ok(Value::from(datetime))
|
||||
} else if let Ok(v) = value_to_internal_ts(arg) {
|
||||
let datetime = DateTime::from(v + 1);
|
||||
Ok(Value::from(datetime))
|
||||
} else {
|
||||
TypeMismatchSnafu {
|
||||
expected: ConcreteDataType::datetime_datatype(),
|
||||
actual: arg.data_type(),
|
||||
actual: ty,
|
||||
}
|
||||
.fail()?
|
||||
}
|
||||
@@ -105,8 +213,13 @@ impl UnaryFunc {
|
||||
}
|
||||
}
|
||||
|
||||
/// BinaryFunc is a function that takes two arguments.
|
||||
/// Also notice this enum doesn't contain function arguments, since the arguments are stored in the expression.
|
||||
///
|
||||
/// TODO(discord9): support more binary functions for more types
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize, Hash)]
|
||||
#[derive(
|
||||
Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize, Hash, EnumIter,
|
||||
)]
|
||||
pub enum BinaryFunc {
|
||||
Eq,
|
||||
NotEq,
|
||||
@@ -154,7 +267,257 @@ pub enum BinaryFunc {
|
||||
ModUInt64,
|
||||
}
|
||||
|
||||
/// Generate binary function signature based on the function and the input types
|
||||
/// The user can provide custom signature for some functions in the form of a regular match arm,
|
||||
/// and the rest will be generated according to the provided list of functions like this:
|
||||
/// ```ignore
|
||||
/// AddInt16=>(int16_datatype,Add),
|
||||
/// ```
|
||||
/// which expands to:
|
||||
/// ```ignore, rust
|
||||
/// Self::AddInt16 => Signature {
|
||||
/// input: smallvec![
|
||||
/// ConcreteDataType::int16_datatype(),
|
||||
/// ConcreteDataType::int16_datatype(),
|
||||
/// ],
|
||||
/// output: ConcreteDataType::int16_datatype(),
|
||||
/// generic_fn: GenericFn::Add,
|
||||
/// },
|
||||
/// ```
|
||||
macro_rules! generate_binary_signature {
|
||||
($value:ident, { $($user_arm:tt)* },
|
||||
[ $(
|
||||
$auto_arm:ident=>($con_type:ident,$generic:ident)
|
||||
),*
|
||||
]) => {
|
||||
match $value {
|
||||
$($user_arm)*,
|
||||
$(
|
||||
Self::$auto_arm => Signature {
|
||||
input: smallvec![
|
||||
ConcreteDataType::$con_type(),
|
||||
ConcreteDataType::$con_type(),
|
||||
],
|
||||
output: ConcreteDataType::$con_type(),
|
||||
generic_fn: GenericFn::$generic,
|
||||
},
|
||||
)*
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
static SPECIALIZATION: OnceLock<HashMap<(GenericFn, ConcreteDataType), BinaryFunc>> =
|
||||
OnceLock::new();
|
||||
|
||||
impl BinaryFunc {
|
||||
/// The null type is used to refer to any type
|
||||
pub fn signature(&self) -> Signature {
|
||||
generate_binary_signature!(self, {
|
||||
Self::Eq | Self::NotEq | Self::Lt | Self::Lte | Self::Gt | Self::Gte => Signature {
|
||||
input: smallvec![
|
||||
ConcreteDataType::null_datatype(),
|
||||
ConcreteDataType::null_datatype()
|
||||
],
|
||||
output: ConcreteDataType::boolean_datatype(),
|
||||
generic_fn: match self {
|
||||
Self::Eq => GenericFn::Eq,
|
||||
Self::NotEq => GenericFn::NotEq,
|
||||
Self::Lt => GenericFn::Lt,
|
||||
Self::Lte => GenericFn::Lte,
|
||||
Self::Gt => GenericFn::Gt,
|
||||
Self::Gte => GenericFn::Gte,
|
||||
_ => unreachable!(),
|
||||
},
|
||||
}
|
||||
},
|
||||
[
|
||||
AddInt16=>(int16_datatype,Add),
|
||||
AddInt32=>(int32_datatype,Add),
|
||||
AddInt64=>(int64_datatype,Add),
|
||||
AddUInt16=>(uint16_datatype,Add),
|
||||
AddUInt32=>(uint32_datatype,Add),
|
||||
AddUInt64=>(uint64_datatype,Add),
|
||||
AddFloat32=>(float32_datatype,Add),
|
||||
AddFloat64=>(float64_datatype,Add),
|
||||
SubInt16=>(int16_datatype,Sub),
|
||||
SubInt32=>(int32_datatype,Sub),
|
||||
SubInt64=>(int64_datatype,Sub),
|
||||
SubUInt16=>(uint16_datatype,Sub),
|
||||
SubUInt32=>(uint32_datatype,Sub),
|
||||
SubUInt64=>(uint64_datatype,Sub),
|
||||
SubFloat32=>(float32_datatype,Sub),
|
||||
SubFloat64=>(float64_datatype,Sub),
|
||||
MulInt16=>(int16_datatype,Mul),
|
||||
MulInt32=>(int32_datatype,Mul),
|
||||
MulInt64=>(int64_datatype,Mul),
|
||||
MulUInt16=>(uint16_datatype,Mul),
|
||||
MulUInt32=>(uint32_datatype,Mul),
|
||||
MulUInt64=>(uint64_datatype,Mul),
|
||||
MulFloat32=>(float32_datatype,Mul),
|
||||
MulFloat64=>(float64_datatype,Mul),
|
||||
DivInt16=>(int16_datatype,Div),
|
||||
DivInt32=>(int32_datatype,Div),
|
||||
DivInt64=>(int64_datatype,Div),
|
||||
DivUInt16=>(uint16_datatype,Div),
|
||||
DivUInt32=>(uint32_datatype,Div),
|
||||
DivUInt64=>(uint64_datatype,Div),
|
||||
DivFloat32=>(float32_datatype,Div),
|
||||
DivFloat64=>(float64_datatype,Div),
|
||||
ModInt16=>(int16_datatype,Mod),
|
||||
ModInt32=>(int32_datatype,Mod),
|
||||
ModInt64=>(int64_datatype,Mod),
|
||||
ModUInt16=>(uint16_datatype,Mod),
|
||||
ModUInt32=>(uint32_datatype,Mod),
|
||||
ModUInt64=>(uint64_datatype,Mod)
|
||||
]
|
||||
)
|
||||
}
|
||||
|
||||
/// Get the specialization of the binary function based on the generic function and the input type
|
||||
pub fn specialization(generic: GenericFn, input_type: ConcreteDataType) -> Result<Self, Error> {
|
||||
let rule = SPECIALIZATION.get_or_init(|| {
|
||||
let mut spec = HashMap::new();
|
||||
for func in BinaryFunc::iter() {
|
||||
let sig = func.signature();
|
||||
spec.insert((sig.generic_fn, sig.input[0].clone()), func);
|
||||
}
|
||||
spec
|
||||
});
|
||||
rule.get(&(generic, input_type.clone()))
|
||||
.cloned()
|
||||
.with_context(|| InvalidQuerySnafu {
|
||||
reason: format!(
|
||||
"No specialization found for binary function {:?} with input type {:?}",
|
||||
generic, input_type
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
/// tries its best to infer types from the input types and expressions
///
/// if it can't figure out the types, an error is returned
|
||||
pub(crate) fn infer_type_from(
|
||||
generic: GenericFn,
|
||||
arg_exprs: &[ScalarExpr],
|
||||
arg_types: &[Option<ConcreteDataType>],
|
||||
) -> Result<ConcreteDataType, Error> {
|
||||
let ret = match (arg_types[0].as_ref(), arg_types[1].as_ref()) {
|
||||
(Some(t1), Some(t2)) => {
|
||||
ensure!(
|
||||
t1 == t2,
|
||||
InvalidQuerySnafu {
|
||||
reason: format!(
|
||||
"Binary function {:?} requires both arguments to have the same type",
|
||||
generic
|
||||
),
|
||||
}
|
||||
);
|
||||
t1.clone()
|
||||
}
|
||||
(Some(t), None) | (None, Some(t)) => t.clone(),
|
||||
_ => arg_exprs[0]
|
||||
.as_literal()
|
||||
.map(|lit| lit.data_type())
|
||||
.or_else(|| arg_exprs[1].as_literal().map(|lit| lit.data_type()))
|
||||
.with_context(|| InvalidQuerySnafu {
|
||||
reason: format!(
|
||||
"Binary function {:?} requires at least one argument with known type",
|
||||
generic
|
||||
),
|
||||
})?,
|
||||
};
|
||||
Ok(ret)
|
||||
}
|
||||
|
||||
/// choose the appropriate specialization based on the input types;
/// returns a specialization of the binary function and its actual input and output types (so no null type is present)
///
/// tries its best to extract the input types from `arg_types` and `arg_exprs`;
/// if `arg_types` is not enough, it falls back to `arg_exprs` when an argument is a literal with a known type
|
||||
pub fn from_str_expr_and_type(
|
||||
name: &str,
|
||||
arg_exprs: &[ScalarExpr],
|
||||
arg_types: &[Option<ConcreteDataType>],
|
||||
) -> Result<(Self, Signature), Error> {
|
||||
// if `name_to_op` fails it only returns a generic `unsupported function xxx` style message, so rewrap it into a more precise error below
|
||||
let op = name_to_op(name).or_else(|err| {
|
||||
if let datafusion_common::DataFusionError::NotImplemented(msg) = err {
|
||||
InvalidQuerySnafu {
|
||||
reason: format!("Unsupported binary function: {}", msg),
|
||||
}
|
||||
.fail()
|
||||
} else {
|
||||
InvalidQuerySnafu {
|
||||
reason: format!("Error when parsing binary function: {:?}", err),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
})?;
|
||||
|
||||
// get the first arg type and make sure that, if both are Some, they are the same
|
||||
let generic_fn = {
|
||||
match op {
|
||||
Operator::Eq => GenericFn::Eq,
|
||||
Operator::NotEq => GenericFn::NotEq,
|
||||
Operator::Lt => GenericFn::Lt,
|
||||
Operator::LtEq => GenericFn::Lte,
|
||||
Operator::Gt => GenericFn::Gt,
|
||||
Operator::GtEq => GenericFn::Gte,
|
||||
Operator::Plus => GenericFn::Add,
|
||||
Operator::Minus => GenericFn::Sub,
|
||||
Operator::Multiply => GenericFn::Mul,
|
||||
Operator::Divide => GenericFn::Div,
|
||||
Operator::Modulo => GenericFn::Mod,
|
||||
_ => {
|
||||
return InvalidQuerySnafu {
|
||||
reason: format!("Unsupported binary function: {}", name),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
};
|
||||
let need_type = matches!(
|
||||
generic_fn,
|
||||
GenericFn::Add | GenericFn::Sub | GenericFn::Mul | GenericFn::Div | GenericFn::Mod
|
||||
);
|
||||
|
||||
ensure!(
|
||||
arg_exprs.len() == 2 && arg_types.len() == 2,
|
||||
PlanSnafu {
|
||||
reason: "Binary function requires exactly 2 arguments".to_string()
|
||||
}
|
||||
);
|
||||
|
||||
let arg_type = Self::infer_type_from(generic_fn, arg_exprs, arg_types)?;
|
||||
|
||||
// if the type is not needed, we can erase the input type to null to find the correct function
// for functions that do not need a type
|
||||
let query_input_type = if need_type {
|
||||
arg_type.clone()
|
||||
} else {
|
||||
ConcreteDataType::null_datatype()
|
||||
};
|
||||
|
||||
let spec_fn = Self::specialization(generic_fn, query_input_type)?;
|
||||
|
||||
let signature = Signature {
|
||||
input: smallvec![arg_type.clone(), arg_type],
|
||||
output: spec_fn.signature().output,
|
||||
generic_fn,
|
||||
};
|
||||
|
||||
Ok((spec_fn, signature))
|
||||
}
|
||||
|
||||
/// Evaluate the function with given values and expression
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// - `values`: The values to be used in the evaluation
|
||||
///
|
||||
/// - `expr1`: The first arg to be evaluated, will extract the value from the `values` and evaluate the expression
|
||||
///
|
||||
/// - `expr2`: The second arg to be evaluated
|
||||
pub fn eval(
|
||||
&self,
|
||||
values: &[Value],
|
||||
@@ -218,7 +581,7 @@ impl BinaryFunc {
|
||||
|
||||
/// Reverse the comparison operator, i.e. `a < b` becomes `b > a`,
|
||||
/// equal and not equal are unchanged.
|
||||
pub fn reverse_compare(&self) -> Result<Self, EvalError> {
|
||||
pub fn reverse_compare(&self) -> Result<Self, Error> {
|
||||
let ret = match &self {
|
||||
BinaryFunc::Eq => BinaryFunc::Eq,
|
||||
BinaryFunc::NotEq => BinaryFunc::NotEq,
|
||||
@@ -227,7 +590,7 @@ impl BinaryFunc {
|
||||
BinaryFunc::Gt => BinaryFunc::Lt,
|
||||
BinaryFunc::Gte => BinaryFunc::Lte,
|
||||
_ => {
|
||||
return InternalSnafu {
|
||||
return InvalidQuerySnafu {
|
||||
reason: format!("Expect a comparison operator, found {:?}", self),
|
||||
}
|
||||
.fail();
|
||||
@@ -237,13 +600,44 @@ impl BinaryFunc {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize, Hash)]
|
||||
/// VariadicFunc is a function that takes a variable number of arguments.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize, Hash)]
|
||||
pub enum VariadicFunc {
|
||||
And,
|
||||
Or,
|
||||
}
|
||||
|
||||
impl VariadicFunc {
|
||||
/// Return the signature of the function
|
||||
pub fn signature(&self) -> Signature {
|
||||
Signature {
|
||||
input: smallvec![ConcreteDataType::boolean_datatype()],
|
||||
output: ConcreteDataType::boolean_datatype(),
|
||||
generic_fn: match self {
|
||||
Self::And => GenericFn::And,
|
||||
Self::Or => GenericFn::Or,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a VariadicFunc from a string of the function name and given argument types(optional)
|
||||
pub fn from_str_and_types(
|
||||
name: &str,
|
||||
arg_types: &[Option<ConcreteDataType>],
|
||||
) -> Result<Self, Error> {
|
||||
// TODO: future variadic funcs to be added might need to check arg_types
|
||||
let _ = arg_types;
|
||||
match name {
|
||||
"and" => Ok(Self::And),
|
||||
"or" => Ok(Self::Or),
|
||||
_ => InvalidQuerySnafu {
|
||||
reason: format!("Unknown variadic function: {}", name),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Evaluate the function with given values and expressions
|
||||
pub fn eval(&self, values: &[Value], exprs: &[ScalarExpr]) -> Result<Value, EvalError> {
|
||||
match self {
|
||||
VariadicFunc::And => and(values, exprs),
|
||||
@@ -373,7 +767,7 @@ fn test_num_ops() {
|
||||
assert_eq!(res, Value::from(30));
|
||||
let res = div::<i32>(left.clone(), right.clone()).unwrap();
|
||||
assert_eq!(res, Value::from(3));
|
||||
let res = rem::<i32>(left.clone(), right.clone()).unwrap();
|
||||
let res = rem::<i32>(left, right).unwrap();
|
||||
assert_eq!(res, Value::from(1));
|
||||
|
||||
let values = vec![Value::from(true), Value::from(false)];
|
||||
@@ -383,3 +777,97 @@ fn test_num_ops() {
|
||||
let res = or(&values, &exprs).unwrap();
|
||||
assert_eq!(res, Value::from(true));
|
||||
}
|
||||
|
||||
/// test if the binary function specialization works,
/// whether from a direct type or from a literal expression
|
||||
#[test]
|
||||
fn test_binary_func_spec() {
|
||||
assert_eq!(
|
||||
BinaryFunc::from_str_expr_and_type(
|
||||
"add",
|
||||
&[ScalarExpr::Column(0), ScalarExpr::Column(0)],
|
||||
&[
|
||||
Some(ConcreteDataType::int32_datatype()),
|
||||
Some(ConcreteDataType::int32_datatype())
|
||||
]
|
||||
)
|
||||
.unwrap(),
|
||||
(BinaryFunc::AddInt32, BinaryFunc::AddInt32.signature())
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
BinaryFunc::from_str_expr_and_type(
|
||||
"add",
|
||||
&[ScalarExpr::Column(0), ScalarExpr::Column(0)],
|
||||
&[Some(ConcreteDataType::int32_datatype()), None]
|
||||
)
|
||||
.unwrap(),
|
||||
(BinaryFunc::AddInt32, BinaryFunc::AddInt32.signature())
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
BinaryFunc::from_str_expr_and_type(
|
||||
"add",
|
||||
&[ScalarExpr::Column(0), ScalarExpr::Column(0)],
|
||||
&[Some(ConcreteDataType::int32_datatype()), None]
|
||||
)
|
||||
.unwrap(),
|
||||
(BinaryFunc::AddInt32, BinaryFunc::AddInt32.signature())
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
BinaryFunc::from_str_expr_and_type(
|
||||
"add",
|
||||
&[ScalarExpr::Column(0), ScalarExpr::Column(0)],
|
||||
&[Some(ConcreteDataType::int32_datatype()), None]
|
||||
)
|
||||
.unwrap(),
|
||||
(BinaryFunc::AddInt32, BinaryFunc::AddInt32.signature())
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
BinaryFunc::from_str_expr_and_type(
|
||||
"add",
|
||||
&[
|
||||
ScalarExpr::Literal(Value::from(1i32), ConcreteDataType::int32_datatype()),
|
||||
ScalarExpr::Column(0)
|
||||
],
|
||||
&[None, None]
|
||||
)
|
||||
.unwrap(),
|
||||
(BinaryFunc::AddInt32, BinaryFunc::AddInt32.signature())
|
||||
);
|
||||
|
||||
// this testcase makes sure the specialization can find the actual type from the expression and fill in the signature
|
||||
assert_eq!(
|
||||
BinaryFunc::from_str_expr_and_type(
|
||||
"equal",
|
||||
&[
|
||||
ScalarExpr::Literal(Value::from(1i32), ConcreteDataType::int32_datatype()),
|
||||
ScalarExpr::Column(0)
|
||||
],
|
||||
&[None, None]
|
||||
)
|
||||
.unwrap(),
|
||||
(
|
||||
BinaryFunc::Eq,
|
||||
Signature {
|
||||
input: smallvec![
|
||||
ConcreteDataType::int32_datatype(),
|
||||
ConcreteDataType::int32_datatype()
|
||||
],
|
||||
output: ConcreteDataType::boolean_datatype(),
|
||||
generic_fn: GenericFn::Eq
|
||||
}
|
||||
)
|
||||
);
|
||||
|
||||
matches!(
|
||||
BinaryFunc::from_str_expr_and_type(
|
||||
"add",
|
||||
&[ScalarExpr::Column(0), ScalarExpr::Column(0)],
|
||||
&[None, None]
|
||||
),
|
||||
Err(Error::InvalidQuery { .. })
|
||||
);
|
||||
}
|
||||
|
||||
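A small illustrative sketch of the `specialization` lookup described above, assuming only the variants and helpers shown in this diff:

// Sketch: resolve the generic `Add` to its concrete i64 variant.
let f = BinaryFunc::specialization(GenericFn::Add, ConcreteDataType::int64_datatype()).unwrap();
assert_eq!(f, BinaryFunc::AddInt64);
assert_eq!(f.signature().output, ConcreteDataType::int64_datatype());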
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! `Id` is used to identify a dataflow component in a plan like `Plan::Get{id: Id}`; this could be a source of data for an arrangement.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A Global id's scope is the current worker, and it is cross-dataflow
|
||||
|
||||
Some files were not shown because too many files have changed in this diff.