mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-23 06:30:05 +00:00
Compare commits
37 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
75975adcb6 | ||
|
|
527e523a38 | ||
|
|
aad2afd3f2 | ||
|
|
bf88b3b4a0 | ||
|
|
bf96ce3049 | ||
|
|
430ffe0e28 | ||
|
|
c1190bae7b | ||
|
|
0882da4d01 | ||
|
|
8ec1e42754 | ||
|
|
b00b49284e | ||
|
|
09b3c7029b | ||
|
|
f5798e2833 | ||
|
|
fd8fb641fd | ||
|
|
312e8e824e | ||
|
|
29a7f301df | ||
|
|
51a3fbc7bf | ||
|
|
d521bc9dc5 | ||
|
|
7fad4e8356 | ||
|
|
b6033f62cd | ||
|
|
fd3f23ea15 | ||
|
|
1b0e39a7f2 | ||
|
|
3ab370265a | ||
|
|
ec8266b969 | ||
|
|
490312bf57 | ||
|
|
1fc168bf6a | ||
|
|
db98484796 | ||
|
|
7d0d2163d2 | ||
|
|
c4582c05cc | ||
|
|
a0a31c8acc | ||
|
|
0db1861452 | ||
|
|
225ae953d1 | ||
|
|
2c1b1cecc8 | ||
|
|
62db28b465 | ||
|
|
6e860bc0fd | ||
|
|
8bd4a36136 | ||
|
|
af0c4c068a | ||
|
|
26cbcb8b3a |
19
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
19
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -21,6 +21,7 @@ body:
|
||||
- Locking issue
|
||||
- Performance issue
|
||||
- Unexpected error
|
||||
- User Experience
|
||||
- Other
|
||||
validations:
|
||||
required: true
|
||||
@@ -33,9 +34,14 @@ body:
|
||||
multiple: true
|
||||
options:
|
||||
- Standalone mode
|
||||
- Distributed Cluster
|
||||
- Storage Engine
|
||||
- Query Engine
|
||||
- Table Engine
|
||||
- Write Protocols
|
||||
- MetaSrv
|
||||
- Frontend
|
||||
- Datanode
|
||||
- Meta
|
||||
- Other
|
||||
validations:
|
||||
required: true
|
||||
@@ -77,6 +83,17 @@ body:
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: greptimedb
|
||||
attributes:
|
||||
label: What version of GreptimeDB did you use?
|
||||
description: |
|
||||
Please provide the version of GreptimeDB. For example:
|
||||
0.5.1 etc. You can get it by executing command line `greptime --version`.
|
||||
placeholder: "0.5.1"
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
|
||||
2
.github/pull_request_template.md
vendored
2
.github/pull_request_template.md
vendored
@@ -15,6 +15,6 @@ Please explain IN DETAIL what the changes are in this PR and why they are needed
|
||||
|
||||
- [ ] I have written the necessary rustdoc comments.
|
||||
- [ ] I have added the necessary unit tests and integration tests.
|
||||
- [ ] This PR does not require documentation updates.
|
||||
- [x] This PR does not require documentation updates.
|
||||
|
||||
## Refer to a related PR or issue link (optional)
|
||||
|
||||
2
.github/workflows/release.yml
vendored
2
.github/workflows/release.yml
vendored
@@ -91,7 +91,7 @@ env:
|
||||
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
|
||||
NIGHTLY_RELEASE_PREFIX: nightly
|
||||
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
|
||||
NEXT_RELEASE_VERSION: v0.6.0
|
||||
NEXT_RELEASE_VERSION: v0.7.0
|
||||
|
||||
jobs:
|
||||
allocate-runners:
|
||||
|
||||
133
Cargo.lock
generated
133
Cargo.lock
generated
@@ -196,7 +196,7 @@ checksum = "8f1f8f5a6f3d50d89e3797d7593a50f96bb2aaa20ca0cc7be1fb673232c91d72"
|
||||
|
||||
[[package]]
|
||||
name = "api"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-decimal",
|
||||
@@ -674,7 +674,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "auth"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -847,7 +847,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "benchmarks"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"chrono",
|
||||
@@ -1179,7 +1179,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
|
||||
|
||||
[[package]]
|
||||
name = "catalog"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -1206,6 +1206,7 @@ dependencies = [
|
||||
"datatypes",
|
||||
"futures",
|
||||
"futures-util",
|
||||
"itertools 0.10.5",
|
||||
"lazy_static",
|
||||
"log-store",
|
||||
"meta-client",
|
||||
@@ -1451,7 +1452,7 @@ checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
|
||||
|
||||
[[package]]
|
||||
name = "client"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -1484,7 +1485,7 @@ dependencies = [
|
||||
"session",
|
||||
"snafu",
|
||||
"substrait 0.17.1",
|
||||
"substrait 0.5.1",
|
||||
"substrait 0.6.0",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tonic 0.10.2",
|
||||
@@ -1514,7 +1515,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cmd"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"anymap",
|
||||
"async-trait",
|
||||
@@ -1565,7 +1566,7 @@ dependencies = [
|
||||
"session",
|
||||
"snafu",
|
||||
"store-api",
|
||||
"substrait 0.5.1",
|
||||
"substrait 0.6.0",
|
||||
"table",
|
||||
"temp-env",
|
||||
"tikv-jemallocator",
|
||||
@@ -1598,7 +1599,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"
|
||||
|
||||
[[package]]
|
||||
name = "common-base"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"anymap",
|
||||
"bitvec",
|
||||
@@ -1613,7 +1614,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-catalog"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"common-error",
|
||||
@@ -1624,7 +1625,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-config"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"humantime-serde",
|
||||
@@ -1637,7 +1638,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-datasource"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-schema",
|
||||
@@ -1668,7 +1669,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-decimal"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"bigdecimal",
|
||||
@@ -1682,7 +1683,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-error"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"snafu",
|
||||
"strum 0.25.0",
|
||||
@@ -1690,7 +1691,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-function"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"build-data",
|
||||
@@ -1714,7 +1715,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-greptimedb-telemetry"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"common-error",
|
||||
@@ -1733,7 +1734,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-grpc"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -1763,7 +1764,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-grpc-expr"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -1782,7 +1783,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-macro"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"common-query",
|
||||
@@ -1797,7 +1798,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-mem-prof"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"common-error",
|
||||
"common-macro",
|
||||
@@ -1810,7 +1811,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-meta"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-recursion",
|
||||
@@ -1857,7 +1858,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-procedure"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
@@ -1881,7 +1882,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-procedure-test"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"common-procedure",
|
||||
@@ -1889,7 +1890,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-query"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -1912,7 +1913,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-recordbatch"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"common-error",
|
||||
"common-macro",
|
||||
@@ -1929,7 +1930,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-runtime"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"common-error",
|
||||
@@ -1949,7 +1950,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-telemetry"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"common-error",
|
||||
@@ -1975,8 +1976,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-test-util"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"client",
|
||||
"common-query",
|
||||
"common-recordbatch",
|
||||
"once_cell",
|
||||
"rand",
|
||||
"tempfile",
|
||||
@@ -1984,7 +1988,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-time"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"chrono",
|
||||
@@ -2000,7 +2004,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-version"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"build-data",
|
||||
]
|
||||
@@ -2630,7 +2634,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datanode"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -2690,7 +2694,7 @@ dependencies = [
|
||||
"snafu",
|
||||
"sql",
|
||||
"store-api",
|
||||
"substrait 0.5.1",
|
||||
"substrait 0.6.0",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
@@ -2704,7 +2708,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datatypes"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -3165,7 +3169,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "file-engine"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -3296,7 +3300,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa"
|
||||
|
||||
[[package]]
|
||||
name = "frontend"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -3360,7 +3364,7 @@ dependencies = [
|
||||
"sqlparser 0.38.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)",
|
||||
"store-api",
|
||||
"strfmt",
|
||||
"substrait 0.5.1",
|
||||
"substrait 0.6.0",
|
||||
"table",
|
||||
"tokio",
|
||||
"toml 0.8.8",
|
||||
@@ -4014,7 +4018,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "index"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"asynchronous-codec",
|
||||
@@ -4494,7 +4498,7 @@ checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
|
||||
|
||||
[[package]]
|
||||
name = "log-store"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
@@ -4509,6 +4513,8 @@ dependencies = [
|
||||
"common-runtime",
|
||||
"common-telemetry",
|
||||
"common-test-util",
|
||||
"common-time",
|
||||
"dashmap",
|
||||
"futures",
|
||||
"futures-util",
|
||||
"itertools 0.10.5",
|
||||
@@ -4771,7 +4777,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meta-client"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -4801,7 +4807,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meta-srv"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"anymap",
|
||||
"api",
|
||||
@@ -4830,6 +4836,7 @@ dependencies = [
|
||||
"futures",
|
||||
"h2",
|
||||
"http-body",
|
||||
"humantime",
|
||||
"humantime-serde",
|
||||
"itertools 0.10.5",
|
||||
"lazy_static",
|
||||
@@ -4879,7 +4886,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "metric-engine"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"aquamarine",
|
||||
@@ -4950,7 +4957,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mito2"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"anymap",
|
||||
"api",
|
||||
@@ -5451,7 +5458,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "object-store"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@@ -5696,7 +5703,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "operator"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -5740,7 +5747,7 @@ dependencies = [
|
||||
"sql",
|
||||
"sqlparser 0.38.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)",
|
||||
"store-api",
|
||||
"substrait 0.5.1",
|
||||
"substrait 0.6.0",
|
||||
"table",
|
||||
"tokio",
|
||||
"tonic 0.10.2",
|
||||
@@ -5971,7 +5978,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "partition"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -6290,7 +6297,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "plugins"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"auth",
|
||||
"common-base",
|
||||
@@ -6548,7 +6555,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "promql"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.6",
|
||||
"async-recursion",
|
||||
@@ -6558,6 +6565,7 @@ dependencies = [
|
||||
"common-catalog",
|
||||
"common-error",
|
||||
"common-macro",
|
||||
"common-query",
|
||||
"common-recordbatch",
|
||||
"common-telemetry",
|
||||
"datafusion",
|
||||
@@ -6758,7 +6766,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "puffin"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bitflags 2.4.1",
|
||||
@@ -6869,7 +6877,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "query"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.6",
|
||||
"api",
|
||||
@@ -6927,7 +6935,7 @@ dependencies = [
|
||||
"stats-cli",
|
||||
"store-api",
|
||||
"streaming-stats",
|
||||
"substrait 0.5.1",
|
||||
"substrait 0.6.0",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
@@ -8197,7 +8205,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "script"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -8457,7 +8465,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "servers"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"aide",
|
||||
"api",
|
||||
@@ -8553,7 +8561,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "session"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -8708,6 +8716,9 @@ name = "smallvec"
|
||||
version = "1.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smartstring"
|
||||
@@ -8814,7 +8825,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sql"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"common-base",
|
||||
@@ -8866,7 +8877,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sqlness-runner"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"clap 4.4.11",
|
||||
@@ -9073,7 +9084,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "store-api"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"aquamarine",
|
||||
@@ -9213,7 +9224,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "substrait"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"async-recursion",
|
||||
"async-trait",
|
||||
@@ -9361,7 +9372,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "table"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"anymap",
|
||||
"async-trait",
|
||||
@@ -9473,7 +9484,7 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
|
||||
|
||||
[[package]]
|
||||
name = "tests-integration"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -9529,7 +9540,7 @@ dependencies = [
|
||||
"sql",
|
||||
"sqlx",
|
||||
"store-api",
|
||||
"substrait 0.5.1",
|
||||
"substrait 0.6.0",
|
||||
"table",
|
||||
"tempfile",
|
||||
"time",
|
||||
|
||||
@@ -58,7 +58,7 @@ members = [
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
|
||||
@@ -121,7 +121,7 @@ rskafka = "0.5"
|
||||
rust_decimal = "1.33"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
smallvec = "1"
|
||||
smallvec = { version = "1", features = ["serde"] }
|
||||
snafu = "0.7"
|
||||
# on branch v0.38.x
|
||||
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "6a93567ae38d42be5c8d08b13c8ff4dde26502ef", features = [
|
||||
|
||||
@@ -34,11 +34,7 @@ connect_timeout = "1s"
|
||||
tcp_nodelay = true
|
||||
|
||||
# WAL options.
|
||||
# Currently, users are expected to choose the wal through the provider field.
|
||||
# When a wal provider is chose, the user should comment out all other wal config
|
||||
# except those corresponding to the chosen one.
|
||||
[wal]
|
||||
# WAL data directory
|
||||
provider = "raft_engine"
|
||||
|
||||
# Raft-engine wal options, see `standalone.example.toml`.
|
||||
@@ -117,6 +113,8 @@ sst_write_buffer_size = "8MB"
|
||||
scan_parallelism = 0
|
||||
# Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
|
||||
parallel_scan_channel_size = 32
|
||||
# Whether to allow stale WAL entries read during replay.
|
||||
allow_stale_entries = false
|
||||
|
||||
# Log options, see `standalone.example.toml`
|
||||
# [logging]
|
||||
|
||||
@@ -64,8 +64,6 @@ provider = "raft_engine"
|
||||
# selector_type = "round_robin"
|
||||
# A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
|
||||
# topic_name_prefix = "greptimedb_wal_topic"
|
||||
# Number of partitions per topic.
|
||||
# num_partitions = 1
|
||||
# Expected number of replicas of each partition.
|
||||
# replication_factor = 1
|
||||
# Above which a topic creation operation will be cancelled.
|
||||
|
||||
@@ -102,9 +102,8 @@ provider = "raft_engine"
|
||||
# selector_type = "round_robin"
|
||||
# The prefix of topic name.
|
||||
# topic_name_prefix = "greptimedb_wal_topic"
|
||||
# Number of partitions per topic.
|
||||
# num_partitions = 1
|
||||
# The number of replicas of each partition.
|
||||
# Warning: the replication factor must be positive and must not be greater than the number of broker endpoints.
|
||||
# replication_factor = 1
|
||||
|
||||
# The max size of a single producer batch.
|
||||
@@ -138,6 +137,12 @@ purge_interval = "10m"
|
||||
read_batch_size = 128
|
||||
# Whether to sync log file after every write.
|
||||
sync_write = false
|
||||
# Whether to reuse logically truncated log files.
|
||||
enable_log_recycle = true
|
||||
# Whether to pre-create log files on start up
|
||||
prefill_log_files = false
|
||||
# Duration for fsyncing log files.
|
||||
sync_period = "1000ms"
|
||||
|
||||
# Metadata storage options.
|
||||
[metadata_store]
|
||||
@@ -208,6 +213,8 @@ sst_write_buffer_size = "8MB"
|
||||
scan_parallelism = 0
|
||||
# Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
|
||||
parallel_scan_channel_size = 32
|
||||
# Whether to allow stale WAL entries read during replay.
|
||||
allow_stale_entries = false
|
||||
|
||||
# Log options
|
||||
# [logging]
|
||||
|
||||
@@ -30,6 +30,7 @@ datafusion.workspace = true
|
||||
datatypes.workspace = true
|
||||
futures = "0.3"
|
||||
futures-util.workspace = true
|
||||
itertools.workspace = true
|
||||
lazy_static.workspace = true
|
||||
meta-client.workspace = true
|
||||
moka = { workspace = true, features = ["future"] }
|
||||
|
||||
@@ -16,6 +16,7 @@ mod columns;
|
||||
mod key_column_usage;
|
||||
mod memory_table;
|
||||
mod predicate;
|
||||
mod runtime_metrics;
|
||||
mod schemata;
|
||||
mod table_names;
|
||||
mod tables;
|
||||
@@ -23,7 +24,7 @@ mod tables;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use common_catalog::consts::{self, INFORMATION_SCHEMA_NAME};
|
||||
use common_catalog::consts::{self, DEFAULT_CATALOG_NAME, INFORMATION_SCHEMA_NAME};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_recordbatch::{RecordBatchStreamWrapper, SendableRecordBatchStream};
|
||||
use datatypes::schema::SchemaRef;
|
||||
@@ -46,6 +47,7 @@ use self::columns::InformationSchemaColumns;
|
||||
use crate::error::Result;
|
||||
use crate::information_schema::key_column_usage::InformationSchemaKeyColumnUsage;
|
||||
use crate::information_schema::memory_table::{get_schema_columns, MemoryTable};
|
||||
use crate::information_schema::runtime_metrics::InformationSchemaMetrics;
|
||||
use crate::information_schema::schemata::InformationSchemaSchemata;
|
||||
use crate::information_schema::tables::InformationSchemaTables;
|
||||
use crate::CatalogManager;
|
||||
@@ -56,7 +58,6 @@ lazy_static! {
|
||||
ENGINES,
|
||||
COLUMN_PRIVILEGES,
|
||||
COLUMN_STATISTICS,
|
||||
BUILD_INFO,
|
||||
CHARACTER_SETS,
|
||||
COLLATIONS,
|
||||
COLLATION_CHARACTER_SET_APPLICABILITY,
|
||||
@@ -142,6 +143,21 @@ impl InformationSchemaProvider {
|
||||
|
||||
fn build_tables(&mut self) {
|
||||
let mut tables = HashMap::new();
|
||||
|
||||
// Carefully consider the tables that may expose sensitive cluster configurations,
|
||||
// authentication details, and other critical information.
|
||||
// Only put these tables under `greptime` catalog to prevent info leak.
|
||||
if self.catalog_name == DEFAULT_CATALOG_NAME {
|
||||
tables.insert(
|
||||
RUNTIME_METRICS.to_string(),
|
||||
self.build_table(RUNTIME_METRICS).unwrap(),
|
||||
);
|
||||
tables.insert(
|
||||
BUILD_INFO.to_string(),
|
||||
self.build_table(BUILD_INFO).unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
tables.insert(TABLES.to_string(), self.build_table(TABLES).unwrap());
|
||||
tables.insert(SCHEMATA.to_string(), self.build_table(SCHEMATA).unwrap());
|
||||
tables.insert(COLUMNS.to_string(), self.build_table(COLUMNS).unwrap());
|
||||
@@ -209,6 +225,7 @@ impl InformationSchemaProvider {
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)) as _),
|
||||
RUNTIME_METRICS => Some(Arc::new(InformationSchemaMetrics::new())),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
250
src/catalog/src/information_schema/runtime_metrics.rs
Normal file
250
src/catalog/src/information_schema/runtime_metrics.rs
Normal file
@@ -0,0 +1,250 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_RUNTIME_METRICS_TABLE_ID;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_query::physical_plan::TaskContext;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use common_time::util::current_time_millis;
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, MutableVector};
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::vectors::{
|
||||
ConstantVector, Float64VectorBuilder, StringVector, StringVectorBuilder,
|
||||
TimestampMillisecondVector, VectorRef,
|
||||
};
|
||||
use itertools::Itertools;
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::{InformationTable, RUNTIME_METRICS};
|
||||
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
|
||||
|
||||
pub(super) struct InformationSchemaMetrics {
|
||||
schema: SchemaRef,
|
||||
}
|
||||
|
||||
const METRIC_NAME: &str = "metric_name";
|
||||
const METRIC_VALUE: &str = "value";
|
||||
const METRIC_LABELS: &str = "labels";
|
||||
const NODE: &str = "node";
|
||||
const NODE_TYPE: &str = "node_type";
|
||||
const TIMESTAMP: &str = "timestamp";
|
||||
|
||||
/// The `information_schema.runtime_metrics` virtual table.
|
||||
/// It provides the GreptimeDB runtime metrics for the users by SQL.
|
||||
impl InformationSchemaMetrics {
|
||||
pub(super) fn new() -> Self {
|
||||
Self {
|
||||
schema: Self::schema(),
|
||||
}
|
||||
}
|
||||
|
||||
fn schema() -> SchemaRef {
|
||||
Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new(METRIC_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(METRIC_VALUE, ConcreteDataType::float64_datatype(), false),
|
||||
ColumnSchema::new(METRIC_LABELS, ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new(NODE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(NODE_TYPE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(
|
||||
TIMESTAMP,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
),
|
||||
]))
|
||||
}
|
||||
|
||||
fn builder(&self) -> InformationSchemaMetricsBuilder {
|
||||
InformationSchemaMetricsBuilder::new(self.schema.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl InformationTable for InformationSchemaMetrics {
|
||||
fn table_id(&self) -> TableId {
|
||||
INFORMATION_SCHEMA_RUNTIME_METRICS_TABLE_ID
|
||||
}
|
||||
|
||||
fn table_name(&self) -> &'static str {
|
||||
RUNTIME_METRICS
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_metrics(Some(request))
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
));
|
||||
Ok(Box::pin(
|
||||
RecordBatchStreamAdapter::try_new(stream)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
struct InformationSchemaMetricsBuilder {
|
||||
schema: SchemaRef,
|
||||
|
||||
metric_names: StringVectorBuilder,
|
||||
metric_values: Float64VectorBuilder,
|
||||
metric_labels: StringVectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaMetricsBuilder {
|
||||
fn new(schema: SchemaRef) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
metric_names: StringVectorBuilder::with_capacity(42),
|
||||
metric_values: Float64VectorBuilder::with_capacity(42),
|
||||
metric_labels: StringVectorBuilder::with_capacity(42),
|
||||
}
|
||||
}
|
||||
|
||||
fn add_metric(&mut self, metric_name: &str, labels: String, metric_value: f64) {
|
||||
self.metric_names.push(Some(metric_name));
|
||||
self.metric_values.push(Some(metric_value));
|
||||
self.metric_labels.push(Some(&labels));
|
||||
}
|
||||
|
||||
async fn make_metrics(&mut self, _request: Option<ScanRequest>) -> Result<RecordBatch> {
|
||||
let metric_families = prometheus::gather();
|
||||
|
||||
let write_request =
|
||||
common_telemetry::metric::convert_metric_to_write_request(metric_families, None, 0);
|
||||
|
||||
for ts in write_request.timeseries {
|
||||
//Safety: always has `__name__` label
|
||||
let metric_name = ts
|
||||
.labels
|
||||
.iter()
|
||||
.find_map(|label| {
|
||||
if label.name == "__name__" {
|
||||
Some(label.value.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
self.add_metric(
|
||||
&metric_name,
|
||||
ts.labels
|
||||
.into_iter()
|
||||
.filter_map(|label| {
|
||||
if label.name == "__name__" {
|
||||
None
|
||||
} else {
|
||||
Some(format!("{}={}", label.name, label.value))
|
||||
}
|
||||
})
|
||||
.join(", "),
|
||||
// Safety: always has a sample
|
||||
ts.samples[0].value,
|
||||
);
|
||||
}
|
||||
|
||||
self.finish()
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
let rows_num = self.metric_names.len();
|
||||
let unknowns = Arc::new(ConstantVector::new(
|
||||
Arc::new(StringVector::from(vec!["unknown"])),
|
||||
rows_num,
|
||||
));
|
||||
let timestamps = Arc::new(ConstantVector::new(
|
||||
Arc::new(TimestampMillisecondVector::from_slice([
|
||||
current_time_millis(),
|
||||
])),
|
||||
rows_num,
|
||||
));
|
||||
|
||||
let columns: Vec<VectorRef> = vec![
|
||||
Arc::new(self.metric_names.finish()),
|
||||
Arc::new(self.metric_values.finish()),
|
||||
Arc::new(self.metric_labels.finish()),
|
||||
// TODO(dennis): supports node and node_type for cluster
|
||||
unknowns.clone(),
|
||||
unknowns,
|
||||
timestamps,
|
||||
];
|
||||
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
impl DfPartitionStream for InformationSchemaMetrics {
|
||||
fn schema(&self) -> &ArrowSchemaRef {
|
||||
self.schema.arrow_schema()
|
||||
}
|
||||
|
||||
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_metrics(None)
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_recordbatch::RecordBatches;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_make_metrics() {
|
||||
let metrics = InformationSchemaMetrics::new();
|
||||
|
||||
let stream = metrics.to_stream(ScanRequest::default()).unwrap();
|
||||
|
||||
let batches = RecordBatches::try_collect(stream).await.unwrap();
|
||||
|
||||
let result_literal = batches.pretty_print().unwrap();
|
||||
|
||||
assert!(result_literal.contains(METRIC_NAME));
|
||||
assert!(result_literal.contains(METRIC_VALUE));
|
||||
assert!(result_literal.contains(METRIC_LABELS));
|
||||
assert!(result_literal.contains(NODE));
|
||||
assert!(result_literal.contains(NODE_TYPE));
|
||||
assert!(result_literal.contains(TIMESTAMP));
|
||||
}
|
||||
}
|
||||
@@ -38,3 +38,4 @@ pub const TABLE_PRIVILEGES: &str = "table_privileges";
|
||||
pub const TRIGGERS: &str = "triggers";
|
||||
pub const GLOBAL_STATUS: &str = "global_status";
|
||||
pub const SESSION_STATUS: &str = "session_status";
|
||||
pub const RUNTIME_METRICS: &str = "runtime_metrics";
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_NAME;
|
||||
use common_catalog::format_full_table_name;
|
||||
use datafusion::common::{ResolvedTableReference, TableReference};
|
||||
use datafusion::datasource::provider_as_source;
|
||||
@@ -30,7 +29,7 @@ use crate::CatalogManagerRef;
|
||||
pub struct DfTableSourceProvider {
|
||||
catalog_manager: CatalogManagerRef,
|
||||
resolved_tables: HashMap<String, Arc<dyn TableSource>>,
|
||||
disallow_cross_schema_query: bool,
|
||||
disallow_cross_catalog_query: bool,
|
||||
default_catalog: String,
|
||||
default_schema: String,
|
||||
}
|
||||
@@ -38,12 +37,12 @@ pub struct DfTableSourceProvider {
|
||||
impl DfTableSourceProvider {
|
||||
pub fn new(
|
||||
catalog_manager: CatalogManagerRef,
|
||||
disallow_cross_schema_query: bool,
|
||||
disallow_cross_catalog_query: bool,
|
||||
query_ctx: &QueryContext,
|
||||
) -> Self {
|
||||
Self {
|
||||
catalog_manager,
|
||||
disallow_cross_schema_query,
|
||||
disallow_cross_catalog_query,
|
||||
resolved_tables: HashMap::new(),
|
||||
default_catalog: query_ctx.current_catalog().to_owned(),
|
||||
default_schema: query_ctx.current_schema().to_owned(),
|
||||
@@ -54,29 +53,18 @@ impl DfTableSourceProvider {
|
||||
&'a self,
|
||||
table_ref: TableReference<'a>,
|
||||
) -> Result<ResolvedTableReference<'a>> {
|
||||
if self.disallow_cross_schema_query {
|
||||
if self.disallow_cross_catalog_query {
|
||||
match &table_ref {
|
||||
TableReference::Bare { .. } => (),
|
||||
TableReference::Partial { schema, .. } => {
|
||||
ensure!(
|
||||
schema.as_ref() == self.default_schema
|
||||
|| schema.as_ref() == INFORMATION_SCHEMA_NAME,
|
||||
QueryAccessDeniedSnafu {
|
||||
catalog: &self.default_catalog,
|
||||
schema: schema.as_ref(),
|
||||
}
|
||||
);
|
||||
}
|
||||
TableReference::Partial { .. } => {}
|
||||
TableReference::Full {
|
||||
catalog, schema, ..
|
||||
} => {
|
||||
ensure!(
|
||||
catalog.as_ref() == self.default_catalog
|
||||
&& (schema.as_ref() == self.default_schema
|
||||
|| schema.as_ref() == INFORMATION_SCHEMA_NAME),
|
||||
catalog.as_ref() == self.default_catalog,
|
||||
QueryAccessDeniedSnafu {
|
||||
catalog: catalog.as_ref(),
|
||||
schema: schema.as_ref()
|
||||
schema: schema.as_ref(),
|
||||
}
|
||||
);
|
||||
}
|
||||
@@ -136,21 +124,21 @@ mod tests {
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
let _ = result.unwrap();
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Partial {
|
||||
schema: Cow::Borrowed("public"),
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
let _ = result.unwrap();
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Partial {
|
||||
schema: Cow::Borrowed("wrong_schema"),
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_err());
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("greptime"),
|
||||
@@ -158,7 +146,7 @@ mod tests {
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
let _ = result.unwrap();
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("wrong_catalog"),
|
||||
@@ -172,14 +160,15 @@ mod tests {
|
||||
schema: Cow::Borrowed("information_schema"),
|
||||
table: Cow::Borrowed("columns"),
|
||||
};
|
||||
let _ = table_provider.resolve_table_ref(table_ref).unwrap();
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("greptime"),
|
||||
schema: Cow::Borrowed("information_schema"),
|
||||
table: Cow::Borrowed("columns"),
|
||||
};
|
||||
let _ = table_provider.resolve_table_ref(table_ref).unwrap();
|
||||
assert!(table_provider.resolve_table_ref(table_ref).is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("dummy"),
|
||||
@@ -187,5 +176,12 @@ mod tests {
|
||||
table: Cow::Borrowed("columns"),
|
||||
};
|
||||
assert!(table_provider.resolve_table_ref(table_ref).is_err());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("greptime"),
|
||||
schema: Cow::Borrowed("greptime_private"),
|
||||
table: Cow::Borrowed("columns"),
|
||||
};
|
||||
assert!(table_provider.resolve_table_ref(table_ref).is_ok());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,6 +28,7 @@ use frontend::heartbeat::handler::invalidate_table_cache::InvalidateTableCacheHa
|
||||
use frontend::heartbeat::HeartbeatTask;
|
||||
use frontend::instance::builder::FrontendBuilder;
|
||||
use frontend::instance::{FrontendInstance, Instance as FeInstance};
|
||||
use frontend::server::Services;
|
||||
use meta_client::MetaClientOptions;
|
||||
use servers::tls::{TlsMode, TlsOption};
|
||||
use servers::Mode;
|
||||
@@ -246,14 +247,18 @@ impl StartCommand {
|
||||
meta_client,
|
||||
)
|
||||
.with_cache_invalidator(meta_backend)
|
||||
.with_plugin(plugins)
|
||||
.with_plugin(plugins.clone())
|
||||
.with_heartbeat_task(heartbeat_task)
|
||||
.try_build()
|
||||
.await
|
||||
.context(StartFrontendSnafu)?;
|
||||
|
||||
let servers = Services::new(plugins)
|
||||
.build(opts.clone(), Arc::new(instance.clone()))
|
||||
.await
|
||||
.context(StartFrontendSnafu)?;
|
||||
instance
|
||||
.build_servers(opts)
|
||||
.build_servers(opts, servers)
|
||||
.await
|
||||
.context(StartFrontendSnafu)?;
|
||||
|
||||
|
||||
@@ -128,7 +128,7 @@ impl StartCommand {
|
||||
let mut opts: MetaSrvOptions = Options::load_layered_options(
|
||||
self.config_file.as_deref(),
|
||||
self.env_prefix.as_ref(),
|
||||
None,
|
||||
MetaSrvOptions::env_list_keys(),
|
||||
)?;
|
||||
|
||||
if let Some(dir) = &cli_options.log_dir {
|
||||
|
||||
@@ -40,6 +40,7 @@ use file_engine::config::EngineConfig as FileEngineConfig;
|
||||
use frontend::frontend::FrontendOptions;
|
||||
use frontend::instance::builder::FrontendBuilder;
|
||||
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
|
||||
use frontend::server::Services;
|
||||
use frontend::service_config::{
|
||||
GrpcOptions, InfluxdbOptions, MysqlOptions, OpentsdbOptions, PostgresOptions, PromStoreOptions,
|
||||
};
|
||||
@@ -118,6 +119,12 @@ pub struct StandaloneOptions {
|
||||
pub export_metrics: ExportMetricsOption,
|
||||
}
|
||||
|
||||
impl StandaloneOptions {
|
||||
pub fn env_list_keys() -> Option<&'static [&'static str]> {
|
||||
Some(&["wal.broker_endpoints"])
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for StandaloneOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
@@ -267,7 +274,7 @@ impl StartCommand {
|
||||
let opts: StandaloneOptions = Options::load_layered_options(
|
||||
self.config_file.as_deref(),
|
||||
self.env_prefix.as_ref(),
|
||||
None,
|
||||
StandaloneOptions::env_list_keys(),
|
||||
)?;
|
||||
|
||||
self.convert_options(cli_options, opts)
|
||||
@@ -425,13 +432,17 @@ impl StartCommand {
|
||||
.await?;
|
||||
|
||||
let mut frontend = FrontendBuilder::new(kv_backend, datanode_manager, ddl_task_executor)
|
||||
.with_plugin(fe_plugins)
|
||||
.with_plugin(fe_plugins.clone())
|
||||
.try_build()
|
||||
.await
|
||||
.context(StartFrontendSnafu)?;
|
||||
|
||||
let servers = Services::new(fe_plugins)
|
||||
.build(opts.clone(), Arc::new(frontend.clone()))
|
||||
.await
|
||||
.context(StartFrontendSnafu)?;
|
||||
frontend
|
||||
.build_servers(opts)
|
||||
.build_servers(opts, servers)
|
||||
.await
|
||||
.context(StartFrontendSnafu)?;
|
||||
|
||||
|
||||
@@ -80,6 +80,8 @@ pub const INFORMATION_SCHEMA_TRIGGERS_TABLE_ID: u32 = 24;
|
||||
pub const INFORMATION_SCHEMA_GLOBAL_STATUS_TABLE_ID: u32 = 25;
|
||||
/// id for information_schema.SESSION_STATUS
|
||||
pub const INFORMATION_SCHEMA_SESSION_STATUS_TABLE_ID: u32 = 26;
|
||||
/// id for information_schema.RUNTIME_METRICS
|
||||
pub const INFORMATION_SCHEMA_RUNTIME_METRICS_TABLE_ID: u32 = 27;
|
||||
/// ----- End of information_schema tables -----
|
||||
|
||||
pub const MITO_ENGINE: &str = "mito";
|
||||
|
||||
@@ -17,6 +17,11 @@ use consts::DEFAULT_CATALOG_NAME;
|
||||
pub mod consts;
|
||||
pub mod error;
|
||||
|
||||
#[inline]
|
||||
pub fn format_schema_name(catalog: &str, schema: &str) -> String {
|
||||
format!("{catalog}.{schema}")
|
||||
}
|
||||
|
||||
/// Formats table fully-qualified name
|
||||
#[inline]
|
||||
pub fn format_full_table_name(catalog: &str, schema: &str, table: &str) -> String {
|
||||
|
||||
@@ -18,9 +18,7 @@ pub mod raft_engine;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::with_prefix;
|
||||
|
||||
pub use crate::wal::kafka::{
|
||||
KafkaConfig, KafkaOptions as KafkaWalOptions, StandaloneKafkaConfig, Topic as KafkaWalTopic,
|
||||
};
|
||||
pub use crate::wal::kafka::{KafkaConfig, KafkaOptions as KafkaWalOptions, StandaloneKafkaConfig};
|
||||
pub use crate::wal::raft_engine::RaftEngineConfig;
|
||||
|
||||
/// An encoded wal options will be wrapped into a (WAL_OPTIONS_KEY, encoded wal options) key-value pair
|
||||
|
||||
@@ -19,11 +19,6 @@ use rskafka::client::partition::Compression as RsKafkaCompression;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::with_prefix;
|
||||
|
||||
/// Topic name prefix.
|
||||
pub const TOPIC_NAME_PREFIX: &str = "greptimedb_wal_topic";
|
||||
/// Kafka wal topic.
|
||||
pub type Topic = String;
|
||||
|
||||
/// The type of the topic selector, i.e. with which strategy to select a topic.
|
||||
#[derive(Default, Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
@@ -138,5 +133,5 @@ impl Default for StandaloneKafkaConfig {
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct KafkaOptions {
|
||||
/// Kafka wal topic.
|
||||
pub topic: Topic,
|
||||
pub topic: String,
|
||||
}
|
||||
|
||||
@@ -34,6 +34,13 @@ pub struct RaftEngineConfig {
|
||||
pub read_batch_size: usize,
|
||||
// whether to sync log file after every write
|
||||
pub sync_write: bool,
|
||||
// whether to reuse logically truncated log files.
|
||||
pub enable_log_recycle: bool,
|
||||
// whether to pre-create log files on start up
|
||||
pub prefill_log_files: bool,
|
||||
// duration for fsyncing log files.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub sync_period: Option<Duration>,
|
||||
}
|
||||
|
||||
impl Default for RaftEngineConfig {
|
||||
@@ -45,6 +52,9 @@ impl Default for RaftEngineConfig {
|
||||
purge_interval: Duration::from_secs(600),
|
||||
read_batch_size: 128,
|
||||
sync_write: false,
|
||||
enable_log_recycle: true,
|
||||
prefill_log_files: false,
|
||||
sync_period: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,10 +13,12 @@
|
||||
// limitations under the License.
|
||||
|
||||
pub mod build;
|
||||
pub mod version;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use build::BuildFunction;
|
||||
use version::VersionFunction;
|
||||
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
@@ -25,5 +27,6 @@ pub(crate) struct SystemFunction;
|
||||
impl SystemFunction {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
registry.register(Arc::new(BuildFunction));
|
||||
registry.register(Arc::new(VersionFunction));
|
||||
}
|
||||
}
|
||||
|
||||
54
src/common/function/src/system/version.rs
Normal file
54
src/common/function/src/system/version.rs
Normal file
@@ -0,0 +1,54 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::{env, fmt};
|
||||
|
||||
use common_query::error::Result;
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::vectors::{StringVector, VectorRef};
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub(crate) struct VersionFunction;
|
||||
|
||||
impl fmt::Display for VersionFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "VERSION")
|
||||
}
|
||||
}
|
||||
|
||||
impl Function for VersionFunction {
|
||||
fn name(&self) -> &str {
|
||||
"version"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::string_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(vec![], Volatility::Immutable)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
let result = StringVector::from(vec![format!(
|
||||
"5.7.20-greptimedb-{}",
|
||||
env!("CARGO_PKG_VERSION")
|
||||
)]);
|
||||
Ok(Arc::new(result))
|
||||
}
|
||||
}
|
||||
@@ -24,7 +24,7 @@ use async_trait::async_trait;
|
||||
use common_grpc_expr::alter_expr_to_request;
|
||||
use common_procedure::error::{FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu};
|
||||
use common_procedure::{
|
||||
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure, Status,
|
||||
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure, Status, StringKey,
|
||||
};
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use common_telemetry::{debug, info};
|
||||
@@ -40,10 +40,11 @@ use table::requests::AlterKind;
|
||||
use crate::cache_invalidator::Context;
|
||||
use crate::ddl::utils::handle_operate_region_error;
|
||||
use crate::ddl::DdlContext;
|
||||
use crate::error::{self, ConvertAlterTableRequestSnafu, InvalidProtoMsgSnafu, Result};
|
||||
use crate::error::{self, ConvertAlterTableRequestSnafu, Error, InvalidProtoMsgSnafu, Result};
|
||||
use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::DeserializedValueWithBytes;
|
||||
use crate::lock_key::{CatalogLock, SchemaLock, TableLock, TableNameLock};
|
||||
use crate::metrics;
|
||||
use crate::rpc::ddl::AlterTableTask;
|
||||
use crate::rpc::router::{find_leader_regions, find_leaders};
|
||||
@@ -63,7 +64,7 @@ impl AlterTableProcedure {
|
||||
cluster_id: u64,
|
||||
task: AlterTableTask,
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
physical_table_name: Option<TableName>,
|
||||
physical_table_info: Option<(TableId, TableName)>,
|
||||
context: DdlContext,
|
||||
) -> Result<Self> {
|
||||
let alter_kind = task
|
||||
@@ -86,7 +87,7 @@ impl AlterTableProcedure {
|
||||
data: AlterTableData::new(
|
||||
task,
|
||||
table_info_value,
|
||||
physical_table_name,
|
||||
physical_table_info,
|
||||
cluster_id,
|
||||
next_column_id,
|
||||
),
|
||||
@@ -335,32 +336,31 @@ impl AlterTableProcedure {
|
||||
Ok(Status::Done)
|
||||
}
|
||||
|
||||
fn lock_key_inner(&self) -> Vec<String> {
|
||||
fn lock_key_inner(&self) -> Vec<StringKey> {
|
||||
let mut lock_key = vec![];
|
||||
|
||||
if let Some(physical_table_name) = self.data.physical_table_name() {
|
||||
let physical_table_key = common_catalog::format_full_table_name(
|
||||
&physical_table_name.catalog_name,
|
||||
&physical_table_name.schema_name,
|
||||
&physical_table_name.table_name,
|
||||
if let Some((physical_table_id, physical_table_name)) = self.data.physical_table_info() {
|
||||
lock_key.push(CatalogLock::Read(&physical_table_name.catalog_name).into());
|
||||
lock_key.push(
|
||||
SchemaLock::read(
|
||||
&physical_table_name.catalog_name,
|
||||
&physical_table_name.schema_name,
|
||||
)
|
||||
.into(),
|
||||
);
|
||||
lock_key.push(physical_table_key);
|
||||
lock_key.push(TableLock::Read(*physical_table_id).into())
|
||||
}
|
||||
|
||||
let table_ref = self.data.table_ref();
|
||||
let table_key = common_catalog::format_full_table_name(
|
||||
table_ref.catalog,
|
||||
table_ref.schema,
|
||||
table_ref.table,
|
||||
);
|
||||
lock_key.push(table_key);
|
||||
let table_id = self.data.table_id();
|
||||
lock_key.push(CatalogLock::Read(table_ref.catalog).into());
|
||||
lock_key.push(SchemaLock::read(table_ref.catalog, table_ref.schema).into());
|
||||
lock_key.push(TableLock::Write(table_id).into());
|
||||
|
||||
if let Ok(Kind::RenameTable(RenameTable { new_table_name })) = self.alter_kind() {
|
||||
lock_key.push(common_catalog::format_full_table_name(
|
||||
table_ref.catalog,
|
||||
table_ref.schema,
|
||||
new_table_name,
|
||||
))
|
||||
lock_key.push(
|
||||
TableNameLock::new(table_ref.catalog, table_ref.schema, new_table_name).into(),
|
||||
)
|
||||
}
|
||||
|
||||
lock_key
|
||||
@@ -374,8 +374,8 @@ impl Procedure for AlterTableProcedure {
|
||||
}
|
||||
|
||||
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
|
||||
let error_handler = |e| {
|
||||
if matches!(e, error::Error::RetryLater { .. }) {
|
||||
let error_handler = |e: Error| {
|
||||
if e.is_retry_later() {
|
||||
ProcedureError::retry_later(e)
|
||||
} else {
|
||||
ProcedureError::external(e)
|
||||
@@ -406,7 +406,7 @@ impl Procedure for AlterTableProcedure {
|
||||
fn lock_key(&self) -> LockKey {
|
||||
let key = self.lock_key_inner();
|
||||
|
||||
LockKey::new_exclusive(key)
|
||||
LockKey::new(key)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -423,13 +423,13 @@ enum AlterTableState {
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AlterTableData {
|
||||
cluster_id: u64,
|
||||
state: AlterTableState,
|
||||
task: AlterTableTask,
|
||||
/// Table info value before alteration.
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
/// Physical table name, if the table to alter is a logical table.
|
||||
physical_table_name: Option<TableName>,
|
||||
cluster_id: u64,
|
||||
physical_table_info: Option<(TableId, TableName)>,
|
||||
/// Next column id of the table if the task adds columns to the table.
|
||||
next_column_id: Option<ColumnId>,
|
||||
}
|
||||
@@ -438,7 +438,7 @@ impl AlterTableData {
|
||||
pub fn new(
|
||||
task: AlterTableTask,
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
physical_table_name: Option<TableName>,
|
||||
physical_table_info: Option<(TableId, TableName)>,
|
||||
cluster_id: u64,
|
||||
next_column_id: Option<ColumnId>,
|
||||
) -> Self {
|
||||
@@ -446,7 +446,7 @@ impl AlterTableData {
|
||||
state: AlterTableState::Prepare,
|
||||
task,
|
||||
table_info_value,
|
||||
physical_table_name,
|
||||
physical_table_info,
|
||||
cluster_id,
|
||||
next_column_id,
|
||||
}
|
||||
@@ -464,8 +464,8 @@ impl AlterTableData {
|
||||
&self.table_info_value.table_info
|
||||
}
|
||||
|
||||
fn physical_table_name(&self) -> Option<&TableName> {
|
||||
self.physical_table_name.as_ref()
|
||||
fn physical_table_info(&self) -> Option<&(TableId, TableName)> {
|
||||
self.physical_table_info.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -41,6 +41,7 @@ use crate::ddl::DdlContext;
|
||||
use crate::error::{self, Result, TableRouteNotFoundSnafu};
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::lock_key::TableNameLock;
|
||||
use crate::metrics;
|
||||
use crate::region_keeper::OperatingRegionGuard;
|
||||
use crate::rpc::ddl::CreateTableTask;
|
||||
@@ -343,13 +344,12 @@ impl Procedure for CreateTableProcedure {
|
||||
|
||||
fn lock_key(&self) -> LockKey {
|
||||
let table_ref = &self.creator.data.table_ref();
|
||||
let key = common_catalog::format_full_table_name(
|
||||
|
||||
LockKey::single(TableNameLock::new(
|
||||
table_ref.catalog,
|
||||
table_ref.schema,
|
||||
table_ref.table,
|
||||
);
|
||||
|
||||
LockKey::single_exclusive(key)
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -41,6 +41,7 @@ use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::key::DeserializedValueWithBytes;
|
||||
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
|
||||
use crate::metrics;
|
||||
use crate::region_keeper::OperatingRegionGuard;
|
||||
use crate::rpc::ddl::DropTableTask;
|
||||
@@ -267,13 +268,14 @@ impl Procedure for DropTableProcedure {
|
||||
|
||||
fn lock_key(&self) -> LockKey {
|
||||
let table_ref = &self.data.table_ref();
|
||||
let key = common_catalog::format_full_table_name(
|
||||
table_ref.catalog,
|
||||
table_ref.schema,
|
||||
table_ref.table,
|
||||
);
|
||||
let table_id = self.data.table_id();
|
||||
let lock_key = vec![
|
||||
CatalogLock::Read(table_ref.catalog).into(),
|
||||
SchemaLock::read(table_ref.catalog, table_ref.schema).into(),
|
||||
TableLock::Write(table_id).into(),
|
||||
];
|
||||
|
||||
LockKey::single_exclusive(key)
|
||||
LockKey::new(lock_key)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -37,6 +37,7 @@ use crate::error::{Result, TableNotFoundSnafu};
|
||||
use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::DeserializedValueWithBytes;
|
||||
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
|
||||
use crate::metrics;
|
||||
use crate::rpc::ddl::TruncateTableTask;
|
||||
use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute};
|
||||
@@ -75,13 +76,14 @@ impl Procedure for TruncateTableProcedure {
|
||||
|
||||
fn lock_key(&self) -> LockKey {
|
||||
let table_ref = &self.data.table_ref();
|
||||
let key = common_catalog::format_full_table_name(
|
||||
table_ref.catalog,
|
||||
table_ref.schema,
|
||||
table_ref.table,
|
||||
);
|
||||
let table_id = self.data.table_id();
|
||||
let lock_key = vec![
|
||||
CatalogLock::Read(table_ref.catalog).into(),
|
||||
SchemaLock::read(table_ref.catalog, table_ref.schema).into(),
|
||||
TableLock::Write(table_id).into(),
|
||||
];
|
||||
|
||||
LockKey::single_exclusive(key)
|
||||
LockKey::new(lock_key)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ pub fn handle_operate_region_error(datanode: Peer) -> impl FnOnce(crate::error::
|
||||
}
|
||||
|
||||
pub fn handle_retry_error(e: Error) -> ProcedureError {
|
||||
if matches!(e, error::Error::RetryLater { .. }) {
|
||||
if e.is_retry_later() {
|
||||
ProcedureError::retry_later(e)
|
||||
} else {
|
||||
ProcedureError::external(e)
|
||||
|
||||
@@ -19,7 +19,7 @@ use common_procedure::{watcher, ProcedureId, ProcedureManagerRef, ProcedureWithI
|
||||
use common_telemetry::tracing_context::{FutureExt, TracingContext};
|
||||
use common_telemetry::{info, tracing};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::RegionNumber;
|
||||
use store_api::storage::{RegionNumber, TableId};
|
||||
|
||||
use crate::cache_invalidator::CacheInvalidatorRef;
|
||||
use crate::datanode_manager::DatanodeManagerRef;
|
||||
@@ -162,7 +162,7 @@ impl DdlManager {
|
||||
cluster_id: u64,
|
||||
alter_table_task: AlterTableTask,
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
physical_table_name: Option<TableName>,
|
||||
physical_table_info: Option<(TableId, TableName)>,
|
||||
) -> Result<ProcedureId> {
|
||||
let context = self.create_context();
|
||||
|
||||
@@ -170,7 +170,7 @@ impl DdlManager {
|
||||
cluster_id,
|
||||
alter_table_task,
|
||||
table_info_value,
|
||||
physical_table_name,
|
||||
physical_table_info,
|
||||
context,
|
||||
)?;
|
||||
|
||||
@@ -341,7 +341,7 @@ async fn handle_alter_table_task(
|
||||
.get_physical_table_id(table_id)
|
||||
.await?;
|
||||
|
||||
let physical_table_name = if physical_table_id == table_id {
|
||||
let physical_table_info = if physical_table_id == table_id {
|
||||
None
|
||||
} else {
|
||||
let physical_table_info = &ddl_manager
|
||||
@@ -353,11 +353,14 @@ async fn handle_alter_table_task(
|
||||
table_name: table_ref.to_string(),
|
||||
})?
|
||||
.table_info;
|
||||
Some(TableName {
|
||||
catalog_name: physical_table_info.catalog_name.clone(),
|
||||
schema_name: physical_table_info.schema_name.clone(),
|
||||
table_name: physical_table_info.name.clone(),
|
||||
})
|
||||
Some((
|
||||
physical_table_id,
|
||||
TableName {
|
||||
catalog_name: physical_table_info.catalog_name.clone(),
|
||||
schema_name: physical_table_info.schema_name.clone(),
|
||||
table_name: physical_table_info.name.clone(),
|
||||
},
|
||||
))
|
||||
};
|
||||
|
||||
let id = ddl_manager
|
||||
@@ -365,7 +368,7 @@ async fn handle_alter_table_task(
|
||||
cluster_id,
|
||||
alter_table_task,
|
||||
table_info_value,
|
||||
physical_table_name,
|
||||
physical_table_info,
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -386,15 +389,21 @@ async fn handle_drop_table_task(
|
||||
let table_metadata_manager = &ddl_manager.table_metadata_manager();
|
||||
let table_ref = drop_table_task.table_ref();
|
||||
|
||||
let (table_info_value, table_route_value) =
|
||||
table_metadata_manager.get_full_table_info(table_id).await?;
|
||||
let table_info_value = table_metadata_manager
|
||||
.table_info_manager()
|
||||
.get(table_id)
|
||||
.await?;
|
||||
let (_, table_route_value) = table_metadata_manager
|
||||
.table_route_manager()
|
||||
.get_physical_table_route(table_id)
|
||||
.await?;
|
||||
|
||||
let table_info_value = table_info_value.with_context(|| error::TableInfoNotFoundSnafu {
|
||||
table_name: table_ref.to_string(),
|
||||
})?;
|
||||
|
||||
let table_route_value =
|
||||
table_route_value.context(error::TableRouteNotFoundSnafu { table_id })?;
|
||||
DeserializedValueWithBytes::from_inner(TableRouteValue::Physical(table_route_value));
|
||||
|
||||
let id = ddl_manager
|
||||
.submit_drop_table_task(
|
||||
|
||||
@@ -354,6 +354,7 @@ impl TableMetadataManager {
|
||||
&self.kv_backend
|
||||
}
|
||||
|
||||
// TODO(ruihang): deprecate this
|
||||
pub async fn get_full_table_info(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
|
||||
@@ -178,15 +178,6 @@ impl DatanodeTableManager {
|
||||
let txns = distribution
|
||||
.into_iter()
|
||||
.map(|(datanode_id, regions)| {
|
||||
let filtered_region_wal_options = regions
|
||||
.iter()
|
||||
.filter_map(|region_number| {
|
||||
region_wal_options
|
||||
.get(region_number)
|
||||
.map(|wal_options| (*region_number, wal_options.clone()))
|
||||
})
|
||||
.collect();
|
||||
|
||||
let key = DatanodeTableKey::new(datanode_id, table_id);
|
||||
let val = DatanodeTableValue::new(
|
||||
table_id,
|
||||
@@ -195,7 +186,9 @@ impl DatanodeTableManager {
|
||||
engine: engine.to_string(),
|
||||
region_storage_path: region_storage_path.to_string(),
|
||||
region_options: region_options.clone(),
|
||||
region_wal_options: filtered_region_wal_options,
|
||||
// FIXME(weny): Before we store all region wal options into table metadata or somewhere,
|
||||
// We must store all region wal options.
|
||||
region_wal_options: region_wal_options.clone(),
|
||||
},
|
||||
);
|
||||
|
||||
@@ -243,7 +236,15 @@ impl DatanodeTableManager {
|
||||
if need_update {
|
||||
let key = DatanodeTableKey::new(datanode, table_id);
|
||||
let raw_key = key.as_raw_key();
|
||||
let val = DatanodeTableValue::new(table_id, regions, region_info.clone())
|
||||
// FIXME(weny): add unit tests.
|
||||
let mut new_region_info = region_info.clone();
|
||||
if need_update_options {
|
||||
new_region_info.region_options = new_region_options.clone();
|
||||
}
|
||||
if need_update_wal_options {
|
||||
new_region_info.region_wal_options = new_region_wal_options.clone();
|
||||
}
|
||||
let val = DatanodeTableValue::new(table_id, regions, new_region_info)
|
||||
.try_as_raw_value()?;
|
||||
opts.push(TxnOp::Put(raw_key, val));
|
||||
}
|
||||
|
||||
@@ -114,6 +114,7 @@ where
|
||||
Ok(!resp.kvs.is_empty())
|
||||
}
|
||||
|
||||
/// Returns previous key-value pair if `prev_kv` is `true`.
|
||||
async fn delete(&self, key: &[u8], prev_kv: bool) -> Result<Option<KeyValue>, Self::Error> {
|
||||
let mut req = DeleteRangeRequest::new().with_key(key.to_vec());
|
||||
if prev_kv {
|
||||
|
||||
@@ -27,6 +27,7 @@ pub mod heartbeat;
pub mod instruction;
pub mod key;
pub mod kv_backend;
pub mod lock_key;
pub mod metrics;
pub mod peer;
pub mod range_stream;

235
src/common/meta/src/lock_key.rs
Normal file
@@ -0,0 +1,235 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use common_catalog::{format_full_table_name, format_schema_name};
|
||||
use common_procedure::StringKey;
|
||||
use store_api::storage::{RegionId, TableId};
|
||||
|
||||
const CATALOG_LOCK_PREFIX: &str = "__catalog_lock";
|
||||
const SCHEMA_LOCK_PREFIX: &str = "__schema_lock";
|
||||
const TABLE_LOCK_PREFIX: &str = "__table_lock";
|
||||
const TABLE_NAME_LOCK_PREFIX: &str = "__table_name_lock";
|
||||
const REGION_LOCK_PREFIX: &str = "__region_lock";
|
||||
|
||||
/// [CatalogLock] acquires the lock on the tenant level.
|
||||
pub enum CatalogLock<'a> {
|
||||
Read(&'a str),
|
||||
Write(&'a str),
|
||||
}
|
||||
|
||||
impl<'a> Display for CatalogLock<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let key = match self {
|
||||
CatalogLock::Read(s) => s,
|
||||
CatalogLock::Write(s) => s,
|
||||
};
|
||||
write!(f, "{}/{}", CATALOG_LOCK_PREFIX, key)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<CatalogLock<'a>> for StringKey {
|
||||
fn from(value: CatalogLock) -> Self {
|
||||
match value {
|
||||
CatalogLock::Write(_) => StringKey::Exclusive(value.to_string()),
|
||||
CatalogLock::Read(_) => StringKey::Share(value.to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// [SchemaLock] acquires the lock on the database level.
|
||||
pub enum SchemaLock {
|
||||
Read(String),
|
||||
Write(String),
|
||||
}
|
||||
|
||||
impl SchemaLock {
|
||||
pub fn read(catalog: &str, schema: &str) -> Self {
|
||||
Self::Read(format_schema_name(catalog, schema))
|
||||
}
|
||||
|
||||
pub fn write(catalog: &str, schema: &str) -> Self {
|
||||
Self::Write(format_schema_name(catalog, schema))
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for SchemaLock {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let key = match self {
|
||||
SchemaLock::Read(s) => s,
|
||||
SchemaLock::Write(s) => s,
|
||||
};
|
||||
write!(f, "{}/{}", SCHEMA_LOCK_PREFIX, key)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SchemaLock> for StringKey {
|
||||
fn from(value: SchemaLock) -> Self {
|
||||
match value {
|
||||
SchemaLock::Write(_) => StringKey::Exclusive(value.to_string()),
|
||||
SchemaLock::Read(_) => StringKey::Share(value.to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// [TableNameLock] prevents any procedures trying to create a table named it.
|
||||
pub enum TableNameLock {
|
||||
Write(String),
|
||||
}
|
||||
|
||||
impl TableNameLock {
|
||||
pub fn new(catalog: &str, schema: &str, table: &str) -> Self {
|
||||
Self::Write(format_full_table_name(catalog, schema, table))
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for TableNameLock {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let TableNameLock::Write(name) = self;
|
||||
write!(f, "{}/{}", TABLE_NAME_LOCK_PREFIX, name)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<TableNameLock> for StringKey {
|
||||
fn from(value: TableNameLock) -> Self {
|
||||
match value {
|
||||
TableNameLock::Write(_) => StringKey::Exclusive(value.to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// [TableLock] acquires the lock on the table level.
|
||||
///
|
||||
/// Note: Allows to read/modify the corresponding table's [TableInfoValue](crate::key::table_info::TableInfoValue),
|
||||
/// [TableRouteValue](crate::key::table_route::TableRouteValue), [TableDatanodeValue](crate::key::datanode_table::DatanodeTableValue).
|
||||
pub enum TableLock {
|
||||
Read(TableId),
|
||||
Write(TableId),
|
||||
}
|
||||
|
||||
impl Display for TableLock {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let key = match self {
|
||||
TableLock::Read(s) => s,
|
||||
TableLock::Write(s) => s,
|
||||
};
|
||||
write!(f, "{}/{}", TABLE_LOCK_PREFIX, key)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<TableLock> for StringKey {
|
||||
fn from(value: TableLock) -> Self {
|
||||
match value {
|
||||
TableLock::Write(_) => StringKey::Exclusive(value.to_string()),
|
||||
TableLock::Read(_) => StringKey::Share(value.to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// [RegionLock] acquires the lock on the region level.
|
||||
///
|
||||
/// Note:
|
||||
/// - Allows modification the corresponding region's [TableRouteValue](crate::key::table_route::TableRouteValue),
|
||||
/// [TableDatanodeValue](crate::key::datanode_table::DatanodeTableValue) even if
|
||||
/// it acquires the [RegionLock::Write] only without acquiring the [TableLock::Write].
|
||||
///
|
||||
/// - Should acquire [TableLock] of the table at same procedure.
|
||||
///
|
||||
/// TODO(weny): we should consider separating TableRouteValue into finer keys.
|
||||
pub enum RegionLock {
|
||||
Read(RegionId),
|
||||
Write(RegionId),
|
||||
}
|
||||
|
||||
impl Display for RegionLock {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let key = match self {
|
||||
RegionLock::Read(s) => s.as_u64(),
|
||||
RegionLock::Write(s) => s.as_u64(),
|
||||
};
|
||||
write!(f, "{}/{}", REGION_LOCK_PREFIX, key)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<RegionLock> for StringKey {
|
||||
fn from(value: RegionLock) -> Self {
|
||||
match value {
|
||||
RegionLock::Write(_) => StringKey::Exclusive(value.to_string()),
|
||||
RegionLock::Read(_) => StringKey::Share(value.to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_procedure::StringKey;
|
||||
|
||||
use crate::lock_key::*;
|
||||
|
||||
#[test]
|
||||
fn test_lock_key() {
|
||||
// The catalog lock
|
||||
let string_key: StringKey = CatalogLock::Read("foo").into();
|
||||
assert_eq!(
|
||||
string_key,
|
||||
StringKey::Share(format!("{}/{}", CATALOG_LOCK_PREFIX, "foo"))
|
||||
);
|
||||
let string_key: StringKey = CatalogLock::Write("foo").into();
|
||||
assert_eq!(
|
||||
string_key,
|
||||
StringKey::Exclusive(format!("{}/{}", CATALOG_LOCK_PREFIX, "foo"))
|
||||
);
|
||||
// The schema lock
|
||||
let string_key: StringKey = SchemaLock::read("foo", "bar").into();
|
||||
assert_eq!(
|
||||
string_key,
|
||||
StringKey::Share(format!("{}/{}", SCHEMA_LOCK_PREFIX, "foo.bar"))
|
||||
);
|
||||
let string_key: StringKey = SchemaLock::write("foo", "bar").into();
|
||||
assert_eq!(
|
||||
string_key,
|
||||
StringKey::Exclusive(format!("{}/{}", SCHEMA_LOCK_PREFIX, "foo.bar"))
|
||||
);
|
||||
// The table lock
|
||||
let string_key: StringKey = TableLock::Read(1024).into();
|
||||
assert_eq!(
|
||||
string_key,
|
||||
StringKey::Share(format!("{}/{}", TABLE_LOCK_PREFIX, 1024))
|
||||
);
|
||||
let string_key: StringKey = TableLock::Write(1024).into();
|
||||
assert_eq!(
|
||||
string_key,
|
||||
StringKey::Exclusive(format!("{}/{}", TABLE_LOCK_PREFIX, 1024))
|
||||
);
|
||||
// The table name lock
|
||||
let string_key: StringKey = TableNameLock::new("foo", "bar", "baz").into();
|
||||
assert_eq!(
|
||||
string_key,
|
||||
StringKey::Exclusive(format!("{}/{}", TABLE_NAME_LOCK_PREFIX, "foo.bar.baz"))
|
||||
);
|
||||
// The region lock
|
||||
let region_id = RegionId::new(1024, 1);
|
||||
let string_key: StringKey = RegionLock::Read(region_id).into();
|
||||
assert_eq!(
|
||||
string_key,
|
||||
StringKey::Share(format!("{}/{}", REGION_LOCK_PREFIX, region_id.as_u64()))
|
||||
);
|
||||
let string_key: StringKey = RegionLock::Write(region_id).into();
|
||||
assert_eq!(
|
||||
string_key,
|
||||
StringKey::Exclusive(format!("{}/{}", REGION_LOCK_PREFIX, region_id.as_u64()))
|
||||
);
|
||||
}
|
||||
}
|
||||
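The new lock keys convert into common_procedure::StringKey and are meant to be combined into a procedure's LockKey. A minimal sketch of that composition, modeled on DropTableProcedure::lock_key above and assuming LockKey::new accepts the collected keys exactly as it is used in this change:

// Sketch only: composes the new lock keys into a procedure's LockKey,
// mirroring DropTableProcedure::lock_key in this change set.
// `catalog`, `schema` and `table_id` are assumed inputs taken from the procedure's data.
use common_meta::lock_key::{CatalogLock, SchemaLock, TableLock};
use common_procedure::LockKey;
use store_api::storage::TableId;

fn table_ddl_lock_key(catalog: &str, schema: &str, table_id: TableId) -> LockKey {
    let keys = vec![
        CatalogLock::Read(catalog).into(),        // shared lock on the catalog
        SchemaLock::read(catalog, schema).into(), // shared lock on the schema
        TableLock::Write(table_id).into(),        // exclusive lock on the table id
    ];
    LockKey::new(keys)
}
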
@@ -23,7 +23,6 @@ use serde::{Deserialize, Serialize};
|
||||
use store_api::storage::{RegionId, RegionNumber};
|
||||
|
||||
use crate::wal::kafka::KafkaConfig;
|
||||
pub use crate::wal::kafka::Topic as KafkaWalTopic;
|
||||
pub use crate::wal::options_allocator::{
|
||||
allocate_region_wal_options, WalOptionsAllocator, WalOptionsAllocatorRef,
|
||||
};
|
||||
@@ -98,7 +97,6 @@ mod tests {
|
||||
num_topics = 32
|
||||
selector_type = "round_robin"
|
||||
topic_name_prefix = "greptimedb_wal_topic"
|
||||
num_partitions = 1
|
||||
replication_factor = 1
|
||||
create_topic_timeout = "30s"
|
||||
backoff_init = "500ms"
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
pub mod test_util;
|
||||
pub mod topic;
|
||||
pub mod topic_manager;
|
||||
pub mod topic_selector;
|
||||
|
||||
@@ -23,7 +22,6 @@ use std::time::Duration;
|
||||
use common_config::wal::kafka::{kafka_backoff, KafkaBackoffConfig, TopicSelectorType};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub use crate::wal::kafka::topic::Topic;
|
||||
pub use crate::wal::kafka::topic_manager::TopicManager;
|
||||
|
||||
/// Configurations for kafka wal.
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/// Kafka wal topic.
|
||||
/// Publishers publish log entries to the topic while subscribers pull log entries from the topic.
|
||||
/// A topic is simply a string right now. But it may be more complex in the future.
|
||||
// TODO(niebayes): remove the Topic alias.
|
||||
pub type Topic = String;
|
||||
@@ -33,7 +33,6 @@ use crate::error::{
|
||||
};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::rpc::store::PutRequest;
|
||||
use crate::wal::kafka::topic::Topic;
|
||||
use crate::wal::kafka::topic_selector::{RoundRobinTopicSelector, TopicSelectorRef};
|
||||
use crate::wal::kafka::KafkaConfig;
|
||||
|
||||
@@ -46,7 +45,7 @@ const DEFAULT_PARTITION: i32 = 0;
|
||||
/// Manages topic initialization and selection.
|
||||
pub struct TopicManager {
|
||||
config: KafkaConfig,
|
||||
pub(crate) topic_pool: Vec<Topic>,
|
||||
pub(crate) topic_pool: Vec<String>,
|
||||
pub(crate) topic_selector: TopicSelectorRef,
|
||||
kv_backend: KvBackendRef,
|
||||
}
|
||||
@@ -86,7 +85,7 @@ impl TopicManager {
|
||||
let created_topics = Self::restore_created_topics(&self.kv_backend)
|
||||
.await?
|
||||
.into_iter()
|
||||
.collect::<HashSet<Topic>>();
|
||||
.collect::<HashSet<String>>();
|
||||
|
||||
// Creates missing topics.
|
||||
let to_be_created = topics
|
||||
@@ -108,7 +107,7 @@ impl TopicManager {
|
||||
}
|
||||
|
||||
/// Tries to create topics specified by indexes in `to_be_created`.
|
||||
async fn try_create_topics(&self, topics: &[Topic], to_be_created: &[usize]) -> Result<()> {
|
||||
async fn try_create_topics(&self, topics: &[String], to_be_created: &[usize]) -> Result<()> {
|
||||
// Builds an kafka controller client for creating topics.
|
||||
let backoff_config = BackoffConfig {
|
||||
init_backoff: self.config.backoff.init,
|
||||
@@ -141,18 +140,18 @@ impl TopicManager {
|
||||
}
|
||||
|
||||
/// Selects one topic from the topic pool through the topic selector.
|
||||
pub fn select(&self) -> Result<&Topic> {
|
||||
pub fn select(&self) -> Result<&String> {
|
||||
self.topic_selector.select(&self.topic_pool)
|
||||
}
|
||||
|
||||
/// Selects a batch of topics from the topic pool through the topic selector.
|
||||
pub fn select_batch(&self, num_topics: usize) -> Result<Vec<&Topic>> {
|
||||
pub fn select_batch(&self, num_topics: usize) -> Result<Vec<&String>> {
|
||||
(0..num_topics)
|
||||
.map(|_| self.topic_selector.select(&self.topic_pool))
|
||||
.collect()
|
||||
}
|
||||
|
||||
async fn try_append_noop_record(&self, topic: &Topic, client: &Client) -> Result<()> {
|
||||
async fn try_append_noop_record(&self, topic: &String, client: &Client) -> Result<()> {
|
||||
let partition_client = client
|
||||
.partition_client(topic, DEFAULT_PARTITION, UnknownTopicHandling::Retry)
|
||||
.await
|
||||
@@ -177,7 +176,7 @@ impl TopicManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn try_create_topic(&self, topic: &Topic, client: &ControllerClient) -> Result<()> {
|
||||
async fn try_create_topic(&self, topic: &String, client: &ControllerClient) -> Result<()> {
|
||||
match client
|
||||
.create_topic(
|
||||
topic.clone(),
|
||||
@@ -203,7 +202,7 @@ impl TopicManager {
|
||||
}
|
||||
}
|
||||
|
||||
async fn restore_created_topics(kv_backend: &KvBackendRef) -> Result<Vec<Topic>> {
|
||||
async fn restore_created_topics(kv_backend: &KvBackendRef) -> Result<Vec<String>> {
|
||||
kv_backend
|
||||
.get(CREATED_TOPICS_KEY.as_bytes())
|
||||
.await?
|
||||
@@ -213,7 +212,7 @@ impl TopicManager {
|
||||
)
|
||||
}
|
||||
|
||||
async fn persist_created_topics(topics: &[Topic], kv_backend: &KvBackendRef) -> Result<()> {
|
||||
async fn persist_created_topics(topics: &[String], kv_backend: &KvBackendRef) -> Result<()> {
|
||||
let raw_topics = serde_json::to_vec(topics).context(EncodeJsonSnafu)?;
|
||||
kv_backend
|
||||
.put(PutRequest {
|
||||
|
||||
@@ -19,12 +19,11 @@ use rand::Rng;
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::error::{EmptyTopicPoolSnafu, Result};
|
||||
use crate::wal::kafka::topic::Topic;
|
||||
|
||||
/// Controls topic selection.
|
||||
pub(crate) trait TopicSelector: Send + Sync {
|
||||
/// Selects a topic from the topic pool.
|
||||
fn select<'a>(&self, topic_pool: &'a [Topic]) -> Result<&'a Topic>;
|
||||
fn select<'a>(&self, topic_pool: &'a [String]) -> Result<&'a String>;
|
||||
}
|
||||
|
||||
/// Arc wrapper of TopicSelector.
|
||||
@@ -48,7 +47,7 @@ impl RoundRobinTopicSelector {
|
||||
}
|
||||
|
||||
impl TopicSelector for RoundRobinTopicSelector {
|
||||
fn select<'a>(&self, topic_pool: &'a [Topic]) -> Result<&'a Topic> {
|
||||
fn select<'a>(&self, topic_pool: &'a [String]) -> Result<&'a String> {
|
||||
ensure!(!topic_pool.is_empty(), EmptyTopicPoolSnafu);
|
||||
let which = self.cursor.fetch_add(1, Ordering::Relaxed) % topic_pool.len();
|
||||
Ok(&topic_pool[which])
|
||||
|
||||
@@ -26,6 +26,6 @@ pub mod watcher;
pub use crate::error::{Error, Result};
pub use crate::procedure::{
BoxedProcedure, Context, ContextProvider, LockKey, Procedure, ProcedureId, ProcedureManager,
ProcedureManagerRef, ProcedureState, ProcedureWithId, Status,
ProcedureManagerRef, ProcedureState, ProcedureWithId, Status, StringKey,
};
pub use crate::watcher::Watcher;

@@ -18,3 +18,10 @@ pub use crate::columnar_value::ColumnarValue;
pub use crate::function::*;
pub use crate::logical_plan::{create_udf, AggregateFunction, Expr, ScalarUdf};
pub use crate::signature::{Signature, TypeSignature, Volatility};

/// Default timestamp column name for Prometheus metrics.
pub const GREPTIME_TIMESTAMP: &str = "greptime_timestamp";
/// Default value column name for Prometheus metrics.
pub const GREPTIME_VALUE: &str = "greptime_value";
/// Default counter column name for OTLP metrics.
pub const GREPTIME_COUNT: &str = "greptime_count";

@@ -5,6 +5,9 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
client.workspace = true
|
||||
common-query.workspace = true
|
||||
common-recordbatch.workspace = true
|
||||
once_cell.workspace = true
|
||||
rand.workspace = true
|
||||
tempfile.workspace = true
|
||||
|
||||
@@ -19,6 +19,7 @@ use std::process::Command;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
pub mod ports;
|
||||
pub mod recordbatch;
|
||||
pub mod temp_dir;
|
||||
|
||||
// Rust is working on an env possibly named `CARGO_WORKSPACE_DIR` to find the root path to the
|
||||
|
||||
46
src/common/test-util/src/recordbatch.rs
Normal file
@@ -0,0 +1,46 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use client::Database;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::util;
|
||||
|
||||
pub enum ExpectedOutput<'a> {
|
||||
AffectedRows(usize),
|
||||
QueryResult(&'a str),
|
||||
}
|
||||
|
||||
pub async fn execute_and_check_output(db: &Database, sql: &str, expected: ExpectedOutput<'_>) {
|
||||
let output = db.sql(sql).await.unwrap();
|
||||
match (&output, expected) {
|
||||
(Output::AffectedRows(x), ExpectedOutput::AffectedRows(y)) => {
|
||||
assert_eq!(*x, y, "actual: \n{}", x)
|
||||
}
|
||||
(Output::RecordBatches(_), ExpectedOutput::QueryResult(x))
|
||||
| (Output::Stream(_), ExpectedOutput::QueryResult(x)) => {
|
||||
check_output_stream(output, x).await
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn check_output_stream(output: Output, expected: &str) {
|
||||
let recordbatches = match output {
|
||||
Output::Stream(stream) => util::collect_batches(stream).await.unwrap(),
|
||||
Output::RecordBatches(recordbatches) => recordbatches,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let pretty_print = recordbatches.pretty_print().unwrap();
|
||||
assert_eq!(pretty_print, expected, "actual: \n{}", pretty_print);
|
||||
}
|
||||
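The new recordbatch helper wraps the common pattern of running a SQL statement through client::Database and asserting on the result. A hedged usage sketch, assuming an already-connected Database handle; the module path and the expected pretty-printed output below are placeholders, not values taken from this change:

// Usage sketch only: `common_test_util::recordbatch` is the assumed module
// path for the new file, and the expected strings are illustrative.
use common_test_util::recordbatch::{execute_and_check_output, ExpectedOutput};

async fn verify_insert_and_query(db: &client::Database) {
    // Writes are checked by their affected row count.
    execute_and_check_output(
        db,
        "INSERT INTO t (ts, v) VALUES (0, 1.0)",
        ExpectedOutput::AffectedRows(1),
    )
    .await;

    // Queries are checked against the pretty-printed record batches.
    let expected = "\
+-----+
| v   |
+-----+
| 1.0 |
+-----+";
    execute_and_check_output(db, "SELECT v FROM t", ExpectedOutput::QueryResult(expected)).await;
}
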
@@ -276,7 +276,7 @@ impl Default for DatanodeOptions {
|
||||
|
||||
impl DatanodeOptions {
|
||||
pub fn env_list_keys() -> Option<&'static [&'static str]> {
|
||||
Some(&["meta_client.metasrv_addrs"])
|
||||
Some(&["meta_client.metasrv_addrs", "wal.broker_endpoints"])
|
||||
}
|
||||
|
||||
pub fn to_toml_string(&self) -> String {
|
||||
|
||||
@@ -42,10 +42,11 @@ use metric_engine::engine::MetricEngine;
|
||||
use mito2::config::MitoConfig;
|
||||
use mito2::engine::MitoEngine;
|
||||
use object_store::manager::{ObjectStoreManager, ObjectStoreManagerRef};
|
||||
use object_store::util::normalize_dir;
|
||||
use object_store::util::{join_dir, normalize_dir};
|
||||
use query::QueryEngineFactory;
|
||||
use servers::export_metrics::ExportMetricsTask;
|
||||
use servers::grpc::{GrpcServer, GrpcServerConfig};
|
||||
use servers::grpc::builder::GrpcServerBuilder;
|
||||
use servers::grpc::GrpcServerConfig;
|
||||
use servers::http::HttpServerBuilder;
|
||||
use servers::metrics_handler::MetricsHandler;
|
||||
use servers::server::{start_server, ServerHandler, ServerHandlers};
|
||||
@@ -60,9 +61,9 @@ use tokio::sync::Notify;
|
||||
|
||||
use crate::config::{DatanodeOptions, RegionEngineConfig};
|
||||
use crate::error::{
|
||||
CreateDirSnafu, GetMetadataSnafu, MissingKvBackendSnafu, MissingNodeIdSnafu, OpenLogStoreSnafu,
|
||||
ParseAddrSnafu, Result, RuntimeResourceSnafu, ShutdownInstanceSnafu, ShutdownServerSnafu,
|
||||
StartServerSnafu,
|
||||
BuildMitoEngineSnafu, CreateDirSnafu, GetMetadataSnafu, MissingKvBackendSnafu,
|
||||
MissingNodeIdSnafu, OpenLogStoreSnafu, ParseAddrSnafu, Result, RuntimeResourceSnafu,
|
||||
ShutdownInstanceSnafu, ShutdownServerSnafu, StartServerSnafu,
|
||||
};
|
||||
use crate::event_listener::{
|
||||
new_region_server_event_channel, NoopRegionServerEventListener, RegionServerEventListenerRef,
|
||||
@@ -328,15 +329,13 @@ impl DatanodeBuilder {
|
||||
max_send_message_size: opts.rpc_max_send_message_size.as_bytes() as usize,
|
||||
};
|
||||
|
||||
let server = Box::new(GrpcServer::new(
|
||||
Some(config),
|
||||
None,
|
||||
None,
|
||||
Some(Arc::new(region_server.clone()) as _),
|
||||
Some(Arc::new(region_server.clone()) as _),
|
||||
None,
|
||||
region_server.runtime(),
|
||||
));
|
||||
let server = Box::new(
|
||||
GrpcServerBuilder::new(region_server.runtime())
|
||||
.config(config)
|
||||
.flight_handler(Arc::new(region_server.clone()))
|
||||
.region_server_handler(Arc::new(region_server.clone()))
|
||||
.build(),
|
||||
);
|
||||
|
||||
let addr: SocketAddr = opts.rpc_addr.parse().context(ParseAddrSnafu {
|
||||
addr: &opts.rpc_addr,
|
||||
@@ -458,20 +457,33 @@ impl DatanodeBuilder {
|
||||
async fn build_mito_engine(
|
||||
opts: &DatanodeOptions,
|
||||
object_store_manager: ObjectStoreManagerRef,
|
||||
config: MitoConfig,
|
||||
mut config: MitoConfig,
|
||||
) -> Result<MitoEngine> {
|
||||
// Sets write cache path if it is empty.
|
||||
if config.experimental_write_cache_path.is_empty() {
|
||||
config.experimental_write_cache_path = join_dir(&opts.storage.data_home, "write_cache");
|
||||
info!(
|
||||
"Sets write cache path to {}",
|
||||
config.experimental_write_cache_path
|
||||
);
|
||||
}
|
||||
|
||||
let mito_engine = match &opts.wal {
|
||||
WalConfig::RaftEngine(raft_engine_config) => MitoEngine::new(
|
||||
config,
|
||||
Self::build_raft_engine_log_store(&opts.storage.data_home, raft_engine_config)
|
||||
.await?,
|
||||
object_store_manager,
|
||||
),
|
||||
)
|
||||
.await
|
||||
.context(BuildMitoEngineSnafu)?,
|
||||
WalConfig::Kafka(kafka_config) => MitoEngine::new(
|
||||
config,
|
||||
Self::build_kafka_log_store(kafka_config).await?,
|
||||
object_store_manager,
|
||||
),
|
||||
)
|
||||
.await
|
||||
.context(BuildMitoEngineSnafu)?,
|
||||
};
|
||||
Ok(mito_engine)
|
||||
}
|
||||
|
||||
@@ -282,6 +282,12 @@ pub enum Error {
|
||||
source: metric_engine::error::Error,
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build mito engine"))]
|
||||
BuildMitoEngine {
|
||||
source: mito2::error::Error,
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -352,6 +358,7 @@ impl ErrorExt for Error {
|
||||
StopRegionEngine { source, .. } => source.status_code(),
|
||||
|
||||
FindLogicalRegions { source, .. } => source.status_code(),
|
||||
BuildMitoEngine { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -26,10 +26,10 @@ use std::{env, path};
|
||||
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_telemetry::logging::info;
|
||||
use object_store::layers::{LoggingLayer, LruCacheLayer, RetryLayer, TracingLayer};
|
||||
use object_store::services::Fs as FsBuilder;
|
||||
use object_store::util::normalize_dir;
|
||||
use object_store::{util, HttpClient, ObjectStore, ObjectStoreBuilder};
|
||||
use object_store::layers::{LruCacheLayer, RetryLayer};
|
||||
use object_store::services::Fs;
|
||||
use object_store::util::{join_dir, normalize_dir, with_instrument_layers};
|
||||
use object_store::{HttpClient, ObjectStore, ObjectStoreBuilder};
|
||||
use snafu::prelude::*;
|
||||
|
||||
use crate::config::{ObjectStoreConfig, DEFAULT_OBJECT_STORE_CACHE_SIZE};
|
||||
@@ -60,16 +60,7 @@ pub(crate) async fn new_object_store(
|
||||
object_store
|
||||
};
|
||||
|
||||
let store = object_store
|
||||
.layer(
|
||||
LoggingLayer::default()
|
||||
// Print the expected error only in DEBUG level.
|
||||
// See https://docs.rs/opendal/latest/opendal/layers/struct.LoggingLayer.html#method.with_error_level
|
||||
.with_error_level(Some("debug"))
|
||||
.expect("input error level must be valid"),
|
||||
)
|
||||
.layer(TracingLayer)
|
||||
.layer(object_store::layers::PrometheusMetricsLayer);
|
||||
let store = with_instrument_layers(object_store);
|
||||
Ok(store)
|
||||
}
|
||||
|
||||
@@ -114,11 +105,10 @@ async fn create_object_store_with_cache(
|
||||
};
|
||||
|
||||
if let Some(path) = cache_path {
|
||||
let path = util::normalize_dir(path);
|
||||
let atomic_temp_dir = format!("{path}.tmp/");
|
||||
let atomic_temp_dir = join_dir(path, ".tmp/");
|
||||
clean_temp_dir(&atomic_temp_dir)?;
|
||||
let cache_store = FsBuilder::default()
|
||||
.root(&path)
|
||||
let cache_store = Fs::default()
|
||||
.root(path)
|
||||
.atomic_write_dir(&atomic_temp_dir)
|
||||
.build()
|
||||
.context(error::InitBackendSnafu)?;
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use common_telemetry::logging::info;
|
||||
use object_store::services::Azblob as AzureBuilder;
|
||||
use object_store::services::Azblob;
|
||||
use object_store::{util, ObjectStore};
|
||||
use secrecy::ExposeSecret;
|
||||
use snafu::prelude::*;
|
||||
@@ -30,7 +30,7 @@ pub(crate) async fn new_azblob_object_store(azblob_config: &AzblobConfig) -> Res
|
||||
azblob_config.container, &root
|
||||
);
|
||||
|
||||
let mut builder = AzureBuilder::default();
|
||||
let mut builder = Azblob::default();
|
||||
let _ = builder
|
||||
.root(&root)
|
||||
.container(&azblob_config.container)
|
||||
|
||||
@@ -15,7 +15,8 @@
|
||||
use std::{fs, path};
|
||||
|
||||
use common_telemetry::logging::info;
|
||||
use object_store::services::Fs as FsBuilder;
|
||||
use object_store::services::Fs;
|
||||
use object_store::util::join_dir;
|
||||
use object_store::ObjectStore;
|
||||
use snafu::prelude::*;
|
||||
|
||||
@@ -31,10 +32,10 @@ pub(crate) async fn new_fs_object_store(
|
||||
.context(error::CreateDirSnafu { dir: data_home })?;
|
||||
info!("The file storage home is: {}", data_home);
|
||||
|
||||
let atomic_write_dir = format!("{data_home}.tmp/");
|
||||
let atomic_write_dir = join_dir(data_home, ".tmp/");
|
||||
store::clean_temp_dir(&atomic_write_dir)?;
|
||||
|
||||
let mut builder = FsBuilder::default();
|
||||
let mut builder = Fs::default();
|
||||
let _ = builder.root(data_home).atomic_write_dir(&atomic_write_dir);
|
||||
|
||||
let object_store = ObjectStore::new(builder)
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use common_telemetry::logging::info;
|
||||
use object_store::services::Gcs as GCSBuilder;
|
||||
use object_store::services::Gcs;
|
||||
use object_store::{util, ObjectStore};
|
||||
use secrecy::ExposeSecret;
|
||||
use snafu::prelude::*;
|
||||
@@ -29,7 +29,7 @@ pub(crate) async fn new_gcs_object_store(gcs_config: &GcsConfig) -> Result<Objec
|
||||
gcs_config.bucket, &root
|
||||
);
|
||||
|
||||
let mut builder = GCSBuilder::default();
|
||||
let mut builder = Gcs::default();
|
||||
builder
|
||||
.root(&root)
|
||||
.bucket(&gcs_config.bucket)
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use common_telemetry::logging::info;
|
||||
use object_store::services::Oss as OSSBuilder;
|
||||
use object_store::services::Oss;
|
||||
use object_store::{util, ObjectStore};
|
||||
use secrecy::ExposeSecret;
|
||||
use snafu::prelude::*;
|
||||
@@ -29,7 +29,7 @@ pub(crate) async fn new_oss_object_store(oss_config: &OssConfig) -> Result<Objec
|
||||
oss_config.bucket, &root
|
||||
);
|
||||
|
||||
let mut builder = OSSBuilder::default();
|
||||
let mut builder = Oss::default();
|
||||
let _ = builder
|
||||
.root(&root)
|
||||
.bucket(&oss_config.bucket)
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use common_telemetry::logging::info;
|
||||
use object_store::services::S3 as S3Builder;
|
||||
use object_store::services::S3;
|
||||
use object_store::{util, ObjectStore};
|
||||
use secrecy::ExposeSecret;
|
||||
use snafu::prelude::*;
|
||||
@@ -30,7 +30,7 @@ pub(crate) async fn new_s3_object_store(s3_config: &S3Config) -> Result<ObjectSt
|
||||
s3_config.bucket, &root
|
||||
);
|
||||
|
||||
let mut builder = S3Builder::default();
|
||||
let mut builder = S3::default();
|
||||
let _ = builder
|
||||
.root(&root)
|
||||
.bucket(&s3_config.bucket)
|
||||
|
||||
@@ -86,7 +86,6 @@ use crate::frontend::{FrontendOptions, TomlSerializable};
|
||||
use crate::heartbeat::HeartbeatTask;
|
||||
use crate::metrics;
|
||||
use crate::script::ScriptExecutor;
|
||||
use crate::server::Services;
|
||||
|
||||
#[async_trait]
|
||||
pub trait FrontendInstance:
|
||||
@@ -190,12 +189,13 @@ impl Instance {
|
||||
pub async fn build_servers(
|
||||
&mut self,
|
||||
opts: impl Into<FrontendOptions> + TomlSerializable,
|
||||
servers: ServerHandlers,
|
||||
) -> Result<()> {
|
||||
let opts: FrontendOptions = opts.into();
|
||||
self.export_metrics_task =
|
||||
ExportMetricsTask::try_new(&opts.export_metrics, Some(&self.plugins))
|
||||
.context(StartServerSnafu)?;
|
||||
let servers = Services::build(opts, Arc::new(self.clone()), self.plugins.clone()).await?;
|
||||
|
||||
self.servers = Arc::new(servers);
|
||||
|
||||
Ok(())
|
||||
@@ -442,7 +442,7 @@ pub fn check_permission(
|
||||
) -> Result<()> {
|
||||
let need_validate = plugins
|
||||
.get::<QueryOptions>()
|
||||
.map(|opts| opts.disallow_cross_schema_query)
|
||||
.map(|opts| opts.disallow_cross_catalog_query)
|
||||
.unwrap_or_default();
|
||||
|
||||
if !need_validate {
|
||||
@@ -520,7 +520,7 @@ mod tests {
|
||||
let query_ctx = QueryContext::arc();
|
||||
let plugins: Plugins = Plugins::new();
|
||||
plugins.insert(QueryOptions {
|
||||
disallow_cross_schema_query: true,
|
||||
disallow_cross_catalog_query: true,
|
||||
});
|
||||
|
||||
let sql = r#"
|
||||
@@ -556,8 +556,6 @@ mod tests {
|
||||
}
|
||||
|
||||
let wrong = vec![
|
||||
("", "wrongschema."),
|
||||
("greptime.", "wrongschema."),
|
||||
("wrongcatalog.", "public."),
|
||||
("wrongcatalog.", "wrongschema."),
|
||||
];
|
||||
@@ -607,10 +605,10 @@
let stmt = parse_stmt(sql, &GreptimeDbDialect {}).unwrap();
check_permission(plugins.clone(), &stmt[0], &query_ctx).unwrap();

let sql = "SHOW TABLES FROM wrongschema";
let sql = "SHOW TABLES FROM private";
let stmt = parse_stmt(sql, &GreptimeDbDialect {}).unwrap();
let re = check_permission(plugins.clone(), &stmt[0], &query_ctx);
assert!(re.is_err());
assert!(re.is_ok());

// test describe table
let sql = "DESC TABLE {catalog}{schema}demo;";

@@ -20,5 +20,5 @@ pub mod heartbeat;
pub mod instance;
pub(crate) mod metrics;
mod script;
mod server;
pub mod server;
pub mod service_config;

@@ -19,7 +19,9 @@ use auth::UserProviderRef;
|
||||
use common_base::Plugins;
|
||||
use common_runtime::Builder as RuntimeBuilder;
|
||||
use servers::error::InternalIoSnafu;
|
||||
use servers::grpc::{GrpcServer, GrpcServerConfig};
|
||||
use servers::grpc::builder::GrpcServerBuilder;
|
||||
use servers::grpc::greptime_handler::GreptimeRequestHandler;
|
||||
use servers::grpc::GrpcServerConfig;
|
||||
use servers::http::HttpServerBuilder;
|
||||
use servers::metrics_handler::MetricsHandler;
|
||||
use servers::mysql::server::{MysqlServer, MysqlSpawnConfig, MysqlSpawnRef};
|
||||
@@ -33,14 +35,49 @@ use snafu::ResultExt;
|
||||
use crate::error::{self, Result, StartServerSnafu};
|
||||
use crate::frontend::{FrontendOptions, TomlSerializable};
|
||||
use crate::instance::FrontendInstance;
|
||||
use crate::service_config::GrpcOptions;
|
||||
|
||||
pub(crate) struct Services;
|
||||
pub struct Services {
|
||||
plugins: Plugins,
|
||||
}
|
||||
|
||||
impl Services {
|
||||
pub(crate) async fn build<T, U>(
|
||||
pub fn new(plugins: Plugins) -> Self {
|
||||
Self { plugins }
|
||||
}
|
||||
|
||||
pub fn grpc_server_builder(opts: &GrpcOptions) -> Result<GrpcServerBuilder> {
|
||||
let grpc_runtime = Arc::new(
|
||||
RuntimeBuilder::default()
|
||||
.worker_threads(opts.runtime_size)
|
||||
.thread_name("grpc-handlers")
|
||||
.build()
|
||||
.context(error::RuntimeResourceSnafu)?,
|
||||
);
|
||||
|
||||
let grpc_config = GrpcServerConfig {
|
||||
max_recv_message_size: opts.max_recv_message_size.as_bytes() as usize,
|
||||
max_send_message_size: opts.max_send_message_size.as_bytes() as usize,
|
||||
};
|
||||
|
||||
Ok(GrpcServerBuilder::new(grpc_runtime).config(grpc_config))
|
||||
}
|
||||
|
||||
pub async fn build<T, U>(&self, opts: T, instance: Arc<U>) -> Result<ServerHandlers>
|
||||
where
|
||||
T: Into<FrontendOptions> + TomlSerializable + Clone,
|
||||
U: FrontendInstance,
|
||||
{
|
||||
let grpc_options = &opts.clone().into().grpc;
|
||||
let builder = Self::grpc_server_builder(grpc_options)?;
|
||||
self.build_with(opts, instance, builder).await
|
||||
}
|
||||
|
||||
pub async fn build_with<T, U>(
|
||||
&self,
|
||||
opts: T,
|
||||
instance: Arc<U>,
|
||||
plugins: Plugins,
|
||||
builder: GrpcServerBuilder,
|
||||
) -> Result<ServerHandlers>
|
||||
where
|
||||
T: Into<FrontendOptions> + TomlSerializable,
|
||||
@@ -48,35 +85,28 @@ impl Services {
|
||||
{
|
||||
let toml = opts.to_toml()?;
|
||||
let opts: FrontendOptions = opts.into();
|
||||
let mut result = Vec::<ServerHandler>::with_capacity(plugins.len());
|
||||
let user_provider = plugins.get::<UserProviderRef>();
|
||||
|
||||
let mut result = Vec::<ServerHandler>::new();
|
||||
|
||||
let user_provider = self.plugins.get::<UserProviderRef>();
|
||||
|
||||
{
|
||||
// Always init GRPC server
|
||||
let opts = &opts.grpc;
|
||||
let grpc_addr = parse_addr(&opts.addr)?;
|
||||
|
||||
let grpc_runtime = Arc::new(
|
||||
RuntimeBuilder::default()
|
||||
.worker_threads(opts.runtime_size)
|
||||
.thread_name("grpc-handlers")
|
||||
.build()
|
||||
.context(error::RuntimeResourceSnafu)?,
|
||||
);
|
||||
|
||||
let grpc_config = GrpcServerConfig {
|
||||
max_recv_message_size: opts.max_recv_message_size.as_bytes() as usize,
|
||||
max_send_message_size: opts.max_send_message_size.as_bytes() as usize,
|
||||
};
|
||||
let grpc_server = GrpcServer::new(
|
||||
Some(grpc_config),
|
||||
Some(ServerGrpcQueryHandlerAdapter::arc(instance.clone())),
|
||||
Some(instance.clone()),
|
||||
None,
|
||||
None,
|
||||
let greptime_request_handler = GreptimeRequestHandler::new(
|
||||
ServerGrpcQueryHandlerAdapter::arc(instance.clone()),
|
||||
user_provider.clone(),
|
||||
grpc_runtime,
|
||||
builder.runtime().clone(),
|
||||
);
|
||||
let grpc_server = builder
|
||||
.database_handler(greptime_request_handler.clone())
|
||||
.prometheus_handler(instance.clone())
|
||||
.otlp_handler(instance.clone())
|
||||
.user_provider(user_provider.clone())
|
||||
.flight_handler(Arc::new(greptime_request_handler))
|
||||
.build();
|
||||
|
||||
result.push((Box::new(grpc_server), grpc_addr));
|
||||
}
|
||||
@@ -116,7 +146,7 @@ impl Services {
|
||||
let http_server = http_server_builder
|
||||
.with_metrics_handler(MetricsHandler)
|
||||
.with_script_handler(instance.clone())
|
||||
.with_plugins(plugins)
|
||||
.with_plugins(self.plugins.clone())
|
||||
.with_greptime_config_options(toml)
|
||||
.build();
|
||||
result.push((Box::new(http_server), http_addr));
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod sort;
|
||||
mod sort_create;
|
||||
pub mod sort;
|
||||
pub mod sort_create;
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod external_provider;
|
||||
mod external_sort;
|
||||
pub mod external_provider;
|
||||
pub mod external_sort;
|
||||
mod intermediate_rw;
|
||||
mod merge_stream;
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
use std::any::Any;
|
||||
use std::io::Error as IoError;
|
||||
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_macro::stack_trace_debug;
|
||||
use snafu::{Location, Snafu};
|
||||
@@ -167,6 +167,12 @@ pub enum Error {
|
||||
total_row_count: usize,
|
||||
expected_row_count: usize,
|
||||
},
|
||||
|
||||
#[snafu(display("External error"))]
|
||||
External {
|
||||
source: BoxedError,
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
@@ -197,6 +203,8 @@ impl ErrorExt for Error {
|
||||
| FstInsert { .. }
|
||||
| InconsistentRowCount { .. }
|
||||
| IndexNotFound { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
External { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ use crate::inverted_index::format::reader::InvertedIndexReader;
|
||||
/// avoiding repeated compilation of fixed predicates such as regex patterns.
|
||||
#[mockall::automock]
|
||||
#[async_trait]
|
||||
pub trait IndexApplier {
|
||||
pub trait IndexApplier: Send + Sync {
|
||||
/// Applies the predefined predicates to the data read by the given index reader, returning
|
||||
/// a list of relevant indices (e.g., post IDs, group IDs, row IDs).
|
||||
async fn apply<'a>(
|
||||
|
||||
@@ -22,6 +22,8 @@ common-macro.workspace = true
|
||||
common-meta.workspace = true
|
||||
common-runtime.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
common-time.workspace = true
|
||||
dashmap.workspace = true
|
||||
futures-util.workspace = true
|
||||
futures.workspace = true
|
||||
protobuf = { version = "2", features = ["bytes"] }
|
||||
|
||||
@@ -14,12 +14,12 @@
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use common_config::wal::KafkaWalTopic;
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_macro::stack_trace_debug;
|
||||
use common_runtime::error::Error as RuntimeError;
|
||||
use serde_json::error::Error as JsonError;
|
||||
use snafu::{Location, Snafu};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::kafka::NamespaceImpl as KafkaNamespace;
|
||||
|
||||
@@ -119,7 +119,7 @@ pub enum Error {
error
))]
GetClient {
topic: KafkaWalTopic,
topic: String,
location: Location,
error: String,
},
@@ -140,7 +140,7 @@ pub enum Error {
|
||||
limit,
|
||||
))]
|
||||
ProduceRecord {
|
||||
topic: KafkaWalTopic,
|
||||
topic: String,
|
||||
size: usize,
|
||||
limit: usize,
|
||||
location: Location,
|
||||
@@ -183,6 +183,18 @@ pub enum Error {
|
||||
|
||||
#[snafu(display("The record sequence is not legal, error: {}", error))]
|
||||
IllegalSequence { location: Location, error: String },
|
||||
|
||||
#[snafu(display(
|
||||
"Attempt to append discontinuous log entry, region: {}, last index: {}, attempt index: {}",
|
||||
region_id,
|
||||
last_index,
|
||||
attempt_index
|
||||
))]
|
||||
DiscontinuousLogIndex {
|
||||
region_id: RegionId,
|
||||
last_index: u64,
|
||||
attempt_index: u64,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
|
||||
@@ -18,7 +18,6 @@ pub(crate) mod util;
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use common_meta::wal::KafkaWalTopic as Topic;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use store_api::logstore::entry::{Entry, Id as EntryId};
|
||||
use store_api::logstore::namespace::Namespace;
|
||||
@@ -29,7 +28,7 @@ use crate::error::Error;
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Clone, Serialize, Deserialize)]
|
||||
pub struct NamespaceImpl {
|
||||
pub region_id: u64,
|
||||
pub topic: Topic,
|
||||
pub topic: String,
|
||||
}
|
||||
|
||||
impl Namespace for NamespaceImpl {
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_config::wal::{KafkaConfig, KafkaWalTopic as Topic};
|
||||
use common_config::wal::KafkaConfig;
|
||||
use rskafka::client::partition::{PartitionClient, UnknownTopicHandling};
|
||||
use rskafka::client::producer::aggregator::RecordAggregator;
|
||||
use rskafka::client::producer::{BatchProducer, BatchProducerBuilder};
|
||||
@@ -67,7 +67,7 @@ pub(crate) struct ClientManager {
|
||||
client_factory: RsKafkaClient,
|
||||
/// A pool maintaining a collection of clients.
|
||||
/// Key: a topic. Value: the associated client of the topic.
|
||||
client_pool: RwLock<HashMap<Topic, Client>>,
|
||||
client_pool: RwLock<HashMap<String, Client>>,
|
||||
}
|
||||
|
||||
impl ClientManager {
|
||||
@@ -97,7 +97,7 @@ impl ClientManager {
|
||||
|
||||
/// Gets the client associated with the topic. If the client does not exist, a new one will
|
||||
/// be created and returned.
|
||||
pub(crate) async fn get_or_insert(&self, topic: &Topic) -> Result<Client> {
|
||||
pub(crate) async fn get_or_insert(&self, topic: &String) -> Result<Client> {
|
||||
{
|
||||
let client_pool = self.client_pool.read().await;
|
||||
if let Some(client) = client_pool.get(topic) {
|
||||
@@ -116,7 +116,7 @@ impl ClientManager {
|
||||
}
|
||||
}
|
||||
|
||||
async fn try_create_client(&self, topic: &Topic) -> Result<Client> {
|
||||
async fn try_create_client(&self, topic: &String) -> Result<Client> {
|
||||
// Sets to Retry to retry connecting if the kafka cluter replies with an UnknownTopic error.
|
||||
// That's because the topic is believed to exist as the metasrv is expected to create required topics upon start.
|
||||
// The reconnecting won't stop until succeed or a different error returns.
|
||||
@@ -147,7 +147,7 @@ mod tests {
|
||||
test_name: &str,
|
||||
num_topics: usize,
|
||||
broker_endpoints: Vec<String>,
|
||||
) -> (ClientManager, Vec<Topic>) {
|
||||
) -> (ClientManager, Vec<String>) {
|
||||
let topics = create_topics(
|
||||
num_topics,
|
||||
|i| format!("{test_name}_{}_{}", i, uuid::Uuid::new_v4()),
|
||||
|
||||
@@ -205,7 +205,11 @@ impl LogStore for KafkaLogStore {
|
||||
}
|
||||
|
||||
// Tries to construct an entry from records consumed so far.
|
||||
if let Some(entry) = maybe_emit_entry(record, &mut entry_records)? {
|
||||
if let Some(mut entry) = maybe_emit_entry(record, &mut entry_records)? {
|
||||
// We don't rely on the EntryId generated by mito2.
|
||||
// Instead, we use the offset return from Kafka as EntryId.
|
||||
// Therefore, we MUST overwrite the EntryId with RecordOffset.
|
||||
entry.id = offset as u64;
|
||||
yield Ok(vec![entry]);
|
||||
}
|
||||
|
||||
@@ -283,7 +287,6 @@ fn check_termination(
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_config::wal::KafkaWalTopic as Topic;
|
||||
use rand::seq::IteratorRandom;
|
||||
|
||||
use super::*;
|
||||
@@ -304,7 +307,7 @@ mod tests {
|
||||
test_name: &str,
|
||||
num_topics: usize,
|
||||
broker_endpoints: Vec<String>,
|
||||
) -> (KafkaLogStore, Vec<Topic>) {
|
||||
) -> (KafkaLogStore, Vec<String>) {
|
||||
let topics = create_topics(
|
||||
num_topics,
|
||||
|i| format!("{test_name}_{}_{}", i, uuid::Uuid::new_v4()),
|
||||
@@ -424,17 +427,20 @@ mod tests {
|
||||
|
||||
// Reads entries for regions and checks for each region that the gotten entries are identical with the expected ones.
|
||||
for region_id in which {
|
||||
let ctx = ®ion_contexts[®ion_id];
|
||||
let ctx = region_contexts.get_mut(®ion_id).unwrap();
|
||||
let stream = logstore
|
||||
.read(&ctx.ns, ctx.flushed_entry_id + 1)
|
||||
.await
|
||||
.unwrap();
|
||||
let got = stream
|
||||
let mut got = stream
|
||||
.collect::<Vec<_>>()
|
||||
.await
|
||||
.into_iter()
|
||||
.flat_map(|x| x.unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
//FIXME(weny): https://github.com/GreptimeTeam/greptimedb/issues/3152
|
||||
ctx.expected.iter_mut().for_each(|entry| entry.id = 0);
|
||||
got.iter_mut().for_each(|entry| entry.id = 0);
|
||||
assert_eq!(ctx.expected, got);
|
||||
}
|
||||
|
||||
|
||||
@@ -12,8 +12,10 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::sync::atomic::{AtomicI64, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_stream::stream;
|
||||
@@ -22,15 +24,15 @@ use common_runtime::{RepeatedTask, TaskFunction};
|
||||
use common_telemetry::{error, info};
|
||||
use raft_engine::{Config, Engine, LogBatch, MessageExt, ReadableSize, RecoveryMode};
|
||||
use snafu::{ensure, ResultExt};
|
||||
use store_api::logstore::entry::{Entry, Id as EntryId};
|
||||
use store_api::logstore::entry::Id as EntryId;
|
||||
use store_api::logstore::entry_stream::SendableEntryStream;
|
||||
use store_api::logstore::namespace::{Id as NamespaceId, Namespace as NamespaceTrait};
|
||||
use store_api::logstore::{AppendBatchResponse, AppendResponse, LogStore};
|
||||
|
||||
use crate::error;
|
||||
use crate::error::{
|
||||
AddEntryLogBatchSnafu, Error, FetchEntrySnafu, IllegalNamespaceSnafu, IllegalStateSnafu,
|
||||
OverrideCompactedEntrySnafu, RaftEngineSnafu, Result, StartGcTaskSnafu, StopGcTaskSnafu,
|
||||
AddEntryLogBatchSnafu, DiscontinuousLogIndexSnafu, Error, FetchEntrySnafu,
|
||||
IllegalNamespaceSnafu, IllegalStateSnafu, OverrideCompactedEntrySnafu, RaftEngineSnafu, Result,
|
||||
StartGcTaskSnafu, StopGcTaskSnafu,
|
||||
};
|
||||
use crate::raft_engine::backend::SYSTEM_NAMESPACE;
|
||||
use crate::raft_engine::protos::logstore::{EntryImpl, NamespaceImpl as Namespace};
|
||||
@@ -41,6 +43,7 @@ pub struct RaftEngineLogStore {
|
||||
config: RaftEngineConfig,
|
||||
engine: Arc<Engine>,
|
||||
gc_task: RepeatedTask<Error>,
|
||||
last_sync_time: AtomicI64,
|
||||
}
|
||||
|
||||
pub struct PurgeExpiredFilesFunction {
|
||||
@@ -80,6 +83,8 @@ impl RaftEngineLogStore {
|
||||
recovery_mode: RecoveryMode::TolerateTailCorruption,
|
||||
batch_compression_threshold: ReadableSize::kb(8),
|
||||
target_file_size: ReadableSize(config.file_size.0),
|
||||
enable_log_recycle: config.enable_log_recycle,
|
||||
prefill_for_recycle: config.prefill_log_files,
|
||||
..Default::default()
|
||||
};
|
||||
let engine = Arc::new(Engine::open(raft_engine_config).context(RaftEngineSnafu)?);
|
||||
@@ -94,6 +99,7 @@ impl RaftEngineLogStore {
|
||||
config,
|
||||
engine,
|
||||
gc_task,
|
||||
last_sync_time: AtomicI64::new(0),
|
||||
};
|
||||
log_store.start()?;
|
||||
Ok(log_store)
|
||||
@@ -116,22 +122,65 @@ impl RaftEngineLogStore {
|
||||
)
|
||||
}
|
||||
|
||||
/// Checks if entry does not override the min index of namespace.
|
||||
fn check_entry(&self, e: &EntryImpl) -> Result<()> {
|
||||
if cfg!(debug_assertions) {
|
||||
/// Converts entries to `LogBatch` and checks if entry ids are valid.
|
||||
/// Returns the `LogBatch` converted along with the last entry id
|
||||
/// to append in each namespace(region).
|
||||
fn entries_to_batch(
|
||||
&self,
|
||||
entries: Vec<EntryImpl>,
|
||||
) -> Result<(LogBatch, HashMap<NamespaceId, EntryId>)> {
|
||||
// Records the last entry id for each region's entries.
|
||||
let mut entry_ids: HashMap<NamespaceId, EntryId> = HashMap::with_capacity(entries.len());
|
||||
let mut batch = LogBatch::with_capacity(entries.len());
|
||||
|
||||
for e in entries {
|
||||
let ns_id = e.namespace_id;
|
||||
if let Some(first_index) = self.engine.first_index(ns_id) {
|
||||
ensure!(
|
||||
e.id() >= first_index,
|
||||
OverrideCompactedEntrySnafu {
|
||||
namespace: ns_id,
|
||||
first_index,
|
||||
attempt_index: e.id(),
|
||||
match entry_ids.entry(ns_id) {
|
||||
Entry::Occupied(mut o) => {
|
||||
let prev = *o.get();
|
||||
ensure!(
|
||||
e.id == prev + 1,
|
||||
DiscontinuousLogIndexSnafu {
|
||||
region_id: ns_id,
|
||||
last_index: prev,
|
||||
attempt_index: e.id
|
||||
}
|
||||
);
|
||||
o.insert(e.id);
|
||||
}
|
||||
Entry::Vacant(v) => {
|
||||
// this entry is the first in batch of given region.
|
||||
if let Some(first_index) = self.engine.first_index(ns_id) {
|
||||
// ensure the first in batch does not override compacted entry.
|
||||
ensure!(
|
||||
e.id > first_index,
|
||||
OverrideCompactedEntrySnafu {
|
||||
namespace: ns_id,
|
||||
first_index,
|
||||
attempt_index: e.id,
|
||||
}
|
||||
);
|
||||
}
|
||||
);
|
||||
// ensure the first in batch does not form a hole in raft-engine.
|
||||
if let Some(last_index) = self.engine.last_index(ns_id) {
|
||||
ensure!(
|
||||
e.id == last_index + 1,
|
||||
DiscontinuousLogIndexSnafu {
|
||||
region_id: ns_id,
|
||||
last_index,
|
||||
attempt_index: e.id
|
||||
}
|
||||
);
|
||||
}
|
||||
v.insert(e.id);
|
||||
}
|
||||
}
|
||||
batch
|
||||
.add_entries::<MessageType>(ns_id, &[e])
|
||||
.context(AddEntryLogBatchSnafu)?;
|
||||
}
|
||||
Ok(())
|
||||
|
||||
Ok((batch, entry_ids))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -166,8 +215,8 @@ impl LogStore for RaftEngineLogStore {
|
||||
|
||||
if let Some(first_index) = self.engine.first_index(namespace_id) {
|
||||
ensure!(
|
||||
entry_id >= first_index,
|
||||
error::OverrideCompactedEntrySnafu {
|
||||
entry_id > first_index,
|
||||
OverrideCompactedEntrySnafu {
|
||||
namespace: namespace_id,
|
||||
first_index,
|
||||
attempt_index: entry_id,
|
||||
@@ -175,6 +224,17 @@ impl LogStore for RaftEngineLogStore {
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(last_index) = self.engine.last_index(namespace_id) {
|
||||
ensure!(
|
||||
entry_id == last_index + 1,
|
||||
DiscontinuousLogIndexSnafu {
|
||||
region_id: namespace_id,
|
||||
last_index,
|
||||
attempt_index: entry_id
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
let _ = self
|
||||
.engine
|
||||
.write(&mut batch, self.config.sync_write)
|
||||
@@ -192,27 +252,21 @@ impl LogStore for RaftEngineLogStore {
|
||||
return Ok(AppendBatchResponse::default());
|
||||
}
|
||||
|
||||
// Records the last entry id for each region's entries.
|
||||
let mut last_entry_ids: HashMap<NamespaceId, EntryId> =
|
||||
HashMap::with_capacity(entries.len());
|
||||
let mut batch = LogBatch::with_capacity(entries.len());
|
||||
let (mut batch, last_entry_ids) = self.entries_to_batch(entries)?;
|
||||
|
||||
for e in entries {
|
||||
self.check_entry(&e)?;
|
||||
// For raft-engine log store, the namespace id is the region id.
|
||||
let ns_id = e.namespace_id;
|
||||
last_entry_ids
|
||||
.entry(ns_id)
|
||||
.and_modify(|x| *x = (*x).max(e.id))
|
||||
.or_insert(e.id);
|
||||
batch
|
||||
.add_entries::<MessageType>(ns_id, &[e])
|
||||
.context(AddEntryLogBatchSnafu)?;
|
||||
let mut sync = self.config.sync_write;
|
||||
|
||||
if let Some(sync_period) = &self.config.sync_period {
|
||||
let now = common_time::util::current_time_millis();
|
||||
if now - self.last_sync_time.load(Ordering::Relaxed) >= sync_period.as_millis() as i64 {
|
||||
self.last_sync_time.store(now, Ordering::Relaxed);
|
||||
sync = true;
|
||||
}
|
||||
}
|
||||
|
||||
let _ = self
|
||||
.engine
|
||||
.write(&mut batch, self.config.sync_write)
|
||||
.write(&mut batch, sync)
|
||||
.context(RaftEngineSnafu)?;
|
||||
|
||||
Ok(AppendBatchResponse { last_entry_ids })
|
||||
|
||||
@@ -15,7 +15,6 @@
use std::sync::atomic::{AtomicU64 as AtomicEntryId, Ordering};
use std::sync::Mutex;

use common_meta::wal::KafkaWalTopic as Topic;
use rand::distributions::Alphanumeric;
use rand::rngs::ThreadRng;
use rand::{thread_rng, Rng};
@@ -29,7 +28,7 @@ pub async fn create_topics<F>(
num_topics: usize,
decorator: F,
broker_endpoints: &[String],
) -> Vec<Topic>
) -> Vec<String>
where
F: Fn(usize) -> String,
{

@@ -33,6 +33,7 @@ etcd-client.workspace = true
futures.workspace = true
h2 = "0.3"
http-body = "0.4"
humantime = "2.1"
humantime-serde.workspace = true
itertools.workspace = true
lazy_static.workspace = true
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::any::Any;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::time::Duration;
@@ -22,8 +23,12 @@ use api::v1::meta::{
RangeRequest as PbRangeRequest, RangeResponse as PbRangeResponse, ResponseHeader,
};
use common_grpc::channel_manager::ChannelManager;
use common_meta::kv_backend::ResettableKvBackendRef;
use common_meta::rpc::store::{BatchGetRequest, RangeRequest};
use common_meta::kv_backend::{KvBackend, ResettableKvBackendRef, TxnService};
use common_meta::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
DeleteRangeResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
};
use common_meta::rpc::KeyValue;
use common_meta::util;
use common_telemetry::warn;
@@ -49,11 +54,158 @@ pub struct MetaPeerClient {
retry_interval_ms: u64,
}

#[async_trait::async_trait]
impl TxnService for MetaPeerClient {
type Error = error::Error;
}

#[async_trait::async_trait]
impl KvBackend for MetaPeerClient {
fn name(&self) -> &str {
"MetaPeerClient"
}

fn as_any(&self) -> &dyn Any {
self
}

async fn range(&self, req: RangeRequest) -> Result<RangeResponse> {
if self.is_leader() {
return self
.in_memory
.range(req)
.await
.context(error::KvBackendSnafu);
}

let max_retry_count = self.max_retry_count;
let retry_interval_ms = self.retry_interval_ms;

for _ in 0..max_retry_count {
match self
.remote_range(req.key.clone(), req.range_end.clone(), req.keys_only)
.await
{
Ok(res) => return Ok(res),
Err(e) => {
if need_retry(&e) {
warn!("Encountered an error that need to retry, err: {:?}", e);
tokio::time::sleep(Duration::from_millis(retry_interval_ms)).await;
} else {
return Err(e);
}
}
}
}

error::ExceededRetryLimitSnafu {
func_name: "range",
retry_num: max_retry_count,
}
.fail()
}

// Get kv information from the leader's in_mem kv store
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
if self.is_leader() {
return self
.in_memory
.batch_get(req)
.await
.context(error::KvBackendSnafu);
}

let max_retry_count = self.max_retry_count;
let retry_interval_ms = self.retry_interval_ms;

for _ in 0..max_retry_count {
match self.remote_batch_get(req.keys.clone()).await {
Ok(res) => return Ok(res),
Err(e) => {
if need_retry(&e) {
warn!("Encountered an error that need to retry, err: {:?}", e);
tokio::time::sleep(Duration::from_millis(retry_interval_ms)).await;
} else {
return Err(e);
}
}
}
}

error::ExceededRetryLimitSnafu {
func_name: "batch_get",
retry_num: max_retry_count,
}
.fail()
}

// MetaPeerClient does not support mutable methods listed below.
async fn put(&self, _req: PutRequest) -> Result<PutResponse> {
error::UnsupportedSnafu {
operation: "put".to_string(),
}
.fail()
}

async fn batch_put(&self, _req: BatchPutRequest) -> Result<BatchPutResponse> {
error::UnsupportedSnafu {
operation: "batch put".to_string(),
}
.fail()
}

async fn compare_and_put(&self, _req: CompareAndPutRequest) -> Result<CompareAndPutResponse> {
error::UnsupportedSnafu {
operation: "compare and put".to_string(),
}
.fail()
}

async fn delete_range(&self, _req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
error::UnsupportedSnafu {
operation: "delete range".to_string(),
}
.fail()
}

async fn batch_delete(&self, _req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
error::UnsupportedSnafu {
operation: "batch delete".to_string(),
}
.fail()
}

async fn delete(&self, _key: &[u8], _prev_kv: bool) -> Result<Option<KeyValue>> {
error::UnsupportedSnafu {
operation: "delete".to_string(),
}
.fail()
}

async fn put_conditionally(
&self,
_key: Vec<u8>,
_value: Vec<u8>,
_if_not_exists: bool,
) -> Result<bool> {
error::UnsupportedSnafu {
operation: "put conditionally".to_string(),
}
.fail()
}
}

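The `range` and `batch_get` implementations above share the same retry shape. As a side note, that shape can be factored into a generic helper; the sketch below is an illustration under assumptions (it presumes a Tokio runtime and leaves the error type abstract; `need_retry` here is a caller-supplied closure, not the metasrv function):

use std::future::Future;
use std::time::Duration;

// Retries `op` up to `max_retry` times, sleeping `interval` between attempts
// that fail with a retryable error. Non-retryable errors are returned at once.
async fn with_retry<T, E, Fut>(
    max_retry: usize,
    interval: Duration,
    need_retry: impl Fn(&E) -> bool,
    mut op: impl FnMut() -> Fut,
) -> Result<T, E>
where
    Fut: Future<Output = Result<T, E>>,
{
    let mut last_err = None;
    for _ in 0..max_retry {
        match op().await {
            Ok(v) => return Ok(v),
            Err(e) if need_retry(&e) => {
                tokio::time::sleep(interval).await;
                last_err = Some(e);
            }
            Err(e) => return Err(e),
        }
    }
    // The callers above build an "exceeded retry limit" error here instead.
    Err(last_err.expect("max_retry must be non-zero"))
}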
impl MetaPeerClient {
async fn get_dn_key_value(&self, keys_only: bool) -> Result<Vec<KeyValue>> {
let key = format!("{DN_STAT_PREFIX}-").into_bytes();
let range_end = util::get_prefix_end_key(&key);
self.range(key, range_end, keys_only).await
let range_request = RangeRequest {
key,
range_end,
keys_only,
..Default::default()
};
self.range(range_request).await.map(|res| res.kvs)
}

// Get all datanode stat kvs from leader meta.
@@ -73,70 +225,11 @@ impl MetaPeerClient {
// Get datanode stat kvs from leader meta by input keys.
pub async fn get_dn_stat_kvs(&self, keys: Vec<StatKey>) -> Result<HashMap<StatKey, StatValue>> {
let stat_keys = keys.into_iter().map(|key| key.into()).collect();
let batch_get_req = BatchGetRequest { keys: stat_keys };

let kvs = self.batch_get(stat_keys).await?;
let res = self.batch_get(batch_get_req).await?;

to_stat_kv_map(kvs)
}

// Get kv information from the leader's in_mem kv store.
pub async fn get(&self, key: Vec<u8>) -> Result<Option<KeyValue>> {
let mut kvs = self.range(key, vec![], false).await?;
Ok(if kvs.is_empty() {
None
} else {
debug_assert_eq!(kvs.len(), 1);
Some(kvs.remove(0))
})
}

// Range kv information from the leader's in_mem kv store
pub async fn range(
&self,
key: Vec<u8>,
range_end: Vec<u8>,
keys_only: bool,
) -> Result<Vec<KeyValue>> {
if self.is_leader() {
let request = RangeRequest {
key,
range_end,
..Default::default()
};

return self
.in_memory
.range(request)
.await
.map(|resp| resp.kvs)
.context(error::KvBackendSnafu);
}

let max_retry_count = self.max_retry_count;
let retry_interval_ms = self.retry_interval_ms;

for _ in 0..max_retry_count {
match self
.remote_range(key.clone(), range_end.clone(), keys_only)
.await
{
Ok(kvs) => return Ok(kvs),
Err(e) => {
if need_retry(&e) {
warn!("Encountered an error that need to retry, err: {:?}", e);
tokio::time::sleep(Duration::from_millis(retry_interval_ms)).await;
} else {
return Err(e);
}
}
}
}

error::ExceededRetryLimitSnafu {
func_name: "range",
retry_num: max_retry_count,
}
.fail()
to_stat_kv_map(res.kvs)
}

async fn remote_range(
@@ -144,7 +237,7 @@ impl MetaPeerClient {
key: Vec<u8>,
range_end: Vec<u8>,
keys_only: bool,
) -> Result<Vec<KeyValue>> {
) -> Result<RangeResponse> {
// Safety: when self.is_leader() == false, election must not empty.
let election = self.election.as_ref().unwrap();

@@ -170,47 +263,13 @@ impl MetaPeerClient {

check_resp_header(&response.header, Context { addr: &leader_addr })?;

Ok(response.kvs.into_iter().map(KeyValue::new).collect())
Ok(RangeResponse {
kvs: response.kvs.into_iter().map(KeyValue::new).collect(),
more: response.more,
})
}

// Get kv information from the leader's in_mem kv store
pub async fn batch_get(&self, keys: Vec<Vec<u8>>) -> Result<Vec<KeyValue>> {
if self.is_leader() {
let request = BatchGetRequest { keys };

return self
.in_memory
.batch_get(request)
.await
.map(|resp| resp.kvs)
.context(error::KvBackendSnafu);
}

let max_retry_count = self.max_retry_count;
let retry_interval_ms = self.retry_interval_ms;

for _ in 0..max_retry_count {
match self.remote_batch_get(keys.clone()).await {
Ok(kvs) => return Ok(kvs),
Err(e) => {
if need_retry(&e) {
warn!("Encountered an error that need to retry, err: {:?}", e);
tokio::time::sleep(Duration::from_millis(retry_interval_ms)).await;
} else {
return Err(e);
}
}
}
}

error::ExceededRetryLimitSnafu {
func_name: "batch_get",
retry_num: max_retry_count,
}
.fail()
}

async fn remote_batch_get(&self, keys: Vec<Vec<u8>>) -> Result<Vec<KeyValue>> {
async fn remote_batch_get(&self, keys: Vec<Vec<u8>>) -> Result<BatchGetResponse> {
// Safety: when self.is_leader() == false, election must not empty.
let election = self.election.as_ref().unwrap();

@@ -234,7 +293,9 @@ impl MetaPeerClient {

check_resp_header(&response.header, Context { addr: &leader_addr })?;

Ok(response.kvs.into_iter().map(KeyValue::new).collect())
Ok(BatchGetResponse {
kvs: response.kvs.into_iter().map(KeyValue::new).collect(),
})
}

// Check if the meta node is a leader node.

@@ -210,6 +210,12 @@ pub enum Error {
location: Location,
source: servers::error::Error,
},
#[snafu(display("Failed to parse duration {}", duration))]
ParseDuration {
duration: String,
#[snafu(source)]
error: humantime::DurationError,
},
#[snafu(display("Failed to parse address {}", addr))]
ParseAddr {
addr: String,
@@ -534,6 +540,13 @@ pub enum Error {
#[snafu(display("Expected to retry later, reason: {}", reason))]
RetryLater { reason: String, location: Location },

#[snafu(display("Expected to retry later, reason: {}", reason))]
RetryLaterWithSource {
reason: String,
location: Location,
source: BoxedError,
},

#[snafu(display("Failed to update table metadata, err_msg: {}", err_msg))]
UpdateTableMetadata { err_msg: String, location: Location },

@@ -622,6 +635,7 @@ impl Error {
/// Returns `true` if the error is retryable.
pub fn is_retryable(&self) -> bool {
matches!(self, Error::RetryLater { .. })
|| matches!(self, Error::RetryLaterWithSource { .. })
}
}

@@ -652,7 +666,6 @@ impl ErrorExt for Error {
| Error::LockNotConfig { .. }
| Error::ExceededRetryLimit { .. }
| Error::SendShutdownSignal { .. }
| Error::ParseAddr { .. }
| Error::SchemaAlreadyExists { .. }
| Error::PusherNotFound { .. }
| Error::PushMessage { .. }
@@ -660,6 +673,7 @@ impl ErrorExt for Error {
| Error::MailboxTimeout { .. }
| Error::MailboxReceiver { .. }
| Error::RetryLater { .. }
| Error::RetryLaterWithSource { .. }
| Error::StartGrpc { .. }
| Error::UpdateTableMetadata { .. }
| Error::NoEnoughAvailableDatanode { .. }
@@ -678,6 +692,8 @@ impl ErrorExt for Error {
| Error::InvalidStatKey { .. }
| Error::InvalidInactiveRegionKey { .. }
| Error::ParseNum { .. }
| Error::ParseAddr { .. }
| Error::ParseDuration { .. }
| Error::UnsupportedSelectorType { .. }
| Error::InvalidArguments { .. }
| Error::InitExportMetricsTask { .. }

@@ -14,6 +14,7 @@

use std::collections::HashMap;

use common_meta::kv_backend::KvBackend;
use common_meta::peer::Peer;
use common_meta::{util, ClusterId};
use common_time::util as time_util;
@@ -39,7 +40,8 @@ pub async fn lookup_alive_datanode_peer(
cluster_id,
node_id: datanode_id,
};
let Some(kv) = meta_peer_client.get(lease_key.clone().try_into()?).await? else {
let lease_key_bytes: Vec<u8> = lease_key.clone().try_into()?;
let Some(kv) = meta_peer_client.get(&lease_key_bytes).await? else {
return Ok(None);
};
let lease_value: LeaseValue = kv.value.try_into()?;
@@ -74,7 +76,13 @@ where
let key = get_lease_prefix(cluster_id);
let range_end = util::get_prefix_end_key(&key);

let kvs = meta_peer_client.range(key, range_end, false).await?;
let range_req = common_meta::rpc::store::RangeRequest {
key,
range_end,
keys_only: false,
..Default::default()
};
let kvs = meta_peer_client.range(range_req).await?.kvs;
let mut lease_kvs = HashMap::new();
for kv in kvs {
let lease_key: LeaseKey = kv.key.try_into()?;
@@ -79,6 +79,12 @@ pub struct MetaSrvOptions {
pub store_key_prefix: String,
}

impl MetaSrvOptions {
pub fn env_list_keys() -> Option<&'static [&'static str]> {
Some(&["wal.broker_endpoints"])
}
}

impl Default for MetaSrvOptions {
fn default() -> Self {
Self {
@@ -28,6 +28,7 @@ use async_trait::async_trait;
use common_meta::key::datanode_table::DatanodeTableKey;
use common_meta::key::TableMetadataManagerRef;
use common_meta::kv_backend::ResettableKvBackendRef;
use common_meta::lock_key::{RegionLock, TableLock};
use common_meta::{ClusterId, RegionIdent};
use common_procedure::error::{
Error as ProcedureError, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu,
@@ -40,13 +41,12 @@ use common_telemetry::{error, info, warn};
use failover_start::RegionFailoverStart;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use store_api::storage::RegionNumber;
use store_api::storage::{RegionId, RegionNumber};
use table::metadata::TableId;

use crate::error::{Error, RegisterProcedureLoaderSnafu, Result, TableMetadataManagerSnafu};
use crate::error::{RegisterProcedureLoaderSnafu, Result, TableMetadataManagerSnafu};
use crate::lock::DistLockRef;
use crate::metasrv::{SelectorContext, SelectorRef};
use crate::procedure::utils::region_lock_key;
use crate::service::mailbox::MailboxRef;

const OPEN_REGION_MESSAGE_TIMEOUT: Duration = Duration::from_secs(30);
@@ -357,7 +357,7 @@ impl Procedure for RegionFailoverProcedure {
.next(&self.context, &self.node.failed_region)
.await
.map_err(|e| {
if matches!(e, Error::RetryLater { .. }) {
if e.is_retryable() {
ProcedureError::retry_later(e)
} else {
ProcedureError::external(e)
@@ -372,8 +372,17 @@ impl Procedure for RegionFailoverProcedure {

fn lock_key(&self) -> LockKey {
let region_ident = &self.node.failed_region;
let region_key = region_lock_key(region_ident.table_id, region_ident.region_number);
LockKey::single_exclusive(region_key)
// TODO(weny): acquires the catalog, schema read locks.
let lock_key = vec![
TableLock::Read(region_ident.table_id).into(),
RegionLock::Write(RegionId::new(
region_ident.table_id,
region_ident.region_number,
))
.into(),
];

LockKey::new(lock_key)
}
}

@@ -771,7 +780,8 @@ mod tests {

let result = procedure.execute(&ctx).await;
assert!(result.is_err());
assert!(result.unwrap_err().is_retry_later());
let err = result.unwrap_err();
assert!(err.is_retry_later(), "err: {:?}", err);
assert_eq!(
r#"{"region_failover_state":"RegionFailoverStart","failover_candidate":null}"#,
serde_json::to_string(&procedure.node.state).unwrap()

@@ -13,17 +13,17 @@
// limitations under the License.

use async_trait::async_trait;
use common_error::ext::ErrorExt;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode;
use common_meta::peer::Peer;
use common_meta::RegionIdent;
use common_telemetry::info;
use serde::{Deserialize, Serialize};
use snafu::ensure;
use snafu::{ensure, location, Location};

use super::deactivate_region::DeactivateRegion;
use super::{RegionFailoverContext, State};
use crate::error::{RegionFailoverCandidatesNotFoundSnafu, Result, RetryLaterSnafu};
use crate::error::{self, RegionFailoverCandidatesNotFoundSnafu, Result};
use crate::selector::SelectorOptions;

#[derive(Serialize, Deserialize, Debug)]
@@ -93,10 +93,11 @@ impl State for RegionFailoverStart {
.await
.map_err(|e| {
if e.status_code() == StatusCode::RuntimeResourcesExhausted {
RetryLaterSnafu {
reason: format!("{e}"),
error::Error::RetryLaterWithSource {
reason: format!("Region failover aborted for {failed_region:?}"),
location: location!(),
source: BoxedError::new(e),
}
.build()
} else {
e
}

@@ -15,6 +15,7 @@
use std::collections::HashMap;

use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_meta::key::datanode_table::RegionInfo;
use common_meta::key::table_route::TableRouteKey;
use common_meta::peer::Peer;
@@ -27,7 +28,7 @@ use store_api::storage::RegionNumber;

use super::invalidate_cache::InvalidateCache;
use super::{RegionFailoverContext, State};
use crate::error::{self, Result, RetryLaterSnafu, TableRouteNotFoundSnafu};
use crate::error::{self, Result, TableRouteNotFoundSnafu};
use crate::lock::keys::table_metadata_lock_key;
use crate::lock::Opts;

@@ -172,14 +173,12 @@ impl State for UpdateRegionMetadata {
) -> Result<Box<dyn State>> {
self.update_metadata(ctx, failed_region)
.await
.map_err(|e| {
RetryLaterSnafu {
reason: format!(
"Failed to update metadata for failed region: {}, error: {}",
failed_region, e
),
}
.build()
.map_err(BoxedError::new)
.context(error::RetryLaterWithSourceSnafu {
reason: format!(
"Failed to update metadata for failed region: {}",
failed_region
),
})?;
Ok(Box::new(InvalidateCache))
}

@@ -30,27 +30,28 @@ use std::fmt::Debug;
use std::time::Duration;

use api::v1::meta::MailboxMessage;
use common_error::ext::BoxedError;
use common_meta::instruction::Instruction;
use common_meta::key::datanode_table::{DatanodeTableKey, DatanodeTableValue};
use common_meta::key::table_info::TableInfoValue;
use common_meta::key::table_route::TableRouteValue;
use common_meta::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
use common_meta::lock_key::{RegionLock, TableLock};
use common_meta::peer::Peer;
use common_meta::region_keeper::{MemoryRegionKeeperRef, OperatingRegionGuard};
use common_meta::ClusterId;
use common_procedure::error::{
Error as ProcedureError, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu,
};
use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status};
use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status, StringKey};
pub use manager::RegionMigrationProcedureTask;
use serde::{Deserialize, Serialize};
use snafu::{location, Location, OptionExt, ResultExt};
use snafu::{OptionExt, ResultExt};
use store_api::storage::RegionId;
use tokio::time::Instant;

use self::migration_start::RegionMigrationStart;
use crate::error::{self, Error, Result};
use crate::procedure::utils::region_lock_key;
use crate::error::{self, Result};
use crate::service::mailbox::{BroadcastChannel, MailboxRef};

/// It's shared in each step and available even after recovering.
@@ -68,11 +69,25 @@ pub struct PersistentContext {
to_peer: Peer,
/// The [RegionId] of migration region.
region_id: RegionId,
/// The timeout of waiting for a candidate to replay the WAL.
#[serde(with = "humantime_serde", default = "default_replay_timeout")]
replay_timeout: Duration,
}

fn default_replay_timeout() -> Duration {
Duration::from_secs(1)
}

impl PersistentContext {
pub fn lock_key(&self) -> String {
region_lock_key(self.region_id.table_id(), self.region_id.region_number())
pub fn lock_key(&self) -> Vec<StringKey> {
let region_id = self.region_id;
// TODO(weny): acquires the catalog, schema read locks.
let lock_key = vec![
TableLock::Read(region_id.table_id()).into(),
RegionLock::Write(region_id).into(),
];

lock_key
}
}

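Both the failover and the migration procedures now build their lock keys the same way: a shared table lock plus an exclusive lock on the single region being operated on. A standalone sketch of that composition, using only the types visible in the diff above (treat it as illustrative, not as an exact helper the codebase exposes):

use common_meta::lock_key::{RegionLock, TableLock};
use common_procedure::{LockKey, StringKey};
use store_api::storage::RegionId;

// Read-locks the table and write-locks one region, so concurrent procedures
// on other regions of the same table are not serialized against each other.
fn region_procedure_lock_key(region_id: RegionId) -> LockKey {
    let keys: Vec<StringKey> = vec![
        TableLock::Read(region_id.table_id()).into(),
        RegionLock::Write(region_id).into(),
    ];
    LockKey::new(keys)
}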
@@ -206,9 +221,9 @@ impl Context {
.get(table_id)
.await
.context(error::TableMetadataManagerSnafu)
.map_err(|e| error::Error::RetryLater {
reason: e.to_string(),
location: location!(),
.map_err(BoxedError::new)
.context(error::RetryLaterWithSourceSnafu {
reason: format!("Failed to get TableRoute: {table_id}"),
})?
.context(error::TableRouteNotFoundSnafu { table_id })?;

@@ -242,9 +257,9 @@ impl Context {
.get(table_id)
.await
.context(error::TableMetadataManagerSnafu)
.map_err(|e| error::Error::RetryLater {
reason: e.to_string(),
location: location!(),
.map_err(BoxedError::new)
.context(error::RetryLaterWithSourceSnafu {
reason: format!("Failed to get TableInfo: {table_id}"),
})?
.context(error::TableInfoNotFoundSnafu { table_id })?;

@@ -275,9 +290,9 @@ impl Context {
})
.await
.context(error::TableMetadataManagerSnafu)
.map_err(|e| error::Error::RetryLater {
reason: e.to_string(),
location: location!(),
.map_err(BoxedError::new)
.context(error::RetryLaterWithSourceSnafu {
reason: format!("Failed to get DatanodeTable: ({datanode_id},{table_id})"),
})?
.context(error::DatanodeTableNotFoundSnafu {
table_id,
@@ -398,7 +413,7 @@ impl Procedure for RegionMigrationProcedure {
let state = &mut self.state;

let (next, status) = state.next(&mut self.context).await.map_err(|e| {
if matches!(e, Error::RetryLater { .. }) {
if e.is_retryable() {
ProcedureError::retry_later(e)
} else {
ProcedureError::external(e)
@@ -418,8 +433,7 @@ impl Procedure for RegionMigrationProcedure {
}

fn lock_key(&self) -> LockKey {
let key = self.context.persistent_ctx.lock_key();
LockKey::single_exclusive(key)
LockKey::new(self.context.persistent_ctx.lock_key())
}
}

@@ -447,7 +461,7 @@ mod tests {
#[test]
fn test_lock_key() {
let persistent_context = new_persistent_context();
let expected_key = persistent_context.lock_key();
let expected_keys = persistent_context.lock_key();

let env = TestingEnv::new();
let context = env.context_factory();
@@ -455,13 +469,11 @@ mod tests {
let procedure = RegionMigrationProcedure::new(persistent_context, context);

let key = procedure.lock_key();
let keys = key
.keys_to_lock()
.cloned()
.map(|s| s.into_string())
.collect::<Vec<_>>();
let keys = key.keys_to_lock().cloned().collect::<Vec<_>>();

assert!(keys.contains(&expected_key));
for key in expected_keys {
assert!(keys.contains(&key));
}
}

#[test]
@@ -475,7 +487,7 @@ mod tests {

let serialized = procedure.dump().unwrap();

let expected = r#"{"persistent_ctx":{"cluster_id":0,"from_peer":{"id":1,"addr":""},"to_peer":{"id":2,"addr":""},"region_id":4398046511105},"state":{"region_migration_state":"RegionMigrationStart"}}"#;
let expected = r#"{"persistent_ctx":{"cluster_id":0,"from_peer":{"id":1,"addr":""},"to_peer":{"id":2,"addr":""},"region_id":4398046511105,"replay_timeout":"1s"},"state":{"region_migration_state":"RegionMigrationStart"}}"#;
assert_eq!(expected, serialized);
}

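The updated serialization test above expects `"replay_timeout":"1s"` because the new field is annotated with `#[serde(with = "humantime_serde")]` and a default. A minimal round-trip illustration (the struct name and the serde_json / humantime-serde dependencies are assumptions of this sketch, not part of the change):

use std::time::Duration;
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
struct ReplayTimeoutOnly {
    // Serialized as a human-readable string ("1s") and defaulted when absent,
    // which keeps previously persisted procedures deserializable.
    #[serde(with = "humantime_serde", default = "default_replay_timeout")]
    replay_timeout: Duration,
}

fn default_replay_timeout() -> Duration {
    Duration::from_secs(1)
}

fn main() {
    let v = ReplayTimeoutOnly { replay_timeout: Duration::from_secs(1) };
    assert_eq!(serde_json::to_string(&v).unwrap(), r#"{"replay_timeout":"1s"}"#);
    let old: ReplayTimeoutOnly = serde_json::from_str("{}").unwrap();
    assert_eq!(old.replay_timeout, Duration::from_secs(1));
}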
@@ -55,6 +55,7 @@ impl Default for DowngradeLeaderRegion {
#[typetag::serde]
impl State for DowngradeLeaderRegion {
async fn next(&mut self, ctx: &mut Context) -> Result<(Box<dyn State>, Status)> {
let replay_timeout = ctx.persistent_ctx.replay_timeout;
// Ensures the `leader_region_lease_deadline` must exist after recovering.
ctx.volatile_ctx
.set_leader_region_lease_deadline(Duration::from_secs(REGION_LEASE_SECS));
@@ -69,7 +70,10 @@ impl State for DowngradeLeaderRegion {
}

Ok((
Box::<UpgradeCandidateRegion>::default(),
Box::new(UpgradeCandidateRegion {
replay_timeout,
..Default::default()
}),
Status::executing(false),
))
}
@@ -226,6 +230,7 @@ mod tests {
to_peer: Peer::empty(2),
region_id: RegionId::new(1024, 1),
cluster_id: 0,
replay_timeout: Duration::from_millis(1000),
}
}

@@ -369,7 +374,7 @@ mod tests {

assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
assert!(err.to_string().contains("test mocked"));
assert!(format!("{err:?}").contains("test mocked"), "err: {err:?}",);
}

#[tokio::test]

@@ -16,6 +16,7 @@ use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt::Display;
use std::sync::{Arc, RwLock};
use std::time::Duration;

use common_meta::key::table_route::TableRouteValue;
use common_meta::peer::Peer;
@@ -61,15 +62,23 @@ pub struct RegionMigrationProcedureTask {
pub(crate) region_id: RegionId,
pub(crate) from_peer: Peer,
pub(crate) to_peer: Peer,
pub(crate) replay_timeout: Duration,
}

impl RegionMigrationProcedureTask {
pub fn new(cluster_id: ClusterId, region_id: RegionId, from_peer: Peer, to_peer: Peer) -> Self {
pub fn new(
cluster_id: ClusterId,
region_id: RegionId,
from_peer: Peer,
to_peer: Peer,
replay_timeout: Duration,
) -> Self {
Self {
cluster_id,
region_id,
from_peer,
to_peer,
replay_timeout,
}
}
}
@@ -91,6 +100,7 @@ impl From<RegionMigrationProcedureTask> for PersistentContext {
region_id,
from_peer,
to_peer,
replay_timeout,
}: RegionMigrationProcedureTask,
) -> Self {
PersistentContext {
@@ -98,6 +108,7 @@ impl From<RegionMigrationProcedureTask> for PersistentContext {
from_peer,
to_peer,
region_id,
replay_timeout,
}
}
}
@@ -319,6 +330,7 @@ mod test {
region_id,
from_peer: Peer::empty(2),
to_peer: Peer::empty(1),
replay_timeout: Duration::from_millis(1000),
};
// Inserts one
manager
@@ -342,6 +354,7 @@ mod test {
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(1),
replay_timeout: Duration::from_millis(1000),
};

let err = manager.submit_procedure(task).await.unwrap_err();
@@ -359,6 +372,7 @@ mod test {
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
replay_timeout: Duration::from_millis(1000),
};

let err = manager.submit_procedure(task).await.unwrap_err();
@@ -376,6 +390,7 @@ mod test {
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
replay_timeout: Duration::from_millis(1000),
};

let table_info = new_test_table_info(1024, vec![1]).into();
@@ -403,6 +418,7 @@ mod test {
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
replay_timeout: Duration::from_millis(1000),
};

let table_info = new_test_table_info(1024, vec![1]).into();
@@ -434,6 +450,7 @@ mod test {
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
replay_timeout: Duration::from_millis(1000),
};

let table_info = new_test_table_info(1024, vec![1]).into();
@@ -460,6 +477,7 @@ mod test {
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
replay_timeout: Duration::from_millis(1000),
};

let err = manager
@@ -383,7 +383,7 @@ mod tests {

assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
assert!(err.to_string().contains("test mocked"));
assert!(format!("{err:?}").contains("test mocked"));
}

#[tokio::test]

@@ -16,6 +16,7 @@ use std::assert_matches::assert_matches;
use std::collections::HashMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::Duration;

use api::v1::meta::mailbox_message::Payload;
use api::v1::meta::{HeartbeatResponse, MailboxMessage, RequestHeader};
@@ -281,6 +282,7 @@ pub fn new_persistent_context(from: u64, to: u64, region_id: RegionId) -> Persis
to_peer: Peer::empty(to),
region_id,
cluster_id: 0,
replay_timeout: Duration::from_millis(1000),
}
}

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use common_error::ext::BoxedError;
use common_meta::rpc::router::RegionStatus;
use snafu::ResultExt;

@@ -60,13 +61,15 @@ impl UpdateMetadata {
.await
.context(error::TableMetadataManagerSnafu)
{
debug_assert!(ctx.remove_table_route_value());
return error::RetryLaterSnafu {
reason: format!("Failed to update the table route during the downgrading leader region, error: {err}")
}.fail();
ctx.remove_table_route_value();
return Err(BoxedError::new(err)).context(error::RetryLaterWithSourceSnafu {
reason: format!(
"Failed to update the table route during the downgrading leader region, region_id: {region_id}, from_peer_id: {from_peer_id}"
),
});
}

debug_assert!(ctx.remove_table_route_value());
ctx.remove_table_route_value();

Ok(())
}
@@ -163,13 +166,9 @@ mod tests {
ctx.volatile_ctx.table_route = Some(original_table_route);

let err = state.downgrade_leader_region(&mut ctx).await.unwrap_err();

assert!(ctx.volatile_ctx.table_route.is_none());

assert_matches!(err, Error::RetryLater { .. });

assert!(err.is_retryable());
assert!(err.to_string().contains("Failed to update the table route"));
assert!(format!("{err:?}").contains("Failed to update the table route"));
}

#[tokio::test]

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use common_error::ext::BoxedError;
use snafu::ResultExt;

use crate::error::{self, Result};
@@ -44,13 +45,13 @@ impl UpdateMetadata {
.await
.context(error::TableMetadataManagerSnafu)
{
debug_assert!(ctx.remove_table_route_value());
return error::RetryLaterSnafu {
reason: format!("Failed to update the table route during the rollback downgraded leader region, error: {err}")
}.fail();
ctx.remove_table_route_value();
return Err(BoxedError::new(err)).context(error::RetryLaterWithSourceSnafu {
reason: format!("Failed to update the table route during the rollback downgraded leader region: {region_id}"),
});
}

debug_assert!(ctx.remove_table_route_value());
ctx.remove_table_route_value();

Ok(())
}
@@ -157,9 +158,8 @@ mod tests {
.await
.unwrap_err();
assert!(ctx.volatile_ctx.table_route.is_none());
assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
assert!(err.to_string().contains("Failed to update the table route"));
assert!(format!("{err:?}").contains("Failed to update the table route"));

state.rollback_downgraded_region(&mut ctx).await.unwrap();

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use common_error::ext::BoxedError;
use common_meta::key::datanode_table::RegionInfo;
use common_meta::rpc::router::{region_distribution, RegionRoute};
use common_telemetry::{info, warn};
@@ -167,13 +168,13 @@ impl UpdateMetadata {
.await
.context(error::TableMetadataManagerSnafu)
{
debug_assert!(ctx.remove_table_route_value());
return error::RetryLaterSnafu {
reason: format!("Failed to update the table route during the upgrading candidate region, error: {err}")
}.fail();
ctx.remove_table_route_value();
return Err(BoxedError::new(err)).context(error::RetryLaterWithSourceSnafu {
reason: format!("Failed to update the table route during the upgrading candidate region: {region_id}"),
});
};

debug_assert!(ctx.remove_table_route_value());
ctx.remove_table_route_value();
// Consumes the guard.
ctx.volatile_ctx.opening_region_guard.take();

@@ -354,15 +355,12 @@ mod tests {
.register(2, RegionId::new(table_id, 1))
.unwrap();
ctx.volatile_ctx.opening_region_guard = Some(guard);

let err = state.upgrade_candidate_region(&mut ctx).await.unwrap_err();

assert!(ctx.volatile_ctx.table_route.is_none());
assert!(ctx.volatile_ctx.opening_region_guard.is_some());
assert_matches!(err, Error::RetryLater { .. });

assert!(err.is_retryable());
assert!(err.to_string().contains("Failed to update the table route"));
assert!(format!("{err:?}").contains("Failed to update the table route"));
}

#[tokio::test]

@@ -33,14 +33,14 @@ use crate::service::mailbox::Channel;
#[derive(Debug, Serialize, Deserialize)]
pub struct UpgradeCandidateRegion {
// The optimistic retry times.
optimistic_retry: usize,
pub(crate) optimistic_retry: usize,
// The retry initial interval.
retry_initial_interval: Duration,
pub(crate) retry_initial_interval: Duration,
// The replay timeout of a instruction.
replay_timeout: Duration,
pub(crate) replay_timeout: Duration,
// If it's true it requires the candidate region MUST replay the WAL to the latest entry id.
// Otherwise, it will rollback to the old leader region.
require_ready: bool,
pub(crate) require_ready: bool,
}

impl Default for UpgradeCandidateRegion {
@@ -236,6 +236,7 @@ mod tests {
to_peer: Peer::empty(2),
region_id: RegionId::new(1024, 1),
cluster_id: 0,
replay_timeout: Duration::from_millis(1000),
}
}

@@ -335,7 +336,7 @@ mod tests {

assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
assert!(err.to_string().contains("test mocked"));
assert!(format!("{err:?}").contains("test mocked"));
}

#[tokio::test]
@@ -397,7 +398,7 @@ mod tests {

assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
assert!(err.to_string().contains("still replaying the wal"));
assert!(format!("{err:?}").contains("still replaying the wal"));

// Sets the `require_ready` to false.
state.require_ready = false;

@@ -12,13 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use store_api::storage::{RegionNumber, TableId};

pub fn region_lock_key(table_id: TableId, region_number: RegionNumber) -> String {
format!("{}/region-{}", table_id, region_number)
}

#[cfg(feature = "mock")]
#[cfg(any(test, feature = "mock"))]
pub mod mock {
use std::io::Error;
use std::sync::Arc;

@@ -15,9 +15,11 @@
use std::collections::HashMap;
use std::num::ParseIntError;
use std::str::FromStr;
use std::time::Duration;

use common_meta::peer::Peer;
use common_meta::{distributed_time_constants, ClusterId};
use humantime::parse_duration;
use serde::Serialize;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::RegionId;
@@ -43,6 +45,7 @@ struct SubmitRegionMigrationTaskRequest {
region_id: RegionId,
from_peer_id: u64,
to_peer_id: u64,
replay_timeout: Duration,
}

#[derive(Debug, Serialize)]
@@ -71,6 +74,8 @@ where
Ok(parse_result)
}

const DEFAULT_REPLAY_TIMEOUT: Duration = Duration::from_millis(1000);

impl TryFrom<&HashMap<String, String>> for SubmitRegionMigrationTaskRequest {
type Error = Error;

@@ -89,11 +94,18 @@ impl TryFrom<&HashMap<String, String>> for SubmitRegionMigrationTaskRequest {
error::MissingRequiredParameterSnafu { param: key }.fail()
})?;

let replay_timeout = if let Some(duration) = params.get("replay_timeout") {
parse_duration(duration).context(error::ParseDurationSnafu { duration })?
} else {
DEFAULT_REPLAY_TIMEOUT
};

Ok(SubmitRegionMigrationTaskRequest {
cluster_id,
region_id: RegionId::from_u64(region_id),
from_peer_id,
to_peer_id,
replay_timeout,
})
}
}
@@ -131,6 +143,7 @@ impl SubmitRegionMigrationTaskHandler {
region_id,
from_peer_id,
to_peer_id,
replay_timeout,
} = task;

let from_peer = self.lookup_peer(cluster_id, from_peer_id).await?.context(
@@ -150,6 +163,7 @@ impl SubmitRegionMigrationTaskHandler {
region_id,
from_peer,
to_peer,
replay_timeout,
})
.await?;

@@ -187,6 +201,7 @@ mod tests {
use std::collections::HashMap;

use crate::error;
use crate::service::admin::region_migration::DEFAULT_REPLAY_TIMEOUT;

#[test]
fn test_parse_migration_task_req() {
@@ -212,6 +227,7 @@ mod tests {
region_id: RegionId::new(1024, 1),
from_peer_id: 1,
to_peer_id: 2,
replay_timeout: DEFAULT_REPLAY_TIMEOUT
},
task_req
);
@@ -233,6 +249,7 @@ mod tests {
region_id: RegionId::new(1024, 1),
from_peer_id: 1,
to_peer_id: 2,
replay_timeout: DEFAULT_REPLAY_TIMEOUT
},
task_req
);

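The handler above accepts an optional `replay_timeout` query parameter in humantime notation. For clarity, the fallback logic in isolation; this is a hedged sketch, and only `humantime::parse_duration` and the default constant come from the diff itself:

use std::time::Duration;
use humantime::parse_duration;

const DEFAULT_REPLAY_TIMEOUT: Duration = Duration::from_millis(1000);

// Accepts values such as "1s", "500ms" or "2m 30s"; a missing parameter falls
// back to the one-second default used by the handler above.
fn resolve_replay_timeout(param: Option<&str>) -> Result<Duration, humantime::DurationError> {
    match param {
        Some(raw) => parse_duration(raw),
        None => Ok(DEFAULT_REPLAY_TIMEOUT),
    }
}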
@@ -15,6 +15,7 @@
mod alter;
mod close;
mod create;
mod drop;
mod open;
mod put;
mod read;
@@ -83,13 +84,15 @@ use crate::utils;
/// | Operations | Logical Region | Physical Region |
/// | ---------- | -------------- | --------------- |
/// | Create | ✅ | ✅ |
/// | Drop | ✅ | ❌ |
/// | Drop | ✅ | ❓* |
/// | Write | ✅ | ❌ |
/// | Read | ✅ | ✅ |
/// | Close | ✅ | ✅ |
/// | Open | ✅ | ✅ |
/// | Alter | ✅ | ❌ |
///
/// *: Physical region can be dropped only when all related logical regions are dropped.
///
/// ## Internal Columns
///
/// The physical data region contains two internal columns. Should
@@ -123,7 +126,7 @@ impl RegionEngine for MetricEngine {
RegionRequest::Put(put) => self.inner.put_region(region_id, put).await,
RegionRequest::Delete(_) => todo!(),
RegionRequest::Create(create) => self.inner.create_region(region_id, create).await,
RegionRequest::Drop(_) => todo!(),
RegionRequest::Drop(drop) => self.inner.drop_region(region_id, drop).await,
RegionRequest::Open(open) => self.inner.open_region(region_id, open).await,
RegionRequest::Close(close) => self.inner.close_region(region_id, close).await,
RegionRequest::Alter(alter) => self.inner.alter_region(region_id, alter).await,
@@ -73,8 +73,7 @@ impl MetricEngineInner {
let (data_region_id, metadata_region_id) = Self::transform_region_id(region_id);

// create metadata region
let create_metadata_region_request =
self.create_request_for_metadata_region(&request.region_dir);
let create_metadata_region_request = self.create_request_for_metadata_region(&request);
self.mito
.handle_request(
metadata_region_id,
@@ -287,7 +286,10 @@ impl MetricEngineInner {
/// Build [RegionCreateRequest] for metadata region
///
/// This method will append [METADATA_REGION_SUBDIR] to the given `region_dir`.
pub fn create_request_for_metadata_region(&self, region_dir: &str) -> RegionCreateRequest {
pub fn create_request_for_metadata_region(
&self,
request: &RegionCreateRequest,
) -> RegionCreateRequest {
// ts TIME INDEX DEFAULT 0
let timestamp_column_metadata = ColumnMetadata {
column_id: METADATA_SCHEMA_TIMESTAMP_COLUMN_INDEX as _,
@@ -324,7 +326,7 @@ impl MetricEngineInner {
};

// concat region dir
let metadata_region_dir = join_dir(region_dir, METADATA_REGION_SUBDIR);
let metadata_region_dir = join_dir(&request.region_dir, METADATA_REGION_SUBDIR);

RegionCreateRequest {
engine: MITO_ENGINE_NAME.to_string(),
@@ -334,7 +336,7 @@ impl MetricEngineInner {
value_column_metadata,
],
primary_key: vec![METADATA_SCHEMA_KEY_COLUMN_INDEX as _],
options: HashMap::new(),
options: request.options.clone(),
region_dir: metadata_region_dir,
}
}
@@ -532,7 +534,7 @@ mod test {
],
primary_key: vec![0],
options: HashMap::new(),
region_dir: "test_dir".to_string(),
region_dir: "/test_dir".to_string(),
};

let env = TestEnv::new().await;

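The metadata region now inherits both the directory prefix and the options of the physical region's create request. The directory layout itself reduces to joining a subdirectory onto the shared `region_dir`; the sketch below is an illustration only (the literal subdirectory values and the local `join_dir` are placeholders, the real constants and helper live in `store_api::metric_engine_consts` and `object_store::util`):

// Placeholder values for METADATA_REGION_SUBDIR / DATA_REGION_SUBDIR.
const METADATA_REGION_SUBDIR: &str = "metadata";
const DATA_REGION_SUBDIR: &str = "data";

// Simplified stand-in for object_store::util::join_dir.
fn join_dir(parent: &str, child: &str) -> String {
    format!("{}/{}", parent.trim_end_matches('/'), child)
}

// One metric region maps to two mito regions that share a directory prefix.
fn region_sub_dirs(region_dir: &str) -> (String, String) {
    (
        join_dir(region_dir, METADATA_REGION_SUBDIR),
        join_dir(region_dir, DATA_REGION_SUBDIR),
    )
}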
src/metric-engine/src/engine/drop.rs (new file, 138 lines)
@@ -0,0 +1,138 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Drop a metric region

use mito2::engine::MITO_ENGINE_NAME;
use object_store::util::join_dir;
use snafu::{OptionExt, ResultExt};
use store_api::metric_engine_consts::{
DATA_REGION_SUBDIR, METADATA_REGION_SUBDIR, PHYSICAL_TABLE_METADATA_KEY,
};
use store_api::region_engine::RegionEngine;
use store_api::region_request::{
AffectedRows, RegionDropRequest, RegionOpenRequest, RegionRequest,
};
use store_api::storage::RegionId;

use super::MetricEngineInner;
use crate::error::{
CloseMitoRegionSnafu, Error, LogicalRegionNotFoundSnafu, OpenMitoRegionSnafu,
PhysicalRegionBusySnafu, PhysicalRegionNotFoundSnafu, Result,
};
use crate::metrics::PHYSICAL_REGION_COUNT;
use crate::{metadata_region, utils};

impl MetricEngineInner {
pub async fn drop_region(
&self,
region_id: RegionId,
_req: RegionDropRequest,
) -> Result<AffectedRows> {
let data_region_id = utils::to_data_region_id(region_id);

// enclose the guard in a block to prevent the guard from polluting the async context
let (is_physical_region, is_physical_region_busy) = {
if let Some(logical_regions) = self
.state
.read()
.unwrap()
.physical_regions()
.get(&data_region_id)
{
(true, !logical_regions.is_empty())
} else {
// the second argument is not used, just pass in a dummy value
(false, true)
}
};

if is_physical_region {
// check if there is no logical region relates to this physical region
if is_physical_region_busy {
// reject if there is any present logical region
return Err(PhysicalRegionBusySnafu {
region_id: data_region_id,
}
.build());
}

self.drop_physical_region(data_region_id).await
} else {
// cannot merge these two `if` otherwise the stupid type checker will complain
let metadata_region_id = self
.state
.read()
.unwrap()
.logical_regions()
.get(&region_id)
.copied();
if let Some(metadata_region_id) = metadata_region_id {
self.drop_logical_region(region_id, metadata_region_id)
.await
} else {
Err(LogicalRegionNotFoundSnafu { region_id }.build())
}
}
}

async fn drop_physical_region(&self, region_id: RegionId) -> Result<AffectedRows> {
let data_region_id = utils::to_data_region_id(region_id);
let metadata_region_id = utils::to_metadata_region_id(region_id);

// Drop mito regions.
// Since the physical regions are going to be dropped, we don't need to
// update the contents in metadata region.
self.mito
.handle_request(data_region_id, RegionRequest::Drop(RegionDropRequest {}))
.await
.with_context(|_| CloseMitoRegionSnafu { region_id })?;
self.mito
.handle_request(
metadata_region_id,
RegionRequest::Drop(RegionDropRequest {}),
)
.await
.with_context(|_| CloseMitoRegionSnafu { region_id })?;

PHYSICAL_REGION_COUNT.dec();

// Update engine state
self.state
.write()
.unwrap()
.remove_physical_region(data_region_id)?;

Ok(0)
}

async fn drop_logical_region(
&self,
logical_region_id: RegionId,
physical_region_id: RegionId,
) -> Result<AffectedRows> {
// Update metadata
self.metadata_region
.remove_logical_region(physical_region_id, logical_region_id)
.await?;

// Update engine state
self.state
.write()
.unwrap()
.remove_logical_region(logical_region_id)?;

Ok(0)
}
}
@@ -115,4 +115,20 @@ impl MetricEngineState {
self.physical_columns.remove(&physical_region_id);
Ok(())
}

/// Remove all data that are related to the logical region id.
pub fn remove_logical_region(&mut self, logical_region_id: RegionId) -> Result<()> {
let physical_region_id = self.logical_regions.remove(&logical_region_id).context(
PhysicalRegionNotFoundSnafu {
region_id: logical_region_id,
},
)?;

self.physical_regions
.get_mut(&physical_region_id)
.unwrap() // Safety: physical_region_id is got from physical_regions
.remove(&logical_region_id);

Ok(())
}
}

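The engine state above keeps a two-way bookkeeping: `logical_regions` maps each logical region to its physical region, while `physical_regions` maps a physical region to the set of logical regions it hosts. A minimal standalone sketch of the removal invariant (field and key types are simplified here; this is not the engine's actual struct):

use std::collections::{HashMap, HashSet};

struct RegionIndex {
    // logical region id -> physical region id
    logical_regions: HashMap<u64, u64>,
    // physical region id -> logical regions hosted on it
    physical_regions: HashMap<u64, HashSet<u64>>,
}

impl RegionIndex {
    // Removing a logical region must update both maps, otherwise a later
    // physical drop would still consider the physical region "busy".
    fn remove_logical_region(&mut self, logical: u64) -> Option<u64> {
        let physical = self.logical_regions.remove(&logical)?;
        if let Some(hosted) = self.physical_regions.get_mut(&physical) {
            hosted.remove(&logical);
        }
        Some(physical)
    }
}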
@@ -146,6 +146,15 @@ pub enum Error {
source: store_api::metadata::MetadataError,
location: Location,
},

#[snafu(display(
"Physical region {} is busy, there are still some logical regions using it",
region_id
))]
PhysicalRegionBusy {
region_id: RegionId,
location: Location,
},
}

pub type Result<T, E = Error> = std::result::Result<T, E>;
@@ -158,7 +167,8 @@ impl ErrorExt for Error {
InternalColumnOccupied { .. }
| MissingRegionOption { .. }
| ConflictRegionOption { .. }
| ColumnTypeMismatch { .. } => StatusCode::InvalidArguments,
| ColumnTypeMismatch { .. }
| PhysicalRegionBusy { .. } => StatusCode::InvalidArguments,

ForbiddenPhysicalAlter { .. } => StatusCode::Unsupported,

@@ -29,7 +29,7 @@ use store_api::metric_engine_consts::{
METADATA_SCHEMA_VALUE_COLUMN_NAME,
};
use store_api::region_engine::RegionEngine;
use store_api::region_request::RegionPutRequest;
use store_api::region_request::{RegionDeleteRequest, RegionPutRequest};
use store_api::storage::{RegionId, ScanRequest};

use crate::error::{
@@ -111,6 +111,34 @@ impl MetadataRegion {
.await
}

/// Remove a registered logical region from metadata.
///
/// This method doesn't check if the previous key exists.
pub async fn remove_logical_region(
&self,
physical_region_id: RegionId,
logical_region_id: RegionId,
) -> Result<()> {
// concat region key
let region_id = utils::to_metadata_region_id(physical_region_id);
let region_key = Self::concat_region_key(logical_region_id);

// concat column keys
let logical_columns = self
.logical_columns(physical_region_id, logical_region_id)
.await?;
let mut column_keys = logical_columns
.into_iter()
.map(|(col, _)| Self::concat_column_key(logical_region_id, &col))
.collect::<Vec<_>>();

// remove region key and column keys
column_keys.push(region_key);
self.delete(region_id, &column_keys).await?;

Ok(())
}

/// Check if the given logical region exists.
pub async fn is_logical_region_exists(
&self,
@@ -354,6 +382,20 @@ impl MetadataRegion {
Ok(result)
}

/// Delete the given keys. For performance consideration, this method
/// doesn't check if those keys exist or not.
async fn delete(&self, region_id: RegionId, keys: &[String]) -> Result<()> {
let delete_request = Self::build_delete_request(keys);
self.mito
.handle_request(
region_id,
store_api::region_request::RegionRequest::Delete(delete_request),
)
.await
.context(MitoWriteOperationSnafu)?;
Ok(())
}

/// Builds a [ScanRequest] to read metadata for a given key.
/// The request will contains a EQ filter on the key column.
///
@@ -409,6 +451,39 @@ impl MetadataRegion {

RegionPutRequest { rows }
}

fn build_delete_request(keys: &[String]) -> RegionDeleteRequest {
let cols = vec![
ColumnSchema {
column_name: METADATA_SCHEMA_TIMESTAMP_COLUMN_NAME.to_string(),
datatype: ColumnDataType::TimestampMillisecond as _,
semantic_type: SemanticType::Timestamp as _,
..Default::default()
},
ColumnSchema {
column_name: METADATA_SCHEMA_KEY_COLUMN_NAME.to_string(),
datatype: ColumnDataType::String as _,
semantic_type: SemanticType::Tag as _,
..Default::default()
},
];
let rows = keys
.iter()
.map(|key| Row {
values: vec![
Value {
value_data: Some(ValueData::TimestampMillisecondValue(0)),
},
Value {
value_data: Some(ValueData::StringValue(key.to_string())),
},
],
})
.collect();
let rows = Rows { schema: cols, rows };

RegionDeleteRequest { rows }
}
}

#[cfg(test)]

@@ -14,15 +14,17 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use object_store::services::Fs;
|
||||
use object_store::util::{join_dir, with_instrument_layers};
|
||||
use object_store::ObjectStore;
|
||||
use snafu::ResultExt;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
|
||||
use crate::cache::write_cache::SstUploadRequest;
|
||||
use crate::cache::CacheManagerRef;
|
||||
use crate::error::{DeleteSstSnafu, Result};
|
||||
use crate::error::{CleanDirSnafu, DeleteIndexSnafu, DeleteSstSnafu, OpenDalSnafu, Result};
|
||||
use crate::read::Source;
|
||||
use crate::sst::file::{FileHandle, FileId};
|
||||
use crate::sst::file::{FileHandle, FileId, FileMeta};
|
||||
use crate::sst::location;
|
||||
use crate::sst::parquet::reader::ParquetReaderBuilder;
|
||||
use crate::sst::parquet::writer::ParquetWriter;
|
||||
@@ -64,13 +66,27 @@ impl AccessLayer {
|
||||
&self.object_store
|
||||
}
|
||||
|
||||
/// Deletes a SST file with given file id.
|
||||
pub(crate) async fn delete_sst(&self, file_id: FileId) -> Result<()> {
|
||||
let path = location::sst_file_path(&self.region_dir, file_id);
|
||||
/// Deletes a SST file (and its index file if it has one) with given file id.
|
||||
pub(crate) async fn delete_sst(&self, file_meta: &FileMeta) -> Result<()> {
|
||||
let path = location::sst_file_path(&self.region_dir, file_meta.file_id);
|
||||
self.object_store
|
||||
.delete(&path)
|
||||
.await
|
||||
.context(DeleteSstSnafu { file_id })
|
||||
.context(DeleteSstSnafu {
|
||||
file_id: file_meta.file_id,
|
||||
})?;
|
||||
|
||||
if file_meta.inverted_index_available() {
|
||||
let path = location::index_file_path(&self.region_dir, file_meta.file_id);
|
||||
self.object_store
|
||||
.delete(&path)
|
||||
.await
|
||||
.context(DeleteIndexSnafu {
|
||||
file_id: file_meta.file_id,
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns a reader builder for specific `file`.
|
||||
@@ -86,28 +102,45 @@ impl AccessLayer {
        request: SstWriteRequest,
        write_opts: &WriteOptions,
    ) -> Result<Option<SstInfo>> {
        let path = location::sst_file_path(&self.region_dir, request.file_id);
        let file_path = location::sst_file_path(&self.region_dir, request.file_id);
        let index_file_path = location::index_file_path(&self.region_dir, request.file_id);
        let region_id = request.metadata.region_id;

        if let Some(write_cache) = request.cache_manager.write_cache() {
        let sst_info = if let Some(write_cache) = request.cache_manager.write_cache() {
            // Write to the write cache.
            return write_cache
            write_cache
                .write_and_upload_sst(
                    SstUploadRequest {
                        file_id: request.file_id,
                        metadata: request.metadata,
                        source: request.source,
                        storage: request.storage,
                        upload_path: path,
                        upload_path: file_path,
                        index_upload_path: index_file_path,
                        remote_store: self.object_store.clone(),
                    },
                    write_opts,
                )
                .await;
                .await?
        } else {
            // Write cache is disabled.
            let mut writer =
                ParquetWriter::new(file_path, request.metadata, self.object_store.clone());
            writer.write_all(request.source, write_opts).await?
        };

        // Put parquet metadata to cache manager.
        if let Some(sst_info) = &sst_info {
            if let Some(parquet_metadata) = &sst_info.file_metadata {
                request.cache_manager.put_parquet_meta_data(
                    region_id,
                    request.file_id,
                    parquet_metadata.clone(),
                )
            }
        }

        // Write cache is disabled.
        let mut writer = ParquetWriter::new(path, request.metadata, self.object_store.clone());
        writer.write_all(request.source, write_opts).await
        Ok(sst_info)
    }
}

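For context, a hedged sketch of why the parquet metadata is pushed into the cache right after the write: a later read can then serve `ParquetMetaData` from memory instead of re-reading the footer. The variable names are assumed to be in scope.

    // Sketch only: the read path consults the cache before touching object storage.
    match cache_manager.get_parquet_meta_data(region_id, file_id) {
        Some(_meta) => { /* reuse the footer cached by write_sst() */ }
        None => { /* fall back to reading the parquet footer from the SST */ }
    }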
@@ -119,3 +152,31 @@ pub(crate) struct SstWriteRequest {
    pub(crate) cache_manager: CacheManagerRef,
    pub(crate) storage: Option<String>,
}

/// Creates a fs object store with atomic write dir.
pub(crate) async fn new_fs_object_store(root: &str) -> Result<ObjectStore> {
    let atomic_write_dir = join_dir(root, ".tmp/");
    clean_dir(&atomic_write_dir).await?;

    let mut builder = Fs::default();
    builder.root(root).atomic_write_dir(&atomic_write_dir);
    let object_store = ObjectStore::new(builder).context(OpenDalSnafu)?.finish();

    // Add layers.
    let object_store = with_instrument_layers(object_store);
    Ok(object_store)
}

/// Clean the directory.
async fn clean_dir(dir: &str) -> Result<()> {
    if tokio::fs::try_exists(dir)
        .await
        .context(CleanDirSnafu { dir })?
    {
        tokio::fs::remove_dir_all(dir)
            .await
            .context(CleanDirSnafu { dir })?;
    }

    Ok(())
}

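A hedged usage sketch: the write cache introduced later in this diff builds its local store through this helper, so leftover `.tmp/` staging data from a previous run is wiped before the store is opened. The directory literal below is only an example, not a default.

    // Sketch only: open the local store that backs the experimental write cache.
    let local_store = new_fs_object_store("/var/lib/greptimedb/write_cache").await?;
    // Files written through `local_store` are staged under `.tmp/` and moved into
    // place by the atomic write dir, so a crash never leaves a half-written file.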
@@ -47,9 +47,10 @@ const PAGE_TYPE: &str = "page";
// Metrics type key for files on the local store.
const FILE_TYPE: &str = "file";

// TODO(yingwen): Builder for cache manager.

/// Manages cached data for the engine.
///
/// All caches are disabled by default.
#[derive(Default)]
pub struct CacheManager {
    /// Cache for SST metadata.
    sst_meta_cache: Option<SstMetaCache>,
@@ -58,70 +59,15 @@ pub struct CacheManager {
    /// Cache for SST pages.
    page_cache: Option<PageCache>,
    /// A Cache for writing files to object stores.
    // TODO(yingwen): Remove this once the cache is ready.
    #[allow(unused)]
    write_cache: Option<WriteCacheRef>,
}

pub type CacheManagerRef = Arc<CacheManager>;

impl CacheManager {
    /// Creates a new manager with specific cache size in bytes.
    pub fn new(
        sst_meta_cache_size: u64,
        vector_cache_size: u64,
        page_cache_size: u64,
    ) -> CacheManager {
        let sst_meta_cache = if sst_meta_cache_size == 0 {
            None
        } else {
            let cache = Cache::builder()
                .max_capacity(sst_meta_cache_size)
                .weigher(meta_cache_weight)
                .eviction_listener(|k, v, _cause| {
                    let size = meta_cache_weight(&k, &v);
                    CACHE_BYTES
                        .with_label_values(&[SST_META_TYPE])
                        .sub(size.into());
                })
                .build();
            Some(cache)
        };
        let vector_cache = if vector_cache_size == 0 {
            None
        } else {
            let cache = Cache::builder()
                .max_capacity(vector_cache_size)
                .weigher(vector_cache_weight)
                .eviction_listener(|k, v, _cause| {
                    let size = vector_cache_weight(&k, &v);
                    CACHE_BYTES
                        .with_label_values(&[VECTOR_TYPE])
                        .sub(size.into());
                })
                .build();
            Some(cache)
        };
        let page_cache = if page_cache_size == 0 {
            None
        } else {
            let cache = Cache::builder()
                .max_capacity(page_cache_size)
                .weigher(page_cache_weight)
                .eviction_listener(|k, v, _cause| {
                    let size = page_cache_weight(&k, &v);
                    CACHE_BYTES.with_label_values(&[PAGE_TYPE]).sub(size.into());
                })
                .build();
            Some(cache)
        };

        CacheManager {
            sst_meta_cache,
            vector_cache,
            page_cache,
            write_cache: None,
        }
    /// Returns a builder to build the cache.
    pub fn builder() -> CacheManagerBuilder {
        CacheManagerBuilder::default()
    }

    /// Gets cached [ParquetMetaData].
@@ -201,6 +147,86 @@ impl CacheManager {
    }
}

/// Builder to construct a [CacheManager].
#[derive(Default)]
pub struct CacheManagerBuilder {
    sst_meta_cache_size: u64,
    vector_cache_size: u64,
    page_cache_size: u64,
    write_cache: Option<WriteCacheRef>,
}

impl CacheManagerBuilder {
    /// Sets meta cache size.
    pub fn sst_meta_cache_size(mut self, bytes: u64) -> Self {
        self.sst_meta_cache_size = bytes;
        self
    }

    /// Sets vector cache size.
    pub fn vector_cache_size(mut self, bytes: u64) -> Self {
        self.vector_cache_size = bytes;
        self
    }

    /// Sets page cache size.
    pub fn page_cache_size(mut self, bytes: u64) -> Self {
        self.page_cache_size = bytes;
        self
    }

    /// Sets write cache.
    pub fn write_cache(mut self, cache: Option<WriteCacheRef>) -> Self {
        self.write_cache = cache;
        self
    }

    /// Builds the [CacheManager].
    pub fn build(self) -> CacheManager {
        let sst_meta_cache = (self.sst_meta_cache_size != 0).then(|| {
            Cache::builder()
                .max_capacity(self.sst_meta_cache_size)
                .weigher(meta_cache_weight)
                .eviction_listener(|k, v, _cause| {
                    let size = meta_cache_weight(&k, &v);
                    CACHE_BYTES
                        .with_label_values(&[SST_META_TYPE])
                        .sub(size.into());
                })
                .build()
        });
        let vector_cache = (self.vector_cache_size != 0).then(|| {
            Cache::builder()
                .max_capacity(self.vector_cache_size)
                .weigher(vector_cache_weight)
                .eviction_listener(|k, v, _cause| {
                    let size = vector_cache_weight(&k, &v);
                    CACHE_BYTES
                        .with_label_values(&[VECTOR_TYPE])
                        .sub(size.into());
                })
                .build()
        });
        let page_cache = (self.page_cache_size != 0).then(|| {
            Cache::builder()
                .max_capacity(self.page_cache_size)
                .weigher(page_cache_weight)
                .eviction_listener(|k, v, _cause| {
                    let size = page_cache_weight(&k, &v);
                    CACHE_BYTES.with_label_values(&[PAGE_TYPE]).sub(size.into());
                })
                .build()
        });

        CacheManager {
            sst_meta_cache,
            vector_cache,
            page_cache,
            write_cache: self.write_cache,
        }
    }
}

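A hedged construction sketch: the builder replaces the positional `CacheManager::new(meta, vector, page)` call, so callers only set the caches they need and a size left at the default of 0 keeps that cache disabled. The byte sizes below are arbitrary.

    // Sketch only: a 128 MiB meta cache and a 512 MiB page cache; the vector
    // cache stays disabled because its size is never set.
    let cache_manager = CacheManager::builder()
        .sst_meta_cache_size(128 * 1024 * 1024)
        .page_cache_size(512 * 1024 * 1024)
        .write_cache(None)
        .build();
    assert!(cache_manager.write_cache().is_none());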
fn meta_cache_weight(k: &SstMetaKey, v: &Arc<ParquetMetaData>) -> u32 {
    // We ignore the size of `Arc`.
    (k.estimated_size() + parquet_meta_size(v)) as u32
@@ -293,7 +319,7 @@ mod tests {

    #[test]
    fn test_disable_cache() {
        let cache = CacheManager::new(0, 0, 0);
        let cache = CacheManager::default();
        assert!(cache.sst_meta_cache.is_none());
        assert!(cache.vector_cache.is_none());
        assert!(cache.page_cache.is_none());
@@ -318,11 +344,13 @@ mod tests {
        let pages = Arc::new(PageValue::new(Vec::new()));
        cache.put_pages(key.clone(), pages);
        assert!(cache.get_pages(&key).is_none());

        assert!(cache.write_cache().is_none());
    }

    #[test]
    fn test_parquet_meta_cache() {
        let cache = CacheManager::new(2000, 0, 0);
        let cache = CacheManager::builder().sst_meta_cache_size(2000).build();
        let region_id = RegionId::new(1, 1);
        let file_id = FileId::random();
        assert!(cache.get_parquet_meta_data(region_id, file_id).is_none());
@@ -335,7 +363,7 @@ mod tests {

    #[test]
    fn test_repeated_vector_cache() {
        let cache = CacheManager::new(0, 4096, 0);
        let cache = CacheManager::builder().vector_cache_size(4096).build();
        let value = Value::Int64(10);
        assert!(cache.get_repeated_vector(&value).is_none());
        let vector: VectorRef = Arc::new(Int64Vector::from_slice([10, 10, 10, 10]));
@@ -346,7 +374,7 @@ mod tests {

    #[test]
    fn test_page_cache() {
        let cache = CacheManager::new(0, 0, 1000);
        let cache = CacheManager::builder().page_cache_size(1000).build();
        let region_id = RegionId::new(1, 1);
        let file_id = FileId::random();
        let key = PageKey {

225 src/mito2/src/cache/file_cache.rs vendored
@@ -14,9 +14,11 @@

//! A cache for files.

use std::ops::{Range, RangeBounds};
use std::sync::Arc;
use std::time::Instant;

use bytes::Bytes;
use common_base::readable_size::ReadableSize;
use common_telemetry::{info, warn};
use futures::{FutureExt, TryStreamExt};
@@ -31,6 +33,7 @@ use crate::cache::FILE_TYPE;
use crate::error::{OpenDalSnafu, Result};
use crate::metrics::{CACHE_BYTES, CACHE_HIT, CACHE_MISS};
use crate::sst::file::FileId;
use crate::sst::parquet::helper::fetch_byte_ranges;

/// Subdirectory of cached files.
const FILE_DIR: &str = "files/";
@@ -68,7 +71,7 @@ impl FileCache {
            // The cache is replaced by another file. This is unexpected, we don't remove the same
            // file but update the metrics as the file is already replaced by users.
            CACHE_BYTES.with_label_values(&[FILE_TYPE]).sub(value.file_size.into());
            warn!("Replace existing cache {} for region {} unexpectedly", file_path, key.0);
            warn!("Replace existing cache {} for region {} unexpectedly", file_path, key.region_id);
            return;
        }

@@ -77,7 +80,7 @@ impl FileCache {
                CACHE_BYTES.with_label_values(&[FILE_TYPE]).sub(value.file_size.into());
            }
            Err(e) => {
                warn!(e; "Failed to delete cached file {} for region {}", file_path, key.0);
                warn!(e; "Failed to delete cached file {} for region {}", file_path, key.region_id);
            }
        }
    }
@@ -100,17 +103,11 @@ impl FileCache {
        self.memory_index.insert(key, value).await;
    }

    async fn get_reader(&self, file_path: &str) -> object_store::Result<Option<Reader>> {
        if self.local_store.is_exist(file_path).await? {
            Ok(Some(self.local_store.reader(file_path).await?))
        } else {
            Ok(None)
        }
    }

    /// Reads a file from the cache.
    pub(crate) async fn reader(&self, key: IndexKey) -> Option<Reader> {
        if !self.memory_index.contains_key(&key) {
        // We must use `get()` to update the estimator of the cache.
        // See https://docs.rs/moka/latest/moka/future/struct.Cache.html#method.contains_key
        if self.memory_index.get(&key).await.is_none() {
            CACHE_MISS.with_label_values(&[FILE_TYPE]).inc();
            return None;
        }
@@ -135,6 +132,39 @@ impl FileCache {
        None
    }

    /// Reads ranges from the cache.
    pub(crate) async fn read_ranges(
        &self,
        key: IndexKey,
        ranges: &[Range<u64>],
    ) -> Option<Vec<Bytes>> {
        if self.memory_index.get(&key).await.is_none() {
            CACHE_MISS.with_label_values(&[FILE_TYPE]).inc();
            return None;
        }

        let file_path = self.cache_file_path(key);
        // In most cases, it will use blocking read,
        // because FileCache is normally based on local file system, which supports blocking read.
        let bytes_result = fetch_byte_ranges(&file_path, self.local_store.clone(), ranges).await;
        match bytes_result {
            Ok(bytes) => {
                CACHE_HIT.with_label_values(&[FILE_TYPE]).inc();
                Some(bytes)
            }
            Err(e) => {
                if e.kind() != ErrorKind::NotFound {
                    warn!("Failed to get file for key {:?}, err: {}", key, e);
                }

                // We remove the file from the index.
                self.memory_index.remove(&key).await;
                CACHE_MISS.with_label_values(&[FILE_TYPE]).inc();
                None
            }
        }
    }

    /// Removes a file from the cache explicitly.
    pub(crate) async fn remove(&self, key: IndexKey) {
        let file_path = self.cache_file_path(key);
@@ -194,10 +224,68 @@ impl FileCache {
    pub(crate) fn local_store(&self) -> ObjectStore {
        self.local_store.clone()
    }

    async fn get_reader(&self, file_path: &str) -> object_store::Result<Option<Reader>> {
        if self.local_store.is_exist(file_path).await? {
            Ok(Some(self.local_store.reader(file_path).await?))
        } else {
            Ok(None)
        }
    }

    /// Checks if the key is in the file cache.
    #[cfg(test)]
    pub(crate) fn contains_key(&self, key: &IndexKey) -> bool {
        self.memory_index.contains_key(key)
    }
}

/// Key of file cache index.
pub(crate) type IndexKey = (RegionId, FileId);
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) struct IndexKey {
    pub region_id: RegionId,
    pub file_id: FileId,
    pub file_type: FileType,
}

impl IndexKey {
    /// Creates a new index key.
    pub fn new(region_id: RegionId, file_id: FileId, file_type: FileType) -> IndexKey {
        IndexKey {
            region_id,
            file_id,
            file_type,
        }
    }
}

/// Type of the file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FileType {
    /// Parquet file.
    Parquet,
    /// Puffin file.
    Puffin,
}

impl FileType {
    /// Parses the file type from string.
    fn parse(s: &str) -> Option<FileType> {
        match s {
            "parquet" => Some(FileType::Parquet),
            "puffin" => Some(FileType::Puffin),
            _ => None,
        }
    }

    /// Converts the file type to string.
    fn as_str(&self) -> &'static str {
        match self {
            FileType::Parquet => "parquet",
            FileType::Puffin => "puffin",
        }
    }
}

/// An entity that describes the file in the file cache.
///
@@ -205,26 +293,35 @@ pub(crate) type IndexKey = (RegionId, FileId);
#[derive(Debug, Clone)]
pub(crate) struct IndexValue {
    /// Size of the file in bytes.
    file_size: u32,
    pub(crate) file_size: u32,
}

/// Generates the path to the cached file.
///
/// The file name format is `{region_id}.{file_id}`
/// The file name format is `{region_id}.{file_id}.{file_type}`
fn cache_file_path(cache_file_dir: &str, key: IndexKey) -> String {
    join_path(cache_file_dir, &format!("{}.{}", key.0.as_u64(), key.1))
    join_path(
        cache_file_dir,
        &format!(
            "{}.{}.{}",
            key.region_id.as_u64(),
            key.file_id,
            key.file_type.as_str()
        ),
    )
}

/// Parse index key from the file name.
fn parse_index_key(name: &str) -> Option<IndexKey> {
    let mut splited = name.splitn(2, '.');
    let region_id = splited.next().and_then(|s| {
    let mut split = name.splitn(3, '.');
    let region_id = split.next().and_then(|s| {
        let id = s.parse::<u64>().ok()?;
        Some(RegionId::from_u64(id))
    })?;
    let file_id = splited.next().and_then(|s| FileId::parse_str(s).ok())?;
    let file_id = split.next().and_then(|s| FileId::parse_str(s).ok())?;
    let file_type = split.next().and_then(FileType::parse)?;

    Some((region_id, file_id))
    Some(IndexKey::new(region_id, file_id, file_type))
}

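A hedged illustration of the new naming scheme: because the file type is now part of the key, a Parquet file and its Puffin index for the same file id can coexist in the cache under distinct paths. `FILE_DIR` is the `files/` constant defined above; the rest of the names are assumed to be in scope.

    // Sketch only: one file id, two cache entries distinguished by file type.
    let parquet_key = IndexKey::new(region_id, file_id, FileType::Parquet);
    let puffin_key = IndexKey::new(region_id, file_id, FileType::Puffin);
    assert_ne!(
        cache_file_path(FILE_DIR, parquet_key),
        cache_file_path(FILE_DIR, puffin_key)
    );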
#[cfg(test)]
@@ -249,7 +346,7 @@ mod tests {
        let cache = FileCache::new(local_store.clone(), ReadableSize::mb(10));
        let region_id = RegionId::new(2000, 0);
        let file_id = FileId::random();
        let key = (region_id, file_id);
        let key = IndexKey::new(region_id, file_id, FileType::Parquet);
        let file_path = cache.cache_file_path(key);

        // Get an empty file.
@@ -262,7 +359,10 @@
            .unwrap();
        // Add to the cache.
        cache
            .put((region_id, file_id), IndexValue { file_size: 5 })
            .put(
                IndexKey::new(region_id, file_id, FileType::Parquet),
                IndexValue { file_size: 5 },
            )
            .await;

        // Read file content.
@@ -271,6 +371,10 @@
        reader.read_to_string(&mut buf).await.unwrap();
        assert_eq!("hello", buf);

        // Get weighted size.
        cache.memory_index.run_pending_tasks().await;
        assert_eq!(5, cache.memory_index.weighted_size());

        // Remove the file.
        cache.remove(key).await;
        assert!(cache.reader(key).await.is_none());
@@ -280,6 +384,7 @@

        // The file also not exists.
        assert!(!local_store.is_exist(&file_path).await.unwrap());
        assert_eq!(0, cache.memory_index.weighted_size());
    }

    #[tokio::test]
@@ -290,7 +395,7 @@
        let cache = FileCache::new(local_store.clone(), ReadableSize::mb(10));
        let region_id = RegionId::new(2000, 0);
        let file_id = FileId::random();
        let key = (region_id, file_id);
        let key = IndexKey::new(region_id, file_id, FileType::Parquet);
        let file_path = cache.cache_file_path(key);

        // Write a file.
@@ -300,7 +405,10 @@
            .unwrap();
        // Add to the cache.
        cache
            .put((region_id, file_id), IndexValue { file_size: 5 })
            .put(
                IndexKey::new(region_id, file_id, FileType::Parquet),
                IndexValue { file_size: 5 },
            )
            .await;

        // Remove the file but keep the index.
@@ -319,10 +427,12 @@
        let cache = FileCache::new(local_store.clone(), ReadableSize::mb(10));

        let region_id = RegionId::new(2000, 0);
        let file_type = FileType::Parquet;
        // Write N files.
        let file_ids: Vec<_> = (0..10).map(|_| FileId::random()).collect();
        let mut total_size = 0;
        for (i, file_id) in file_ids.iter().enumerate() {
            let key = (region_id, *file_id);
            let key = IndexKey::new(region_id, *file_id, file_type);
            let file_path = cache.cache_file_path(key);
            let bytes = i.to_string().into_bytes();
            local_store.write(&file_path, bytes.clone()).await.unwrap();
@@ -330,22 +440,30 @@
            // Add to the cache.
            cache
                .put(
                    (region_id, *file_id),
                    IndexKey::new(region_id, *file_id, file_type),
                    IndexValue {
                        file_size: bytes.len() as u32,
                    },
                )
                .await;
            total_size += bytes.len();
        }

        // Recover the cache.
        let cache = FileCache::new(local_store.clone(), ReadableSize::mb(10));
        // No entry before recovery.
        assert!(cache.reader((region_id, file_ids[0])).await.is_none());
        assert!(cache
            .reader(IndexKey::new(region_id, file_ids[0], file_type))
            .await
            .is_none());
        cache.recover().await.unwrap();

        // Check size.
        cache.memory_index.run_pending_tasks().await;
        assert_eq!(total_size, cache.memory_index.weighted_size() as usize);

        for (i, file_id) in file_ids.iter().enumerate() {
            let key = (region_id, *file_id);
            let key = IndexKey::new(region_id, *file_id, file_type);
            let mut reader = cache.reader(key).await.unwrap();
            let mut buf = String::new();
            reader.read_to_string(&mut buf).await.unwrap();
@@ -353,16 +471,50 @@
        }
    }

    #[tokio::test]
    async fn test_file_cache_read_ranges() {
        let dir = create_temp_dir("");
        let local_store = new_fs_store(dir.path().to_str().unwrap());
        let file_cache = FileCache::new(local_store.clone(), ReadableSize::mb(10));
        let region_id = RegionId::new(2000, 0);
        let file_id = FileId::random();
        let key = IndexKey::new(region_id, file_id, FileType::Parquet);
        let file_path = file_cache.cache_file_path(key);
        // Write a file.
        let data = b"hello greptime database";
        local_store
            .write(&file_path, data.as_slice())
            .await
            .unwrap();
        // Add to the cache.
        file_cache.put(key, IndexValue { file_size: 5 }).await;
        // Ranges
        let ranges = vec![0..5, 6..10, 15..19, 0..data.len() as u64];
        let bytes = file_cache.read_ranges(key, &ranges).await.unwrap();

        assert_eq!(4, bytes.len());
        assert_eq!(b"hello", bytes[0].as_ref());
        assert_eq!(b"grep", bytes[1].as_ref());
        assert_eq!(b"data", bytes[2].as_ref());
        assert_eq!(data, bytes[3].as_ref());
    }

    #[test]
    fn test_cache_file_path() {
        let file_id = FileId::parse_str("3368731b-a556-42b8-a5df-9c31ce155095").unwrap();
        assert_eq!(
            "test_dir/5299989643269.3368731b-a556-42b8-a5df-9c31ce155095",
            cache_file_path("test_dir", (RegionId::new(1234, 5), file_id))
            "test_dir/5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.parquet",
            cache_file_path(
                "test_dir",
                IndexKey::new(RegionId::new(1234, 5), file_id, FileType::Parquet)
            )
        );
        assert_eq!(
            "test_dir/5299989643269.3368731b-a556-42b8-a5df-9c31ce155095",
            cache_file_path("test_dir/", (RegionId::new(1234, 5), file_id))
            "test_dir/5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.parquet",
            cache_file_path(
                "test_dir/",
                IndexKey::new(RegionId::new(1234, 5), file_id, FileType::Parquet)
            )
        );
    }

@@ -371,8 +523,8 @@ mod tests {
        let file_id = FileId::parse_str("3368731b-a556-42b8-a5df-9c31ce155095").unwrap();
        let region_id = RegionId::new(1234, 5);
        assert_eq!(
            (region_id, file_id),
            parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095").unwrap()
            IndexKey::new(region_id, file_id, FileType::Parquet),
            parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.parquet").unwrap()
        );
        assert!(parse_index_key("").is_none());
        assert!(parse_index_key(".").is_none());
@@ -381,8 +533,13 @@
        assert!(parse_index_key(".5299989643269").is_none());
        assert!(parse_index_key("5299989643269.").is_none());
        assert!(parse_index_key("5299989643269.3368731b-a556-42b8-a5df").is_none());
        assert!(parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095").is_none());
        assert!(
            parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.parquet").is_none()
            parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.parque").is_none()
        );
        assert!(parse_index_key(
            "5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.parquet.puffin"
        )
        .is_none());
    }
}

8 src/mito2/src/cache/test_util.rs vendored
@@ -19,6 +19,8 @@ use std::sync::Arc;
use bytes::Bytes;
use datatypes::arrow::array::{ArrayRef, Int64Array};
use datatypes::arrow::record_batch::RecordBatch;
use object_store::services::Fs;
use object_store::ObjectStore;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use parquet::arrow::ArrowWriter;
use parquet::file::metadata::ParquetMetaData;
@@ -42,3 +44,9 @@ fn parquet_file_data() -> Vec<u8> {

    buffer
}

pub(crate) fn new_fs_store(path: &str) -> ObjectStore {
    let mut builder = Fs::default();
    builder.root(path);
    ObjectStore::new(builder).unwrap().finish()
}

237 src/mito2/src/cache/write_cache.rs vendored
@@ -14,19 +14,28 @@

//! A write-through cache for remote object stores.

use std::ops::Range;
use std::sync::Arc;

use api::v1::region;
use bytes::Bytes;
use common_base::readable_size::ReadableSize;
use common_telemetry::{debug, info};
use object_store::manager::ObjectStoreManagerRef;
use object_store::ObjectStore;
use snafu::ResultExt;
use store_api::metadata::RegionMetadataRef;
use store_api::storage::RegionId;

use crate::cache::file_cache::{FileCache, FileCacheRef};
use crate::error::Result;
use crate::access_layer::new_fs_object_store;
use crate::cache::file_cache::{FileCache, FileCacheRef, FileType, IndexKey, IndexValue};
use crate::error::{self, Result};
use crate::metrics::{FLUSH_ELAPSED, UPLOAD_BYTES_TOTAL};
use crate::read::Source;
use crate::sst::file::FileId;
use crate::sst::parquet::writer::ParquetWriter;
use crate::sst::parquet::{SstInfo, WriteOptions};
use crate::sst::DEFAULT_WRITE_BUFFER_SIZE;

/// A cache for uploading files to remote object stores.
///
@@ -43,20 +52,35 @@ pub type WriteCacheRef = Arc<WriteCache>;
impl WriteCache {
    /// Create the cache with a `local_store` to cache files and a
    /// `object_store_manager` for all object stores.
    pub fn new(
    pub async fn new(
        local_store: ObjectStore,
        object_store_manager: ObjectStoreManagerRef,
        cache_capacity: ReadableSize,
    ) -> Self {
        Self {
            file_cache: Arc::new(FileCache::new(local_store, cache_capacity)),
    ) -> Result<Self> {
        let file_cache = FileCache::new(local_store, cache_capacity);
        file_cache.recover().await?;

        Ok(Self {
            file_cache: Arc::new(file_cache),
            object_store_manager,
        }
        })
    }

    /// Recovers the write cache from local store.
    pub async fn recover(&self) -> Result<()> {
        self.file_cache.recover().await
    /// Creates a write cache based on local fs.
    pub async fn new_fs(
        cache_dir: &str,
        object_store_manager: ObjectStoreManagerRef,
        cache_capacity: ReadableSize,
    ) -> Result<Self> {
        info!("Init write cache on {cache_dir}, capacity: {cache_capacity}");

        let local_store = new_fs_object_store(cache_dir).await?;
        Self::new(local_store, object_store_manager, cache_capacity).await
    }

    /// Returns the file cache of the write cache.
    pub(crate) fn file_cache(&self) -> FileCacheRef {
        self.file_cache.clone()
    }

    /// Writes SST to the cache and then uploads it to the remote object store.
@@ -65,11 +89,105 @@ impl WriteCache {
        request: SstUploadRequest,
        write_opts: &WriteOptions,
    ) -> Result<Option<SstInfo>> {
        // TODO(yingwen): Write to the local store and then upload.
        // Now we write to the remote and ignore local cache.
        let mut writer =
            ParquetWriter::new(request.upload_path, request.metadata, request.remote_store);
        writer.write_all(request.source, write_opts).await
        let timer = FLUSH_ELAPSED
            .with_label_values(&["write_sst"])
            .start_timer();

        let region_id = request.metadata.region_id;
        let file_id = request.file_id;
        let parquet_key = IndexKey::new(region_id, file_id, FileType::Parquet);

        // Write to FileCache.
        let mut writer = ParquetWriter::new(
            self.file_cache.cache_file_path(parquet_key),
            request.metadata,
            self.file_cache.local_store(),
        );

        let sst_info = writer.write_all(request.source, write_opts).await?;

        timer.stop_and_record();

        // Upload sst file to remote object store.
        let Some(sst_info) = sst_info else {
            // No data to upload.
            return Ok(None);
        };

        let parquet_path = &request.upload_path;
        let remote_store = &request.remote_store;
        self.upload(parquet_key, parquet_path, remote_store).await?;

        if sst_info.inverted_index_available {
            let puffin_key = IndexKey::new(region_id, file_id, FileType::Puffin);
            let puffin_path = &request.index_upload_path;
            self.upload(puffin_key, puffin_path, remote_store).await?;
        }

        Ok(Some(sst_info))
    }

    /// Uploads a Parquet file or a Puffin file to the remote object store.
    async fn upload(
        &self,
        index_key: IndexKey,
        upload_path: &str,
        remote_store: &ObjectStore,
    ) -> Result<()> {
        let region_id = index_key.region_id;
        let file_id = index_key.file_id;
        let file_type = index_key.file_type;
        let cache_path = self.file_cache.cache_file_path(index_key);

        let timer = FLUSH_ELAPSED
            .with_label_values(&[match file_type {
                FileType::Parquet => "upload_parquet",
                FileType::Puffin => "upload_puffin",
            }])
            .start_timer();

        let reader = self
            .file_cache
            .local_store()
            .reader(&cache_path)
            .await
            .context(error::OpenDalSnafu)?;

        let mut writer = remote_store
            .writer_with(upload_path)
            .buffer(DEFAULT_WRITE_BUFFER_SIZE.as_bytes() as usize)
            .await
            .context(error::OpenDalSnafu)?;

        let bytes_written =
            futures::io::copy(reader, &mut writer)
                .await
                .context(error::UploadSnafu {
                    region_id,
                    file_id,
                    file_type,
                })?;

        // Must close to upload all data.
        writer.close().await.context(error::OpenDalSnafu)?;

        UPLOAD_BYTES_TOTAL.inc_by(bytes_written);

        debug!(
            "Successfully upload file to remote, region: {}, file: {}, upload_path: {}, cost: {:?}s",
            region_id,
            file_id,
            upload_path,
            timer.stop_and_record()
        );

        let index_value = IndexValue {
            file_size: bytes_written as _,
        };
        // Register to file cache
        self.file_cache.put(index_key, index_value).await;

        Ok(())
    }
}

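A hedged wiring sketch for the new constructors: the engine can now build the write cache straight from a directory, and recovery of the local file index happens inside `new`/`new_fs`, so a separate `recover()` call is no longer needed. The directory and capacity below are placeholders, and `object_store_manager` is assumed to be in scope.

    // Sketch only: create a write cache over a local directory and hand it to the
    // cache manager builder introduced earlier in this diff.
    let write_cache = WriteCache::new_fs(
        "/tmp/greptimedb/write_cache", // placeholder path
        object_store_manager.clone(),
        ReadableSize::mb(512),
    )
    .await?;
    let cache_manager = Arc::new(
        CacheManager::builder()
            .write_cache(Some(Arc::new(write_cache)))
            .build(),
    );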
@@ -81,6 +199,95 @@ pub struct SstUploadRequest {
    pub storage: Option<String>,
    /// Path to upload the file.
    pub upload_path: String,
    /// Path to upload the index file.
    pub index_upload_path: String,
    /// Remote object store to upload.
    pub remote_store: ObjectStore,
}

#[cfg(test)]
mod tests {
    use api::v1::OpType;
    use common_base::readable_size::ReadableSize;
    use common_test_util::temp_dir::create_temp_dir;
    use object_store::manager::ObjectStoreManager;
    use object_store::services::Fs;
    use object_store::ObjectStore;
    use store_api::storage::RegionId;

    use super::*;
    use crate::cache::file_cache::{self, FileCache};
    use crate::cache::test_util::new_fs_store;
    use crate::sst::file::FileId;
    use crate::sst::location::{index_file_path, sst_file_path};
    use crate::test_util::sst_util::{
        new_batch_by_range, new_source, sst_file_handle, sst_region_metadata,
    };
    use crate::test_util::{build_rows, new_batch_builder, CreateRequestBuilder, TestEnv};

    #[tokio::test]
    async fn test_write_and_upload_sst() {
        // TODO(QuenKar): maybe find a way to create some object server for testing,
        // and now just use local file system to mock.
        let mut env = TestEnv::new();
        let mock_store = env.init_object_store_manager();
        let file_id = FileId::random();
        let upload_path = sst_file_path("test", file_id);
        let index_upload_path = index_file_path("test", file_id);

        // Create WriteCache
        let local_dir = create_temp_dir("");
        let local_store = new_fs_store(local_dir.path().to_str().unwrap());
        let object_store_manager = env.get_object_store_manager().unwrap();
        let write_cache = WriteCache::new(
            local_store.clone(),
            object_store_manager,
            ReadableSize::mb(10),
        )
        .await
        .unwrap();

        // Create Source
        let metadata = Arc::new(sst_region_metadata());
        let region_id = metadata.region_id;
        let source = new_source(&[
            new_batch_by_range(&["a", "d"], 0, 60),
            new_batch_by_range(&["b", "f"], 0, 40),
            new_batch_by_range(&["b", "h"], 100, 200),
        ]);

        let request = SstUploadRequest {
            file_id,
            metadata,
            source,
            storage: None,
            upload_path: upload_path.clone(),
            index_upload_path,
            remote_store: mock_store.clone(),
        };

        let write_opts = WriteOptions {
            row_group_size: 512,
            ..Default::default()
        };

        // Write to cache and upload sst to mock remote store
        let sst_info = write_cache
            .write_and_upload_sst(request, &write_opts)
            .await
            .unwrap()
            .unwrap();

        // Check write cache contains the key
        let key = IndexKey::new(region_id, file_id, FileType::Parquet);
        assert!(write_cache.file_cache.contains_key(&key));

        // Check file data
        let remote_data = mock_store.read(&upload_path).await.unwrap();
        let cache_data = local_store
            .read(&write_cache.file_cache.cache_file_path(key))
            .await
            .unwrap();
        assert_eq!(remote_data, cache_data);
    }
}

@@ -35,6 +35,8 @@ pub fn new_file_handle(
            ),
            level,
            file_size: 0,
            available_indexes: Default::default(),
            index_file_size: 0,
        },
        file_purger,
    )

@@ -22,6 +22,7 @@ use common_telemetry::{debug, error, info};
use common_time::timestamp::TimeUnit;
use common_time::timestamp_millis::BucketAligned;
use common_time::Timestamp;
use smallvec::SmallVec;
use snafu::ResultExt;
use store_api::metadata::RegionMetadataRef;
use store_api::storage::RegionId;
@@ -39,7 +40,7 @@ use crate::read::{BoxedBatchReader, Source};
use crate::request::{
    BackgroundNotify, CompactionFailed, CompactionFinished, OutputTx, WorkerRequest,
};
use crate::sst::file::{FileHandle, FileId, FileMeta, Level};
use crate::sst::file::{FileHandle, FileId, FileMeta, IndexType, Level};
use crate::sst::file_purger::FilePurgerRef;
use crate::sst::parquet::WriteOptions;
use crate::sst::version::LevelMeta;
@@ -306,6 +307,7 @@ impl TwcsCompactionTask {
            let metadata = self.metadata.clone();
            let sst_layer = self.sst_layer.clone();
            let region_id = self.region_id;
            let file_id = output.output_file_id;
            let cache_manager = self.cache_manager.clone();
            let storage = self.storage.clone();
            futs.push(async move {
@@ -314,7 +316,7 @@ impl TwcsCompactionTask {
                let file_meta_opt = sst_layer
                    .write_sst(
                        SstWriteRequest {
                            file_id: output.output_file_id,
                            file_id,
                            metadata,
                            source: Source::Reader(reader),
                            cache_manager,
@@ -325,10 +327,15 @@ impl TwcsCompactionTask {
                    .await?
                    .map(|sst_info| FileMeta {
                        region_id,
                        file_id: output.output_file_id,
                        file_id,
                        time_range: sst_info.time_range,
                        level: output.output_level,
                        file_size: sst_info.file_size,
                        available_indexes: sst_info
                            .inverted_index_available
                            .then(|| SmallVec::from_iter([IndexType::InvertedIndex]))
                            .unwrap_or_default(),
                        index_file_size: sst_info.index_file_size,
                    });
                Ok(file_meta_opt)
            });

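A hedged restatement of the new `available_indexes` bookkeeping: compaction records an inverted index in the output `FileMeta` only when the writer reported one, which is what `delete_sst` later checks before removing the index file. The names below mirror the hunk above.

    // Sketch only: how the flag travels from SstInfo into FileMeta.
    let available_indexes = if sst_info.inverted_index_available {
        SmallVec::from_iter([IndexType::InvertedIndex])
    } else {
        SmallVec::new()
    };
    // `FileMeta::inverted_index_available()` presumably just looks this list up later.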
@@ -19,6 +19,9 @@ use std::time::Duration;
use common_base::readable_size::ReadableSize;
use common_telemetry::warn;
use serde::{Deserialize, Serialize};
use snafu::ensure;

use crate::error::{InvalidConfigSnafu, Result};

/// Default max running background job.
const DEFAULT_MAX_BG_JOB: usize = 4;
@@ -67,6 +70,12 @@ pub struct MitoConfig {
    pub vector_cache_size: ReadableSize,
    /// Cache size for pages of SST row groups (default 512MB). Setting it to 0 disables the cache.
    pub page_cache_size: ReadableSize,
    /// Whether to enable the experimental write cache.
    pub enable_experimental_write_cache: bool,
    /// Path for write cache.
    pub experimental_write_cache_path: String,
    /// Capacity for write cache.
    pub experimental_write_cache_size: ReadableSize,

    // Other configs:
    /// Buffer size for SST writing.
@@ -78,6 +87,8 @@ pub struct MitoConfig {
    pub scan_parallelism: usize,
    /// Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
    pub parallel_scan_channel_size: usize,
    /// Whether to allow stale entries read during replay.
    pub allow_stale_entries: bool,
}

impl Default for MitoConfig {
@@ -95,16 +106,22 @@ impl Default for MitoConfig {
            sst_meta_cache_size: ReadableSize::mb(128),
            vector_cache_size: ReadableSize::mb(512),
            page_cache_size: ReadableSize::mb(512),
            enable_experimental_write_cache: false,
            experimental_write_cache_path: String::new(),
            experimental_write_cache_size: ReadableSize::mb(512),
            sst_write_buffer_size: ReadableSize::mb(8),
            scan_parallelism: divide_num_cpus(4),
            parallel_scan_channel_size: DEFAULT_SCAN_CHANNEL_SIZE,
            allow_stale_entries: false,
        }
    }
}

impl MitoConfig {
    /// Sanitize incorrect configurations.
    pub(crate) fn sanitize(&mut self) {
    ///
    /// Returns an error if there is a configuration that cannot be sanitized.
    pub(crate) fn sanitize(&mut self) -> Result<()> {
        // Use default value if `num_workers` is 0.
        if self.num_workers == 0 {
            self.num_workers = divide_num_cpus(2);
@@ -149,6 +166,17 @@ impl MitoConfig {
                self.parallel_scan_channel_size
            );
        }

        if self.enable_experimental_write_cache {
            ensure!(
                !self.experimental_write_cache_path.is_empty(),
                InvalidConfigSnafu {
                    reason: "experimental_write_cache_path should not be empty",
                }
            );
        }

        Ok(())
    }
}

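A hedged configuration sketch: enabling the experimental write cache now requires a non-empty path, and `sanitize()` rejects the combination instead of silently accepting it. The path below is a placeholder, not a shipped default.

    // Sketch only: turn on the experimental write cache in code and validate it.
    let mut config = MitoConfig {
        enable_experimental_write_cache: true,
        experimental_write_cache_path: "/var/lib/greptimedb/write_cache".to_string(),
        experimental_write_cache_size: ReadableSize::mb(512),
        ..Default::default()
    };
    // Fails with InvalidConfig if the cache is enabled but the path is empty.
    config.sanitize()?;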
Some files were not shown because too many files have changed in this diff.