Compare commits

...

37 Commits

Author SHA1 Message Date
Weny Xu
75975adcb6 fix: fix tests failing on Windows (#3155)
* fix: fix tests failing on Windows

* feat: add comments

* Update src/object-store/src/util.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-12 09:12:11 +00:00
Ruihang Xia
527e523a38 fix: handle non-identical time index and field column in PromQL set operation (#3145)
* handle different field columns

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix and/unless on different time index

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update sqlness result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-12 06:27:03 +00:00
Weny Xu
aad2afd3f2 chore: bump version to 0.6.0 (#3154) 2024-01-12 06:25:14 +00:00
Weny Xu
bf88b3b4a0 fix: fix store all wal options (#3149)
* fix: fix store all wal options

* fix: incorrect updating DatanodeTable value
2024-01-12 04:48:14 +00:00
Weny Xu
bf96ce3049 fix: print detailed error (#3146) 2024-01-12 04:02:32 +00:00
Weny Xu
430ffe0e28 fix(kafka): overwrite the EntryId with Offset while consuming records (#3148)
* fix(kafka): overwrite the EntryId with Offset while consuming the KafkaRecords

* fix: temporary workaround for incorrect entry Id
2024-01-12 03:46:17 +00:00
Zhenchi
c1190bae7b feat(mito): support write cache for index file (#3144)
* feat(mito): support write cache for index file

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: merge main

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-12 02:40:56 +00:00
Ruihang Xia
0882da4d01 feat: support PromQL operations over the same metric (#3124)
* update sqlness result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update ut cases

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove deadcode

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-11 23:07:17 +00:00
Wei
8ec1e42754 feat: read data from write cache (#3128)
* feat: read from write cache

* chore: add read ranges test

* fix: use get instead of contains_key

* chore: clippy

* chore: cr comment

Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: with_label_values

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-11 12:06:28 +00:00
Ruihang Xia
b00b49284e feat: manage kafka cluster in sqlness runner (#3143)
* feat: manage kafka cluster in sqlness runner

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* pull up clippy config

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Apply suggestions from code review

Co-authored-by: niebayes <niebayes@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: niebayes <niebayes@gmail.com>
2024-01-11 09:47:19 +00:00
Ruihang Xia
09b3c7029b feat: handle drop request for metric table (#3136)
* handle drop request

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* adjust procedure manager

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add create table sqlness test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* insert/query metric table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* address CR comments

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/common/meta/src/kv_backend.rs

Co-authored-by: JeremyHi <jiachun_feng@proton.me>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* reuse region option for metadata region

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* tweak variable name

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: JeremyHi <jiachun_feng@proton.me>
2024-01-11 09:38:43 +00:00
niebayes
f5798e2833 fix: remove incorrect wal comments in config file (#3142)
fix: kafka config comments
2024-01-11 09:34:24 +00:00
Zhenchi
fd8fb641fd feat(parquet): introduce inverted index applier to reader (#3130)
* feat(parquet): introduce inverted index applier to reader

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat: purger removes index file

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix test

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: add TODO for escape route

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: add TODO for escape route

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* Update src/mito2/src/access_layer.rs

Co-authored-by: dennis zhuang <killme2008@gmail.com>

* Update src/mito2/src/sst/parquet/reader.rs

Co-authored-by: dennis zhuang <killme2008@gmail.com>

* feat: min-max index to prune row groups filtered by inverted index

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat: file_meta.inverted_index_available -> file_meta.available_indexes

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: add TODO for leveraging WriteCache

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix fmt

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: misset available indexes

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat: add index file size

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor: use smallvec to reduce heap allocation

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: add index size to disk usage

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: dennis zhuang <killme2008@gmail.com>
2024-01-11 08:04:59 +00:00
Weny Xu
312e8e824e fix: save code in debug_assert! (#3137)
fix: save code in debug_assert!
2024-01-11 06:07:08 +00:00
Wei
29a7f301df feat: write and upload sst (#3106)
* feat: write and upload sst file

* refactor: unit test

* cr comment

* chore: typos

* chore: cr comment

* chore: conflict

* Apply suggestions from code review

Co-authored-by: dennis zhuang <killme2008@gmail.com>

* chore: fmt

* chore: style

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Co-authored-by: dennis zhuang <killme2008@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-11 02:34:16 +00:00
LFC
51a3fbc7bf refactor: change how frontend grpc services are orchestrated (#3134) 2024-01-11 02:26:44 +00:00
Lanqing Yang
d521bc9dc5 chore: impl KvBackend for MetaPeerClient (#3076) 2024-01-10 14:16:03 +00:00
Weny Xu
7fad4e8356 fix: incorrect parsing of broker_endpoints env variable (#3135) 2024-01-10 13:59:49 +00:00
Ning Sun
b6033f62cd refactor: implement version as built-in function and use fixed mysql version (#3133)
* refactor: implement version as built-in function

* test: add sqlness test for version()
2024-01-10 11:04:18 +00:00
dennis zhuang
fd3f23ea15 feat: adds runtime_metrics (#3127)
* feat: adds runtime_metrics

* fix: comment

* feat: refactor metrics table

* chore: ensure build_info and runtime_metrics are only available in greptime catalog

* feat: adds timestamp column
2024-01-10 10:51:30 +00:00
niebayes
1b0e39a7f2 chore: stop exposing num_partitions (#3132) 2024-01-10 10:45:18 +00:00
Weny Xu
3ab370265a feat: expose the region migration replay_timeout argument (#3129)
* feat: expose region migration args

* fix: fix ci
2024-01-10 09:47:59 +00:00
Weny Xu
ec8266b969 refactor: refactor the locks in the procedure (#3126)
* feat: add lock key

* refactor: procedure lock keys

* chore: apply suggestions from CR
2024-01-10 09:46:39 +00:00
Zhenchi
490312bf57 fix: unstable time record test (#3131)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-10 09:41:52 +00:00
Ning Sun
1fc168bf6a feat: update our cross schema check to cross catalog (#3123) 2024-01-09 09:38:48 +00:00
Zhenchi
db98484796 feat(inverted_index): introduce SstIndexCreator (#3107)
* feat(inverted_index): introduce SstIndexCreator

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: tiny polish

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat: distinguish intermediate store and index store

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: move comment as doc comment

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor: column id as index name

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-09 09:24:16 +00:00
Ruihang Xia
7d0d2163d2 fix: expose unsupported datatype error on mysql protocol (#3121)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-09 09:13:53 +00:00
Ruihang Xia
c4582c05cc chore: change the default doc checkbox to no need (#3122)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-09 17:12:54 +08:00
niebayes
a0a31c8acc chore(remote_wal): remove topic alias (#3120)
chore: remove topic alias
2024-01-09 07:35:02 +00:00
tison
0db1861452 chore(python): Print Python interpreter version (#3118)
* chore(pyo3_backend): Print bundled Python interpreter version

Signed-off-by: tison <wander4096@gmail.com>

* print RustPython interpreter version on init

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-01-09 07:04:23 +00:00
Wei
225ae953d1 feat: add parquet metadata to cache (#3097)
* feat: parquet metadata to sst meta cache

* chore: clippy

* refactor: move code to access_layer

* chore: clone()
2024-01-09 07:00:42 +00:00
Lei, HUANG
2c1b1cecc8 chore: add bound check for raft-engine logstore (#3073)
* chore: add bound check for raft-engine logstore

* feat: add bound check to append_batch API

* chore: check entry id during replay

* chore: resolve conflicts

* feat: add allow_stale_entries options to force obsolete wal entries

* chore: resolve some comments
2024-01-09 06:42:46 +00:00
Lei, HUANG
62db28b465 feat: add options to enable log recycle and periodical fsync (#3114)
* feat: add options to enable log recycle and periodical fsync

* fix: resolve review comments

* fix: conflicts
2024-01-09 06:41:23 +00:00
fys
6e860bc0fd feat: support grpc for otlp trace and metrics (#3105)
* feat: add grpc support for otlp trace and metrics

* cr: add some comment

* fix: ut

* fix: cr
2024-01-09 05:01:48 +00:00
Yingwen
8bd4a36136 feat(mito): Init the write cache in datanode (#3100)
* feat: add builder to build cache manager

* refactor: make MitoEngine::new async

* refactor: refactor object store creation

* refactor: add helper fn to attach layers

* feat: fn to build fs store

* feat: add write cache to engine

* feat: config write cache

* style: fix clippy

* test: fix test

* feat: add warning

* chore: add experimental prefix to configs

* test: fix config test

* test: test weighted size

* feat: add switch to enable write cache

* fix: update cache stats by using get

* style: use then
2024-01-09 04:40:22 +00:00
Ruihang Xia
af0c4c068a feat: support PromQL function vector (#3036)
* produce vector plan

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* work with OR

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* apply review sugg

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* move common const strings to common_query

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add comment for GREPTIME_COUNT

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-09 03:44:00 +00:00
dennis zhuang
26cbcb8b3a docs: update issue template (#3119) 2024-01-09 02:45:55 +00:00
201 changed files with 5695 additions and 1734 deletions

View File

@@ -21,6 +21,7 @@ body:
- Locking issue
- Performance issue
- Unexpected error
- User Experience
- Other
validations:
required: true
@@ -33,9 +34,14 @@ body:
multiple: true
options:
- Standalone mode
- Distributed Cluster
- Storage Engine
- Query Engine
- Table Engine
- Write Protocols
- MetaSrv
- Frontend
- Datanode
- Meta
- Other
validations:
required: true
@@ -77,6 +83,17 @@ body:
validations:
required: true
- type: input
id: greptimedb
attributes:
label: What version of GreptimeDB did you use?
description: |
Please provide the version of GreptimeDB. For example:
0.5.1 etc. You can get it by executing command line `greptime --version`.
placeholder: "0.5.1"
validations:
required: true
- type: textarea
id: logs
attributes:

View File

@@ -15,6 +15,6 @@ Please explain IN DETAIL what the changes are in this PR and why they are needed
- [ ] I have written the necessary rustdoc comments.
- [ ] I have added the necessary unit tests and integration tests.
- [ ] This PR does not require documentation updates.
- [x] This PR does not require documentation updates.
## Refer to a related PR or issue link (optional)

View File

@@ -91,7 +91,7 @@ env:
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
NIGHTLY_RELEASE_PREFIX: nightly
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
NEXT_RELEASE_VERSION: v0.6.0
NEXT_RELEASE_VERSION: v0.7.0
jobs:
allocate-runners:

133
Cargo.lock generated
View File

@@ -196,7 +196,7 @@ checksum = "8f1f8f5a6f3d50d89e3797d7593a50f96bb2aaa20ca0cc7be1fb673232c91d72"
[[package]]
name = "api"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"common-base",
"common-decimal",
@@ -674,7 +674,7 @@ dependencies = [
[[package]]
name = "auth"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"async-trait",
@@ -847,7 +847,7 @@ dependencies = [
[[package]]
name = "benchmarks"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"arrow",
"chrono",
@@ -1179,7 +1179,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "catalog"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"arc-swap",
@@ -1206,6 +1206,7 @@ dependencies = [
"datatypes",
"futures",
"futures-util",
"itertools 0.10.5",
"lazy_static",
"log-store",
"meta-client",
@@ -1451,7 +1452,7 @@ checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
[[package]]
name = "client"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"arrow-flight",
@@ -1484,7 +1485,7 @@ dependencies = [
"session",
"snafu",
"substrait 0.17.1",
"substrait 0.5.1",
"substrait 0.6.0",
"tokio",
"tokio-stream",
"tonic 0.10.2",
@@ -1514,7 +1515,7 @@ dependencies = [
[[package]]
name = "cmd"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"anymap",
"async-trait",
@@ -1565,7 +1566,7 @@ dependencies = [
"session",
"snafu",
"store-api",
"substrait 0.5.1",
"substrait 0.6.0",
"table",
"temp-env",
"tikv-jemallocator",
@@ -1598,7 +1599,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"
[[package]]
name = "common-base"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"anymap",
"bitvec",
@@ -1613,7 +1614,7 @@ dependencies = [
[[package]]
name = "common-catalog"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"chrono",
"common-error",
@@ -1624,7 +1625,7 @@ dependencies = [
[[package]]
name = "common-config"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"common-base",
"humantime-serde",
@@ -1637,7 +1638,7 @@ dependencies = [
[[package]]
name = "common-datasource"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"arrow",
"arrow-schema",
@@ -1668,7 +1669,7 @@ dependencies = [
[[package]]
name = "common-decimal"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"arrow",
"bigdecimal",
@@ -1682,7 +1683,7 @@ dependencies = [
[[package]]
name = "common-error"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"snafu",
"strum 0.25.0",
@@ -1690,7 +1691,7 @@ dependencies = [
[[package]]
name = "common-function"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"arc-swap",
"build-data",
@@ -1714,7 +1715,7 @@ dependencies = [
[[package]]
name = "common-greptimedb-telemetry"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"async-trait",
"common-error",
@@ -1733,7 +1734,7 @@ dependencies = [
[[package]]
name = "common-grpc"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"arrow-flight",
@@ -1763,7 +1764,7 @@ dependencies = [
[[package]]
name = "common-grpc-expr"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"async-trait",
@@ -1782,7 +1783,7 @@ dependencies = [
[[package]]
name = "common-macro"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"arc-swap",
"common-query",
@@ -1797,7 +1798,7 @@ dependencies = [
[[package]]
name = "common-mem-prof"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"common-error",
"common-macro",
@@ -1810,7 +1811,7 @@ dependencies = [
[[package]]
name = "common-meta"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"async-recursion",
@@ -1857,7 +1858,7 @@ dependencies = [
[[package]]
name = "common-procedure"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"async-stream",
"async-trait",
@@ -1881,7 +1882,7 @@ dependencies = [
[[package]]
name = "common-procedure-test"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"async-trait",
"common-procedure",
@@ -1889,7 +1890,7 @@ dependencies = [
[[package]]
name = "common-query"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"async-trait",
@@ -1912,7 +1913,7 @@ dependencies = [
[[package]]
name = "common-recordbatch"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"common-error",
"common-macro",
@@ -1929,7 +1930,7 @@ dependencies = [
[[package]]
name = "common-runtime"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"async-trait",
"common-error",
@@ -1949,7 +1950,7 @@ dependencies = [
[[package]]
name = "common-telemetry"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"backtrace",
"common-error",
@@ -1975,8 +1976,11 @@ dependencies = [
[[package]]
name = "common-test-util"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"client",
"common-query",
"common-recordbatch",
"once_cell",
"rand",
"tempfile",
@@ -1984,7 +1988,7 @@ dependencies = [
[[package]]
name = "common-time"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"arrow",
"chrono",
@@ -2000,7 +2004,7 @@ dependencies = [
[[package]]
name = "common-version"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"build-data",
]
@@ -2630,7 +2634,7 @@ dependencies = [
[[package]]
name = "datanode"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"arrow-flight",
@@ -2690,7 +2694,7 @@ dependencies = [
"snafu",
"sql",
"store-api",
"substrait 0.5.1",
"substrait 0.6.0",
"table",
"tokio",
"tokio-stream",
@@ -2704,7 +2708,7 @@ dependencies = [
[[package]]
name = "datatypes"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"arrow",
"arrow-array",
@@ -3165,7 +3169,7 @@ dependencies = [
[[package]]
name = "file-engine"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"async-trait",
@@ -3296,7 +3300,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa"
[[package]]
name = "frontend"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"arc-swap",
@@ -3360,7 +3364,7 @@ dependencies = [
"sqlparser 0.38.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)",
"store-api",
"strfmt",
"substrait 0.5.1",
"substrait 0.6.0",
"table",
"tokio",
"toml 0.8.8",
@@ -4014,7 +4018,7 @@ dependencies = [
[[package]]
name = "index"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"async-trait",
"asynchronous-codec",
@@ -4494,7 +4498,7 @@ checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "log-store"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"async-stream",
"async-trait",
@@ -4509,6 +4513,8 @@ dependencies = [
"common-runtime",
"common-telemetry",
"common-test-util",
"common-time",
"dashmap",
"futures",
"futures-util",
"itertools 0.10.5",
@@ -4771,7 +4777,7 @@ dependencies = [
[[package]]
name = "meta-client"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"async-trait",
@@ -4801,7 +4807,7 @@ dependencies = [
[[package]]
name = "meta-srv"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"anymap",
"api",
@@ -4830,6 +4836,7 @@ dependencies = [
"futures",
"h2",
"http-body",
"humantime",
"humantime-serde",
"itertools 0.10.5",
"lazy_static",
@@ -4879,7 +4886,7 @@ dependencies = [
[[package]]
name = "metric-engine"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"aquamarine",
@@ -4950,7 +4957,7 @@ dependencies = [
[[package]]
name = "mito2"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"anymap",
"api",
@@ -5451,7 +5458,7 @@ dependencies = [
[[package]]
name = "object-store"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"anyhow",
"async-trait",
@@ -5696,7 +5703,7 @@ dependencies = [
[[package]]
name = "operator"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"async-trait",
@@ -5740,7 +5747,7 @@ dependencies = [
"sql",
"sqlparser 0.38.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)",
"store-api",
"substrait 0.5.1",
"substrait 0.6.0",
"table",
"tokio",
"tonic 0.10.2",
@@ -5971,7 +5978,7 @@ dependencies = [
[[package]]
name = "partition"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"async-trait",
@@ -6290,7 +6297,7 @@ dependencies = [
[[package]]
name = "plugins"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"auth",
"common-base",
@@ -6548,7 +6555,7 @@ dependencies = [
[[package]]
name = "promql"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"ahash 0.8.6",
"async-recursion",
@@ -6558,6 +6565,7 @@ dependencies = [
"common-catalog",
"common-error",
"common-macro",
"common-query",
"common-recordbatch",
"common-telemetry",
"datafusion",
@@ -6758,7 +6766,7 @@ dependencies = [
[[package]]
name = "puffin"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"async-trait",
"bitflags 2.4.1",
@@ -6869,7 +6877,7 @@ dependencies = [
[[package]]
name = "query"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"ahash 0.8.6",
"api",
@@ -6927,7 +6935,7 @@ dependencies = [
"stats-cli",
"store-api",
"streaming-stats",
"substrait 0.5.1",
"substrait 0.6.0",
"table",
"tokio",
"tokio-stream",
@@ -8197,7 +8205,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "script"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"arc-swap",
@@ -8457,7 +8465,7 @@ dependencies = [
[[package]]
name = "servers"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"aide",
"api",
@@ -8553,7 +8561,7 @@ dependencies = [
[[package]]
name = "session"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"arc-swap",
@@ -8708,6 +8716,9 @@ name = "smallvec"
version = "1.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970"
dependencies = [
"serde",
]
[[package]]
name = "smartstring"
@@ -8814,7 +8825,7 @@ dependencies = [
[[package]]
name = "sql"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"common-base",
@@ -8866,7 +8877,7 @@ dependencies = [
[[package]]
name = "sqlness-runner"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"async-trait",
"clap 4.4.11",
@@ -9073,7 +9084,7 @@ dependencies = [
[[package]]
name = "store-api"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"aquamarine",
@@ -9213,7 +9224,7 @@ dependencies = [
[[package]]
name = "substrait"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"async-recursion",
"async-trait",
@@ -9361,7 +9372,7 @@ dependencies = [
[[package]]
name = "table"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"anymap",
"async-trait",
@@ -9473,7 +9484,7 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
[[package]]
name = "tests-integration"
version = "0.5.1"
version = "0.6.0"
dependencies = [
"api",
"async-trait",
@@ -9529,7 +9540,7 @@ dependencies = [
"sql",
"sqlx",
"store-api",
"substrait 0.5.1",
"substrait 0.6.0",
"table",
"tempfile",
"time",

View File

@@ -58,7 +58,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.5.1"
version = "0.6.0"
edition = "2021"
license = "Apache-2.0"
@@ -121,7 +121,7 @@ rskafka = "0.5"
rust_decimal = "1.33"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
smallvec = "1"
smallvec = { version = "1", features = ["serde"] }
snafu = "0.7"
# on branch v0.38.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "6a93567ae38d42be5c8d08b13c8ff4dde26502ef", features = [

View File

@@ -34,11 +34,7 @@ connect_timeout = "1s"
tcp_nodelay = true
# WAL options.
# Currently, users are expected to choose the wal through the provider field.
# When a wal provider is chose, the user should comment out all other wal config
# except those corresponding to the chosen one.
[wal]
# WAL data directory
provider = "raft_engine"
# Raft-engine wal options, see `standalone.example.toml`.
@@ -117,6 +113,8 @@ sst_write_buffer_size = "8MB"
scan_parallelism = 0
# Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
parallel_scan_channel_size = 32
# Whether to allow stale WAL entries read during replay.
allow_stale_entries = false
# Log options, see `standalone.example.toml`
# [logging]

View File

@@ -64,8 +64,6 @@ provider = "raft_engine"
# selector_type = "round_robin"
# A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
# topic_name_prefix = "greptimedb_wal_topic"
# Number of partitions per topic.
# num_partitions = 1
# Expected number of replicas of each partition.
# replication_factor = 1
# Above which a topic creation operation will be cancelled.

View File

@@ -102,9 +102,8 @@ provider = "raft_engine"
# selector_type = "round_robin"
# The prefix of topic name.
# topic_name_prefix = "greptimedb_wal_topic"
# Number of partitions per topic.
# num_partitions = 1
# The number of replicas of each partition.
# Warning: the replication factor must be positive and must not be greater than the number of broker endpoints.
# replication_factor = 1
# The max size of a single producer batch.
@@ -138,6 +137,12 @@ purge_interval = "10m"
read_batch_size = 128
# Whether to sync log file after every write.
sync_write = false
# Whether to reuse logically truncated log files.
enable_log_recycle = true
# Whether to pre-create log files on start up
prefill_log_files = false
# Duration for fsyncing log files.
sync_period = "1000ms"
# Metadata storage options.
[metadata_store]
@@ -208,6 +213,8 @@ sst_write_buffer_size = "8MB"
scan_parallelism = 0
# Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
parallel_scan_channel_size = 32
# Whether to allow stale WAL entries read during replay.
allow_stale_entries = false
# Log options
# [logging]

View File

@@ -30,6 +30,7 @@ datafusion.workspace = true
datatypes.workspace = true
futures = "0.3"
futures-util.workspace = true
itertools.workspace = true
lazy_static.workspace = true
meta-client.workspace = true
moka = { workspace = true, features = ["future"] }

View File

@@ -16,6 +16,7 @@ mod columns;
mod key_column_usage;
mod memory_table;
mod predicate;
mod runtime_metrics;
mod schemata;
mod table_names;
mod tables;
@@ -23,7 +24,7 @@ mod tables;
use std::collections::HashMap;
use std::sync::{Arc, Weak};
use common_catalog::consts::{self, INFORMATION_SCHEMA_NAME};
use common_catalog::consts::{self, DEFAULT_CATALOG_NAME, INFORMATION_SCHEMA_NAME};
use common_error::ext::BoxedError;
use common_recordbatch::{RecordBatchStreamWrapper, SendableRecordBatchStream};
use datatypes::schema::SchemaRef;
@@ -46,6 +47,7 @@ use self::columns::InformationSchemaColumns;
use crate::error::Result;
use crate::information_schema::key_column_usage::InformationSchemaKeyColumnUsage;
use crate::information_schema::memory_table::{get_schema_columns, MemoryTable};
use crate::information_schema::runtime_metrics::InformationSchemaMetrics;
use crate::information_schema::schemata::InformationSchemaSchemata;
use crate::information_schema::tables::InformationSchemaTables;
use crate::CatalogManager;
@@ -56,7 +58,6 @@ lazy_static! {
ENGINES,
COLUMN_PRIVILEGES,
COLUMN_STATISTICS,
BUILD_INFO,
CHARACTER_SETS,
COLLATIONS,
COLLATION_CHARACTER_SET_APPLICABILITY,
@@ -142,6 +143,21 @@ impl InformationSchemaProvider {
fn build_tables(&mut self) {
let mut tables = HashMap::new();
// Carefully consider the tables that may expose sensitive cluster configurations,
// authentication details, and other critical information.
// Only put these tables under `greptime` catalog to prevent info leak.
if self.catalog_name == DEFAULT_CATALOG_NAME {
tables.insert(
RUNTIME_METRICS.to_string(),
self.build_table(RUNTIME_METRICS).unwrap(),
);
tables.insert(
BUILD_INFO.to_string(),
self.build_table(BUILD_INFO).unwrap(),
);
}
tables.insert(TABLES.to_string(), self.build_table(TABLES).unwrap());
tables.insert(SCHEMATA.to_string(), self.build_table(SCHEMATA).unwrap());
tables.insert(COLUMNS.to_string(), self.build_table(COLUMNS).unwrap());
@@ -209,6 +225,7 @@ impl InformationSchemaProvider {
self.catalog_name.clone(),
self.catalog_manager.clone(),
)) as _),
RUNTIME_METRICS => Some(Arc::new(InformationSchemaMetrics::new())),
_ => None,
}
}

View File

@@ -0,0 +1,250 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_RUNTIME_METRICS_TABLE_ID;
use common_error::ext::BoxedError;
use common_query::physical_plan::TaskContext;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_time::util::current_time_millis;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, MutableVector};
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::{
ConstantVector, Float64VectorBuilder, StringVector, StringVectorBuilder,
TimestampMillisecondVector, VectorRef,
};
use itertools::Itertools;
use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId};
use super::{InformationTable, RUNTIME_METRICS};
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
pub(super) struct InformationSchemaMetrics {
schema: SchemaRef,
}
const METRIC_NAME: &str = "metric_name";
const METRIC_VALUE: &str = "value";
const METRIC_LABELS: &str = "labels";
const NODE: &str = "node";
const NODE_TYPE: &str = "node_type";
const TIMESTAMP: &str = "timestamp";
/// The `information_schema.runtime_metrics` virtual table.
/// It provides the GreptimeDB runtime metrics for the users by SQL.
impl InformationSchemaMetrics {
pub(super) fn new() -> Self {
Self {
schema: Self::schema(),
}
}
fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(METRIC_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(METRIC_VALUE, ConcreteDataType::float64_datatype(), false),
ColumnSchema::new(METRIC_LABELS, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(NODE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(NODE_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(
TIMESTAMP,
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
]))
}
fn builder(&self) -> InformationSchemaMetricsBuilder {
InformationSchemaMetricsBuilder::new(self.schema.clone())
}
}
impl InformationTable for InformationSchemaMetrics {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_RUNTIME_METRICS_TABLE_ID
}
fn table_name(&self) -> &'static str {
RUNTIME_METRICS
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_metrics(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
));
Ok(Box::pin(
RecordBatchStreamAdapter::try_new(stream)
.map_err(BoxedError::new)
.context(InternalSnafu)?,
))
}
}
struct InformationSchemaMetricsBuilder {
schema: SchemaRef,
metric_names: StringVectorBuilder,
metric_values: Float64VectorBuilder,
metric_labels: StringVectorBuilder,
}
impl InformationSchemaMetricsBuilder {
fn new(schema: SchemaRef) -> Self {
Self {
schema,
metric_names: StringVectorBuilder::with_capacity(42),
metric_values: Float64VectorBuilder::with_capacity(42),
metric_labels: StringVectorBuilder::with_capacity(42),
}
}
fn add_metric(&mut self, metric_name: &str, labels: String, metric_value: f64) {
self.metric_names.push(Some(metric_name));
self.metric_values.push(Some(metric_value));
self.metric_labels.push(Some(&labels));
}
async fn make_metrics(&mut self, _request: Option<ScanRequest>) -> Result<RecordBatch> {
let metric_families = prometheus::gather();
let write_request =
common_telemetry::metric::convert_metric_to_write_request(metric_families, None, 0);
for ts in write_request.timeseries {
//Safety: always has `__name__` label
let metric_name = ts
.labels
.iter()
.find_map(|label| {
if label.name == "__name__" {
Some(label.value.clone())
} else {
None
}
})
.unwrap();
self.add_metric(
&metric_name,
ts.labels
.into_iter()
.filter_map(|label| {
if label.name == "__name__" {
None
} else {
Some(format!("{}={}", label.name, label.value))
}
})
.join(", "),
// Safety: always has a sample
ts.samples[0].value,
);
}
self.finish()
}
fn finish(&mut self) -> Result<RecordBatch> {
let rows_num = self.metric_names.len();
let unknowns = Arc::new(ConstantVector::new(
Arc::new(StringVector::from(vec!["unknown"])),
rows_num,
));
let timestamps = Arc::new(ConstantVector::new(
Arc::new(TimestampMillisecondVector::from_slice([
current_time_millis(),
])),
rows_num,
));
let columns: Vec<VectorRef> = vec![
Arc::new(self.metric_names.finish()),
Arc::new(self.metric_values.finish()),
Arc::new(self.metric_labels.finish()),
// TODO(dennis): supports node and node_type for cluster
unknowns.clone(),
unknowns,
timestamps,
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
}
impl DfPartitionStream for InformationSchemaMetrics {
fn schema(&self) -> &ArrowSchemaRef {
self.schema.arrow_schema()
}
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_metrics(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
))
}
}
#[cfg(test)]
mod tests {
use common_recordbatch::RecordBatches;
use super::*;
#[tokio::test]
async fn test_make_metrics() {
let metrics = InformationSchemaMetrics::new();
let stream = metrics.to_stream(ScanRequest::default()).unwrap();
let batches = RecordBatches::try_collect(stream).await.unwrap();
let result_literal = batches.pretty_print().unwrap();
assert!(result_literal.contains(METRIC_NAME));
assert!(result_literal.contains(METRIC_VALUE));
assert!(result_literal.contains(METRIC_LABELS));
assert!(result_literal.contains(NODE));
assert!(result_literal.contains(NODE_TYPE));
assert!(result_literal.contains(TIMESTAMP));
}
}
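
As a small self-contained illustration (not part of the commit), the label formatting inside `make_metrics` above drops the `__name__` label and renders the remaining labels as comma-separated `key=value` pairs; the sample labels and the helper name below are made up:

use itertools::Itertools;

// Hedged illustration of the label rendering in make_metrics: "__name__" is
// filtered out and the remaining labels are joined as "key=value" pairs.
fn render_labels(labels: Vec<(String, String)>) -> String {
    labels
        .into_iter()
        .filter(|(name, _)| name != "__name__")
        .map(|(name, value)| format!("{}={}", name, value))
        .join(", ")
}

// Example with hypothetical labels:
// render_labels(vec![("__name__".into(), "process_cpu_seconds_total".into()),
//                    ("pod".into(), "dn-0".into())]) == "pod=dn-0"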

View File

@@ -38,3 +38,4 @@ pub const TABLE_PRIVILEGES: &str = "table_privileges";
pub const TRIGGERS: &str = "triggers";
pub const GLOBAL_STATUS: &str = "global_status";
pub const SESSION_STATUS: &str = "session_status";
pub const RUNTIME_METRICS: &str = "runtime_metrics";

View File

@@ -15,7 +15,6 @@
use std::collections::HashMap;
use std::sync::Arc;
use common_catalog::consts::INFORMATION_SCHEMA_NAME;
use common_catalog::format_full_table_name;
use datafusion::common::{ResolvedTableReference, TableReference};
use datafusion::datasource::provider_as_source;
@@ -30,7 +29,7 @@ use crate::CatalogManagerRef;
pub struct DfTableSourceProvider {
catalog_manager: CatalogManagerRef,
resolved_tables: HashMap<String, Arc<dyn TableSource>>,
disallow_cross_schema_query: bool,
disallow_cross_catalog_query: bool,
default_catalog: String,
default_schema: String,
}
@@ -38,12 +37,12 @@ pub struct DfTableSourceProvider {
impl DfTableSourceProvider {
pub fn new(
catalog_manager: CatalogManagerRef,
disallow_cross_schema_query: bool,
disallow_cross_catalog_query: bool,
query_ctx: &QueryContext,
) -> Self {
Self {
catalog_manager,
disallow_cross_schema_query,
disallow_cross_catalog_query,
resolved_tables: HashMap::new(),
default_catalog: query_ctx.current_catalog().to_owned(),
default_schema: query_ctx.current_schema().to_owned(),
@@ -54,29 +53,18 @@ impl DfTableSourceProvider {
&'a self,
table_ref: TableReference<'a>,
) -> Result<ResolvedTableReference<'a>> {
if self.disallow_cross_schema_query {
if self.disallow_cross_catalog_query {
match &table_ref {
TableReference::Bare { .. } => (),
TableReference::Partial { schema, .. } => {
ensure!(
schema.as_ref() == self.default_schema
|| schema.as_ref() == INFORMATION_SCHEMA_NAME,
QueryAccessDeniedSnafu {
catalog: &self.default_catalog,
schema: schema.as_ref(),
}
);
}
TableReference::Partial { .. } => {}
TableReference::Full {
catalog, schema, ..
} => {
ensure!(
catalog.as_ref() == self.default_catalog
&& (schema.as_ref() == self.default_schema
|| schema.as_ref() == INFORMATION_SCHEMA_NAME),
catalog.as_ref() == self.default_catalog,
QueryAccessDeniedSnafu {
catalog: catalog.as_ref(),
schema: schema.as_ref()
schema: schema.as_ref(),
}
);
}
@@ -136,21 +124,21 @@ mod tests {
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
let _ = result.unwrap();
assert!(result.is_ok());
let table_ref = TableReference::Partial {
schema: Cow::Borrowed("public"),
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
let _ = result.unwrap();
assert!(result.is_ok());
let table_ref = TableReference::Partial {
schema: Cow::Borrowed("wrong_schema"),
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
assert!(result.is_err());
assert!(result.is_ok());
let table_ref = TableReference::Full {
catalog: Cow::Borrowed("greptime"),
@@ -158,7 +146,7 @@ mod tests {
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
let _ = result.unwrap();
assert!(result.is_ok());
let table_ref = TableReference::Full {
catalog: Cow::Borrowed("wrong_catalog"),
@@ -172,14 +160,15 @@ mod tests {
schema: Cow::Borrowed("information_schema"),
table: Cow::Borrowed("columns"),
};
let _ = table_provider.resolve_table_ref(table_ref).unwrap();
let result = table_provider.resolve_table_ref(table_ref);
assert!(result.is_ok());
let table_ref = TableReference::Full {
catalog: Cow::Borrowed("greptime"),
schema: Cow::Borrowed("information_schema"),
table: Cow::Borrowed("columns"),
};
let _ = table_provider.resolve_table_ref(table_ref).unwrap();
assert!(table_provider.resolve_table_ref(table_ref).is_ok());
let table_ref = TableReference::Full {
catalog: Cow::Borrowed("dummy"),
@@ -187,5 +176,12 @@ mod tests {
table: Cow::Borrowed("columns"),
};
assert!(table_provider.resolve_table_ref(table_ref).is_err());
let table_ref = TableReference::Full {
catalog: Cow::Borrowed("greptime"),
schema: Cow::Borrowed("greptime_private"),
table: Cow::Borrowed("columns"),
};
assert!(table_provider.resolve_table_ref(table_ref).is_ok());
}
}

View File

@@ -28,6 +28,7 @@ use frontend::heartbeat::handler::invalidate_table_cache::InvalidateTableCacheHa
use frontend::heartbeat::HeartbeatTask;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance};
use frontend::server::Services;
use meta_client::MetaClientOptions;
use servers::tls::{TlsMode, TlsOption};
use servers::Mode;
@@ -246,14 +247,18 @@ impl StartCommand {
meta_client,
)
.with_cache_invalidator(meta_backend)
.with_plugin(plugins)
.with_plugin(plugins.clone())
.with_heartbeat_task(heartbeat_task)
.try_build()
.await
.context(StartFrontendSnafu)?;
let servers = Services::new(plugins)
.build(opts.clone(), Arc::new(instance.clone()))
.await
.context(StartFrontendSnafu)?;
instance
.build_servers(opts)
.build_servers(opts, servers)
.await
.context(StartFrontendSnafu)?;

View File

@@ -128,7 +128,7 @@ impl StartCommand {
let mut opts: MetaSrvOptions = Options::load_layered_options(
self.config_file.as_deref(),
self.env_prefix.as_ref(),
None,
MetaSrvOptions::env_list_keys(),
)?;
if let Some(dir) = &cli_options.log_dir {

View File

@@ -40,6 +40,7 @@ use file_engine::config::EngineConfig as FileEngineConfig;
use frontend::frontend::FrontendOptions;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
use frontend::server::Services;
use frontend::service_config::{
GrpcOptions, InfluxdbOptions, MysqlOptions, OpentsdbOptions, PostgresOptions, PromStoreOptions,
};
@@ -118,6 +119,12 @@ pub struct StandaloneOptions {
pub export_metrics: ExportMetricsOption,
}
impl StandaloneOptions {
pub fn env_list_keys() -> Option<&'static [&'static str]> {
Some(&["wal.broker_endpoints"])
}
}
impl Default for StandaloneOptions {
fn default() -> Self {
Self {
@@ -267,7 +274,7 @@ impl StartCommand {
let opts: StandaloneOptions = Options::load_layered_options(
self.config_file.as_deref(),
self.env_prefix.as_ref(),
None,
StandaloneOptions::env_list_keys(),
)?;
self.convert_options(cli_options, opts)
@@ -425,13 +432,17 @@ impl StartCommand {
.await?;
let mut frontend = FrontendBuilder::new(kv_backend, datanode_manager, ddl_task_executor)
.with_plugin(fe_plugins)
.with_plugin(fe_plugins.clone())
.try_build()
.await
.context(StartFrontendSnafu)?;
let servers = Services::new(fe_plugins)
.build(opts.clone(), Arc::new(frontend.clone()))
.await
.context(StartFrontendSnafu)?;
frontend
.build_servers(opts)
.build_servers(opts, servers)
.await
.context(StartFrontendSnafu)?;

View File

@@ -80,6 +80,8 @@ pub const INFORMATION_SCHEMA_TRIGGERS_TABLE_ID: u32 = 24;
pub const INFORMATION_SCHEMA_GLOBAL_STATUS_TABLE_ID: u32 = 25;
/// id for information_schema.SESSION_STATUS
pub const INFORMATION_SCHEMA_SESSION_STATUS_TABLE_ID: u32 = 26;
/// id for information_schema.RUNTIME_METRICS
pub const INFORMATION_SCHEMA_RUNTIME_METRICS_TABLE_ID: u32 = 27;
/// ----- End of information_schema tables -----
pub const MITO_ENGINE: &str = "mito";

View File

@@ -17,6 +17,11 @@ use consts::DEFAULT_CATALOG_NAME;
pub mod consts;
pub mod error;
#[inline]
pub fn format_schema_name(catalog: &str, schema: &str) -> String {
format!("{catalog}.{schema}")
}
/// Formats table fully-qualified name
#[inline]
pub fn format_full_table_name(catalog: &str, schema: &str, table: &str) -> String {

View File

@@ -18,9 +18,7 @@ pub mod raft_engine;
use serde::{Deserialize, Serialize};
use serde_with::with_prefix;
pub use crate::wal::kafka::{
KafkaConfig, KafkaOptions as KafkaWalOptions, StandaloneKafkaConfig, Topic as KafkaWalTopic,
};
pub use crate::wal::kafka::{KafkaConfig, KafkaOptions as KafkaWalOptions, StandaloneKafkaConfig};
pub use crate::wal::raft_engine::RaftEngineConfig;
/// An encoded wal options will be wrapped into a (WAL_OPTIONS_KEY, encoded wal options) key-value pair

View File

@@ -19,11 +19,6 @@ use rskafka::client::partition::Compression as RsKafkaCompression;
use serde::{Deserialize, Serialize};
use serde_with::with_prefix;
/// Topic name prefix.
pub const TOPIC_NAME_PREFIX: &str = "greptimedb_wal_topic";
/// Kafka wal topic.
pub type Topic = String;
/// The type of the topic selector, i.e. with which strategy to select a topic.
#[derive(Default, Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
@@ -138,5 +133,5 @@ impl Default for StandaloneKafkaConfig {
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct KafkaOptions {
/// Kafka wal topic.
pub topic: Topic,
pub topic: String,
}

View File

@@ -34,6 +34,13 @@ pub struct RaftEngineConfig {
pub read_batch_size: usize,
// whether to sync log file after every write
pub sync_write: bool,
// whether to reuse logically truncated log files.
pub enable_log_recycle: bool,
// whether to pre-create log files on start up
pub prefill_log_files: bool,
// duration for fsyncing log files.
#[serde(with = "humantime_serde")]
pub sync_period: Option<Duration>,
}
impl Default for RaftEngineConfig {
@@ -45,6 +52,9 @@ impl Default for RaftEngineConfig {
purge_interval: Duration::from_secs(600),
read_batch_size: 128,
sync_write: false,
enable_log_recycle: true,
prefill_log_files: false,
sync_period: None,
}
}
}
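
The three new `RaftEngineConfig` fields above correspond to the `enable_log_recycle`, `prefill_log_files`, and `sync_period` entries added to the example config files earlier in this compare. A minimal sketch of overriding them in code, assuming only the field names and `Default` impl shown in this diff (the function wrapper is illustrative, not taken from the commit):

use std::time::Duration;

fn example_wal_config() -> RaftEngineConfig {
    // Hedged sketch: tweak only the options introduced here, keeping every
    // other raft-engine setting at the defaults listed above.
    RaftEngineConfig {
        enable_log_recycle: true,                       // reuse logically truncated log files
        prefill_log_files: false,                       // do not pre-create log files on start up
        sync_period: Some(Duration::from_millis(1000)), // periodical fsync interval
        ..RaftEngineConfig::default()
    }
}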

View File

@@ -13,10 +13,12 @@
// limitations under the License.
pub mod build;
pub mod version;
use std::sync::Arc;
use build::BuildFunction;
use version::VersionFunction;
use crate::function_registry::FunctionRegistry;
@@ -25,5 +27,6 @@ pub(crate) struct SystemFunction;
impl SystemFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(BuildFunction));
registry.register(Arc::new(VersionFunction));
}
}

View File

@@ -0,0 +1,54 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use std::{env, fmt};
use common_query::error::Result;
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::ConcreteDataType;
use datatypes::vectors::{StringVector, VectorRef};
use crate::function::{Function, FunctionContext};
#[derive(Clone, Debug, Default)]
pub(crate) struct VersionFunction;
impl fmt::Display for VersionFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "VERSION")
}
}
impl Function for VersionFunction {
fn name(&self) -> &str {
"version"
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::string_datatype())
}
fn signature(&self) -> Signature {
Signature::exact(vec![], Volatility::Immutable)
}
fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
let result = StringVector::from(vec![format!(
"5.7.20-greptimedb-{}",
env!("CARGO_PKG_VERSION")
)]);
Ok(Arc::new(result))
}
}
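
As a quick, self-contained illustration (not part of the diff): `eval` above combines a fixed MySQL-style version prefix with the crate version from `CARGO_PKG_VERSION`, so for the 0.6.0 release bumped in this compare the returned row would be the string below.

// Hedged illustration: mirrors the format! call in eval above for version 0.6.0.
let version_string = format!("5.7.20-greptimedb-{}", "0.6.0");
assert_eq!(version_string, "5.7.20-greptimedb-0.6.0");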

View File

@@ -24,7 +24,7 @@ use async_trait::async_trait;
use common_grpc_expr::alter_expr_to_request;
use common_procedure::error::{FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu};
use common_procedure::{
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure, Status,
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure, Status, StringKey,
};
use common_telemetry::tracing_context::TracingContext;
use common_telemetry::{debug, info};
@@ -40,10 +40,11 @@ use table::requests::AlterKind;
use crate::cache_invalidator::Context;
use crate::ddl::utils::handle_operate_region_error;
use crate::ddl::DdlContext;
use crate::error::{self, ConvertAlterTableRequestSnafu, InvalidProtoMsgSnafu, Result};
use crate::error::{self, ConvertAlterTableRequestSnafu, Error, InvalidProtoMsgSnafu, Result};
use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::DeserializedValueWithBytes;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock, TableNameLock};
use crate::metrics;
use crate::rpc::ddl::AlterTableTask;
use crate::rpc::router::{find_leader_regions, find_leaders};
@@ -63,7 +64,7 @@ impl AlterTableProcedure {
cluster_id: u64,
task: AlterTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
physical_table_name: Option<TableName>,
physical_table_info: Option<(TableId, TableName)>,
context: DdlContext,
) -> Result<Self> {
let alter_kind = task
@@ -86,7 +87,7 @@ impl AlterTableProcedure {
data: AlterTableData::new(
task,
table_info_value,
physical_table_name,
physical_table_info,
cluster_id,
next_column_id,
),
@@ -335,32 +336,31 @@ impl AlterTableProcedure {
Ok(Status::Done)
}
fn lock_key_inner(&self) -> Vec<String> {
fn lock_key_inner(&self) -> Vec<StringKey> {
let mut lock_key = vec![];
if let Some(physical_table_name) = self.data.physical_table_name() {
let physical_table_key = common_catalog::format_full_table_name(
&physical_table_name.catalog_name,
&physical_table_name.schema_name,
&physical_table_name.table_name,
if let Some((physical_table_id, physical_table_name)) = self.data.physical_table_info() {
lock_key.push(CatalogLock::Read(&physical_table_name.catalog_name).into());
lock_key.push(
SchemaLock::read(
&physical_table_name.catalog_name,
&physical_table_name.schema_name,
)
.into(),
);
lock_key.push(physical_table_key);
lock_key.push(TableLock::Read(*physical_table_id).into())
}
let table_ref = self.data.table_ref();
let table_key = common_catalog::format_full_table_name(
table_ref.catalog,
table_ref.schema,
table_ref.table,
);
lock_key.push(table_key);
let table_id = self.data.table_id();
lock_key.push(CatalogLock::Read(table_ref.catalog).into());
lock_key.push(SchemaLock::read(table_ref.catalog, table_ref.schema).into());
lock_key.push(TableLock::Write(table_id).into());
if let Ok(Kind::RenameTable(RenameTable { new_table_name })) = self.alter_kind() {
lock_key.push(common_catalog::format_full_table_name(
table_ref.catalog,
table_ref.schema,
new_table_name,
))
lock_key.push(
TableNameLock::new(table_ref.catalog, table_ref.schema, new_table_name).into(),
)
}
lock_key
@@ -374,8 +374,8 @@ impl Procedure for AlterTableProcedure {
}
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
let error_handler = |e| {
if matches!(e, error::Error::RetryLater { .. }) {
let error_handler = |e: Error| {
if e.is_retry_later() {
ProcedureError::retry_later(e)
} else {
ProcedureError::external(e)
@@ -406,7 +406,7 @@ impl Procedure for AlterTableProcedure {
fn lock_key(&self) -> LockKey {
let key = self.lock_key_inner();
LockKey::new_exclusive(key)
LockKey::new(key)
}
}
@@ -423,13 +423,13 @@ enum AlterTableState {
#[derive(Debug, Serialize, Deserialize)]
pub struct AlterTableData {
cluster_id: u64,
state: AlterTableState,
task: AlterTableTask,
/// Table info value before alteration.
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
/// Physical table name, if the table to alter is a logical table.
physical_table_name: Option<TableName>,
cluster_id: u64,
physical_table_info: Option<(TableId, TableName)>,
/// Next column id of the table if the task adds columns to the table.
next_column_id: Option<ColumnId>,
}
@@ -438,7 +438,7 @@ impl AlterTableData {
pub fn new(
task: AlterTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
physical_table_name: Option<TableName>,
physical_table_info: Option<(TableId, TableName)>,
cluster_id: u64,
next_column_id: Option<ColumnId>,
) -> Self {
@@ -446,7 +446,7 @@ impl AlterTableData {
state: AlterTableState::Prepare,
task,
table_info_value,
physical_table_name,
physical_table_info,
cluster_id,
next_column_id,
}
@@ -464,8 +464,8 @@ impl AlterTableData {
&self.table_info_value.table_info
}
fn physical_table_name(&self) -> Option<&TableName> {
self.physical_table_name.as_ref()
fn physical_table_info(&self) -> Option<&(TableId, TableName)> {
self.physical_table_info.as_ref()
}
}

View File

@@ -41,6 +41,7 @@ use crate::ddl::DdlContext;
use crate::error::{self, Result, TableRouteNotFoundSnafu};
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteValue;
use crate::lock_key::TableNameLock;
use crate::metrics;
use crate::region_keeper::OperatingRegionGuard;
use crate::rpc::ddl::CreateTableTask;
@@ -343,13 +344,12 @@ impl Procedure for CreateTableProcedure {
fn lock_key(&self) -> LockKey {
let table_ref = &self.creator.data.table_ref();
let key = common_catalog::format_full_table_name(
LockKey::single(TableNameLock::new(
table_ref.catalog,
table_ref.schema,
table_ref.table,
);
LockKey::single_exclusive(key)
))
}
}

View File

@@ -41,6 +41,7 @@ use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteValue;
use crate::key::DeserializedValueWithBytes;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
use crate::metrics;
use crate::region_keeper::OperatingRegionGuard;
use crate::rpc::ddl::DropTableTask;
@@ -267,13 +268,14 @@ impl Procedure for DropTableProcedure {
fn lock_key(&self) -> LockKey {
let table_ref = &self.data.table_ref();
let key = common_catalog::format_full_table_name(
table_ref.catalog,
table_ref.schema,
table_ref.table,
);
let table_id = self.data.table_id();
let lock_key = vec![
CatalogLock::Read(table_ref.catalog).into(),
SchemaLock::read(table_ref.catalog, table_ref.schema).into(),
TableLock::Write(table_id).into(),
];
LockKey::single_exclusive(key)
LockKey::new(lock_key)
}
}

View File

@@ -37,6 +37,7 @@ use crate::error::{Result, TableNotFoundSnafu};
use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::DeserializedValueWithBytes;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
use crate::metrics;
use crate::rpc::ddl::TruncateTableTask;
use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute};
@@ -75,13 +76,14 @@ impl Procedure for TruncateTableProcedure {
fn lock_key(&self) -> LockKey {
let table_ref = &self.data.table_ref();
let key = common_catalog::format_full_table_name(
table_ref.catalog,
table_ref.schema,
table_ref.table,
);
let table_id = self.data.table_id();
let lock_key = vec![
CatalogLock::Read(table_ref.catalog).into(),
SchemaLock::read(table_ref.catalog, table_ref.schema).into(),
TableLock::Write(table_id).into(),
];
LockKey::single_exclusive(key)
LockKey::new(lock_key)
}
}
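
The alter, drop, and truncate table procedures above all switch from formatted table-name strings to the typed helpers in the new `lock_key` module. A minimal sketch of the shared pattern, assuming the imports shown in those diffs (`CatalogLock`, `SchemaLock`, `TableLock` from `crate::lock_key`, `LockKey` and `StringKey` from `common_procedure`, `TableId` from `store_api::storage`); the function name is illustrative:

// Hedged sketch: shared (read) locks on the catalog and schema plus an
// exclusive (write) lock on the table id, composed into one LockKey.
fn table_ddl_lock_key(catalog: &str, schema: &str, table_id: TableId) -> LockKey {
    let keys: Vec<StringKey> = vec![
        CatalogLock::Read(catalog).into(),
        SchemaLock::read(catalog, schema).into(),
        TableLock::Write(table_id).into(),
    ];
    LockKey::new(keys)
}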

View File

@@ -36,7 +36,7 @@ pub fn handle_operate_region_error(datanode: Peer) -> impl FnOnce(crate::error::
}
pub fn handle_retry_error(e: Error) -> ProcedureError {
if matches!(e, error::Error::RetryLater { .. }) {
if e.is_retry_later() {
ProcedureError::retry_later(e)
} else {
ProcedureError::external(e)

View File

@@ -19,7 +19,7 @@ use common_procedure::{watcher, ProcedureId, ProcedureManagerRef, ProcedureWithI
use common_telemetry::tracing_context::{FutureExt, TracingContext};
use common_telemetry::{info, tracing};
use snafu::{OptionExt, ResultExt};
use store_api::storage::RegionNumber;
use store_api::storage::{RegionNumber, TableId};
use crate::cache_invalidator::CacheInvalidatorRef;
use crate::datanode_manager::DatanodeManagerRef;
@@ -162,7 +162,7 @@ impl DdlManager {
cluster_id: u64,
alter_table_task: AlterTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
physical_table_name: Option<TableName>,
physical_table_info: Option<(TableId, TableName)>,
) -> Result<ProcedureId> {
let context = self.create_context();
@@ -170,7 +170,7 @@ impl DdlManager {
cluster_id,
alter_table_task,
table_info_value,
physical_table_name,
physical_table_info,
context,
)?;
@@ -341,7 +341,7 @@ async fn handle_alter_table_task(
.get_physical_table_id(table_id)
.await?;
let physical_table_name = if physical_table_id == table_id {
let physical_table_info = if physical_table_id == table_id {
None
} else {
let physical_table_info = &ddl_manager
@@ -353,11 +353,14 @@ async fn handle_alter_table_task(
table_name: table_ref.to_string(),
})?
.table_info;
Some(TableName {
catalog_name: physical_table_info.catalog_name.clone(),
schema_name: physical_table_info.schema_name.clone(),
table_name: physical_table_info.name.clone(),
})
Some((
physical_table_id,
TableName {
catalog_name: physical_table_info.catalog_name.clone(),
schema_name: physical_table_info.schema_name.clone(),
table_name: physical_table_info.name.clone(),
},
))
};
let id = ddl_manager
@@ -365,7 +368,7 @@ async fn handle_alter_table_task(
cluster_id,
alter_table_task,
table_info_value,
physical_table_name,
physical_table_info,
)
.await?;
@@ -386,15 +389,21 @@ async fn handle_drop_table_task(
let table_metadata_manager = &ddl_manager.table_metadata_manager();
let table_ref = drop_table_task.table_ref();
let (table_info_value, table_route_value) =
table_metadata_manager.get_full_table_info(table_id).await?;
let table_info_value = table_metadata_manager
.table_info_manager()
.get(table_id)
.await?;
let (_, table_route_value) = table_metadata_manager
.table_route_manager()
.get_physical_table_route(table_id)
.await?;
let table_info_value = table_info_value.with_context(|| error::TableInfoNotFoundSnafu {
table_name: table_ref.to_string(),
})?;
let table_route_value =
table_route_value.context(error::TableRouteNotFoundSnafu { table_id })?;
DeserializedValueWithBytes::from_inner(TableRouteValue::Physical(table_route_value));
let id = ddl_manager
.submit_drop_table_task(

View File

@@ -354,6 +354,7 @@ impl TableMetadataManager {
&self.kv_backend
}
// TODO(ruihang): deprecate this
pub async fn get_full_table_info(
&self,
table_id: TableId,

View File

@@ -178,15 +178,6 @@ impl DatanodeTableManager {
let txns = distribution
.into_iter()
.map(|(datanode_id, regions)| {
let filtered_region_wal_options = regions
.iter()
.filter_map(|region_number| {
region_wal_options
.get(region_number)
.map(|wal_options| (*region_number, wal_options.clone()))
})
.collect();
let key = DatanodeTableKey::new(datanode_id, table_id);
let val = DatanodeTableValue::new(
table_id,
@@ -195,7 +186,9 @@ impl DatanodeTableManager {
engine: engine.to_string(),
region_storage_path: region_storage_path.to_string(),
region_options: region_options.clone(),
region_wal_options: filtered_region_wal_options,
// FIXME(weny): Until all region WAL options are stored in the table metadata (or somewhere else),
// we must store all region WAL options here.
region_wal_options: region_wal_options.clone(),
},
);
@@ -243,7 +236,15 @@ impl DatanodeTableManager {
if need_update {
let key = DatanodeTableKey::new(datanode, table_id);
let raw_key = key.as_raw_key();
let val = DatanodeTableValue::new(table_id, regions, region_info.clone())
// FIXME(weny): add unit tests.
let mut new_region_info = region_info.clone();
if need_update_options {
new_region_info.region_options = new_region_options.clone();
}
if need_update_wal_options {
new_region_info.region_wal_options = new_region_wal_options.clone();
}
let val = DatanodeTableValue::new(table_id, regions, new_region_info)
.try_as_raw_value()?;
opts.push(TxnOp::Put(raw_key, val));
}

View File

@@ -114,6 +114,7 @@ where
Ok(!resp.kvs.is_empty())
}
/// Returns the previous key-value pair if `prev_kv` is `true`.
async fn delete(&self, key: &[u8], prev_kv: bool) -> Result<Option<KeyValue>, Self::Error> {
let mut req = DeleteRangeRequest::new().with_key(key.to_vec());
if prev_kv {

View File

@@ -27,6 +27,7 @@ pub mod heartbeat;
pub mod instruction;
pub mod key;
pub mod kv_backend;
pub mod lock_key;
pub mod metrics;
pub mod peer;
pub mod range_stream;

View File

@@ -0,0 +1,235 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::Display;
use common_catalog::{format_full_table_name, format_schema_name};
use common_procedure::StringKey;
use store_api::storage::{RegionId, TableId};
const CATALOG_LOCK_PREFIX: &str = "__catalog_lock";
const SCHEMA_LOCK_PREFIX: &str = "__schema_lock";
const TABLE_LOCK_PREFIX: &str = "__table_lock";
const TABLE_NAME_LOCK_PREFIX: &str = "__table_name_lock";
const REGION_LOCK_PREFIX: &str = "__region_lock";
/// [CatalogLock] acquires the lock on the tenant level.
pub enum CatalogLock<'a> {
Read(&'a str),
Write(&'a str),
}
impl<'a> Display for CatalogLock<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let key = match self {
CatalogLock::Read(s) => s,
CatalogLock::Write(s) => s,
};
write!(f, "{}/{}", CATALOG_LOCK_PREFIX, key)
}
}
impl<'a> From<CatalogLock<'a>> for StringKey {
fn from(value: CatalogLock) -> Self {
match value {
CatalogLock::Write(_) => StringKey::Exclusive(value.to_string()),
CatalogLock::Read(_) => StringKey::Share(value.to_string()),
}
}
}
/// [SchemaLock] acquires the lock on the database level.
pub enum SchemaLock {
Read(String),
Write(String),
}
impl SchemaLock {
pub fn read(catalog: &str, schema: &str) -> Self {
Self::Read(format_schema_name(catalog, schema))
}
pub fn write(catalog: &str, schema: &str) -> Self {
Self::Write(format_schema_name(catalog, schema))
}
}
impl Display for SchemaLock {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let key = match self {
SchemaLock::Read(s) => s,
SchemaLock::Write(s) => s,
};
write!(f, "{}/{}", SCHEMA_LOCK_PREFIX, key)
}
}
impl From<SchemaLock> for StringKey {
fn from(value: SchemaLock) -> Self {
match value {
SchemaLock::Write(_) => StringKey::Exclusive(value.to_string()),
SchemaLock::Read(_) => StringKey::Share(value.to_string()),
}
}
}
/// [TableNameLock] prevents other procedures from creating a table with the same name.
pub enum TableNameLock {
Write(String),
}
impl TableNameLock {
pub fn new(catalog: &str, schema: &str, table: &str) -> Self {
Self::Write(format_full_table_name(catalog, schema, table))
}
}
impl Display for TableNameLock {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let TableNameLock::Write(name) = self;
write!(f, "{}/{}", TABLE_NAME_LOCK_PREFIX, name)
}
}
impl From<TableNameLock> for StringKey {
fn from(value: TableNameLock) -> Self {
match value {
TableNameLock::Write(_) => StringKey::Exclusive(value.to_string()),
}
}
}
/// [TableLock] acquires the lock on the table level.
///
/// Note: allows reading/modifying the corresponding table's [TableInfoValue](crate::key::table_info::TableInfoValue),
/// [TableRouteValue](crate::key::table_route::TableRouteValue), and [TableDatanodeValue](crate::key::datanode_table::DatanodeTableValue).
pub enum TableLock {
Read(TableId),
Write(TableId),
}
impl Display for TableLock {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let key = match self {
TableLock::Read(s) => s,
TableLock::Write(s) => s,
};
write!(f, "{}/{}", TABLE_LOCK_PREFIX, key)
}
}
impl From<TableLock> for StringKey {
fn from(value: TableLock) -> Self {
match value {
TableLock::Write(_) => StringKey::Exclusive(value.to_string()),
TableLock::Read(_) => StringKey::Share(value.to_string()),
}
}
}
/// [RegionLock] acquires the lock on the region level.
///
/// Note:
/// - Allows modifying the corresponding region's [TableRouteValue](crate::key::table_route::TableRouteValue) and
/// [TableDatanodeValue](crate::key::datanode_table::DatanodeTableValue) even if the procedure
/// only acquires [RegionLock::Write] without acquiring [TableLock::Write].
///
/// - The [TableLock] of the same table should be acquired in the same procedure.
///
/// TODO(weny): we should consider separating TableRouteValue into finer keys.
pub enum RegionLock {
Read(RegionId),
Write(RegionId),
}
impl Display for RegionLock {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let key = match self {
RegionLock::Read(s) => s.as_u64(),
RegionLock::Write(s) => s.as_u64(),
};
write!(f, "{}/{}", REGION_LOCK_PREFIX, key)
}
}
impl From<RegionLock> for StringKey {
fn from(value: RegionLock) -> Self {
match value {
RegionLock::Write(_) => StringKey::Exclusive(value.to_string()),
RegionLock::Read(_) => StringKey::Share(value.to_string()),
}
}
}
#[cfg(test)]
mod tests {
use common_procedure::StringKey;
use crate::lock_key::*;
#[test]
fn test_lock_key() {
// The catalog lock
let string_key: StringKey = CatalogLock::Read("foo").into();
assert_eq!(
string_key,
StringKey::Share(format!("{}/{}", CATALOG_LOCK_PREFIX, "foo"))
);
let string_key: StringKey = CatalogLock::Write("foo").into();
assert_eq!(
string_key,
StringKey::Exclusive(format!("{}/{}", CATALOG_LOCK_PREFIX, "foo"))
);
// The schema lock
let string_key: StringKey = SchemaLock::read("foo", "bar").into();
assert_eq!(
string_key,
StringKey::Share(format!("{}/{}", SCHEMA_LOCK_PREFIX, "foo.bar"))
);
let string_key: StringKey = SchemaLock::write("foo", "bar").into();
assert_eq!(
string_key,
StringKey::Exclusive(format!("{}/{}", SCHEMA_LOCK_PREFIX, "foo.bar"))
);
// The table lock
let string_key: StringKey = TableLock::Read(1024).into();
assert_eq!(
string_key,
StringKey::Share(format!("{}/{}", TABLE_LOCK_PREFIX, 1024))
);
let string_key: StringKey = TableLock::Write(1024).into();
assert_eq!(
string_key,
StringKey::Exclusive(format!("{}/{}", TABLE_LOCK_PREFIX, 1024))
);
// The table name lock
let string_key: StringKey = TableNameLock::new("foo", "bar", "baz").into();
assert_eq!(
string_key,
StringKey::Exclusive(format!("{}/{}", TABLE_NAME_LOCK_PREFIX, "foo.bar.baz"))
);
// The region lock
let region_id = RegionId::new(1024, 1);
let string_key: StringKey = RegionLock::Read(region_id).into();
assert_eq!(
string_key,
StringKey::Share(format!("{}/{}", REGION_LOCK_PREFIX, region_id.as_u64()))
);
let string_key: StringKey = RegionLock::Write(region_id).into();
assert_eq!(
string_key,
StringKey::Exclusive(format!("{}/{}", REGION_LOCK_PREFIX, region_id.as_u64()))
);
}
}
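For orientation, here is a minimal sketch (not part of this diff) of how a DDL procedure composes these types into a LockKey, matching the truncate/drop lock_key() implementations shown earlier in this compare; the commented key strings mirror the formats asserted in the tests, and the import paths follow the modules shown above:

// Illustrative only.
use common_meta::lock_key::{CatalogLock, SchemaLock, TableLock};
use common_procedure::LockKey;
use store_api::storage::TableId;

fn ddl_lock_key(catalog: &str, schema: &str, table_id: TableId) -> LockKey {
    LockKey::new(vec![
        CatalogLock::Read(catalog).into(),        // Share("__catalog_lock/{catalog}")
        SchemaLock::read(catalog, schema).into(), // Share("__schema_lock/{catalog}.{schema}")
        TableLock::Write(table_id).into(),        // Exclusive("__table_lock/{table_id}")
    ])
}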

View File

@@ -23,7 +23,6 @@ use serde::{Deserialize, Serialize};
use store_api::storage::{RegionId, RegionNumber};
use crate::wal::kafka::KafkaConfig;
pub use crate::wal::kafka::Topic as KafkaWalTopic;
pub use crate::wal::options_allocator::{
allocate_region_wal_options, WalOptionsAllocator, WalOptionsAllocatorRef,
};
@@ -98,7 +97,6 @@ mod tests {
num_topics = 32
selector_type = "round_robin"
topic_name_prefix = "greptimedb_wal_topic"
num_partitions = 1
replication_factor = 1
create_topic_timeout = "30s"
backoff_init = "500ms"

View File

@@ -14,7 +14,6 @@
#[cfg(any(test, feature = "testing"))]
pub mod test_util;
pub mod topic;
pub mod topic_manager;
pub mod topic_selector;
@@ -23,7 +22,6 @@ use std::time::Duration;
use common_config::wal::kafka::{kafka_backoff, KafkaBackoffConfig, TopicSelectorType};
use serde::{Deserialize, Serialize};
pub use crate::wal::kafka::topic::Topic;
pub use crate::wal::kafka::topic_manager::TopicManager;
/// Configurations for kafka wal.

View File

@@ -1,19 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/// Kafka wal topic.
/// Publishers publish log entries to the topic while subscribers pull log entries from the topic.
/// A topic is simply a string right now. But it may be more complex in the future.
// TODO(niebayes): remove the Topic alias.
pub type Topic = String;

View File

@@ -33,7 +33,6 @@ use crate::error::{
};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::PutRequest;
use crate::wal::kafka::topic::Topic;
use crate::wal::kafka::topic_selector::{RoundRobinTopicSelector, TopicSelectorRef};
use crate::wal::kafka::KafkaConfig;
@@ -46,7 +45,7 @@ const DEFAULT_PARTITION: i32 = 0;
/// Manages topic initialization and selection.
pub struct TopicManager {
config: KafkaConfig,
pub(crate) topic_pool: Vec<Topic>,
pub(crate) topic_pool: Vec<String>,
pub(crate) topic_selector: TopicSelectorRef,
kv_backend: KvBackendRef,
}
@@ -86,7 +85,7 @@ impl TopicManager {
let created_topics = Self::restore_created_topics(&self.kv_backend)
.await?
.into_iter()
.collect::<HashSet<Topic>>();
.collect::<HashSet<String>>();
// Creates missing topics.
let to_be_created = topics
@@ -108,7 +107,7 @@ impl TopicManager {
}
/// Tries to create topics specified by indexes in `to_be_created`.
async fn try_create_topics(&self, topics: &[Topic], to_be_created: &[usize]) -> Result<()> {
async fn try_create_topics(&self, topics: &[String], to_be_created: &[usize]) -> Result<()> {
// Builds a Kafka controller client for creating topics.
let backoff_config = BackoffConfig {
init_backoff: self.config.backoff.init,
@@ -141,18 +140,18 @@ impl TopicManager {
}
/// Selects one topic from the topic pool through the topic selector.
pub fn select(&self) -> Result<&Topic> {
pub fn select(&self) -> Result<&String> {
self.topic_selector.select(&self.topic_pool)
}
/// Selects a batch of topics from the topic pool through the topic selector.
pub fn select_batch(&self, num_topics: usize) -> Result<Vec<&Topic>> {
pub fn select_batch(&self, num_topics: usize) -> Result<Vec<&String>> {
(0..num_topics)
.map(|_| self.topic_selector.select(&self.topic_pool))
.collect()
}
async fn try_append_noop_record(&self, topic: &Topic, client: &Client) -> Result<()> {
async fn try_append_noop_record(&self, topic: &String, client: &Client) -> Result<()> {
let partition_client = client
.partition_client(topic, DEFAULT_PARTITION, UnknownTopicHandling::Retry)
.await
@@ -177,7 +176,7 @@ impl TopicManager {
Ok(())
}
async fn try_create_topic(&self, topic: &Topic, client: &ControllerClient) -> Result<()> {
async fn try_create_topic(&self, topic: &String, client: &ControllerClient) -> Result<()> {
match client
.create_topic(
topic.clone(),
@@ -203,7 +202,7 @@ impl TopicManager {
}
}
async fn restore_created_topics(kv_backend: &KvBackendRef) -> Result<Vec<Topic>> {
async fn restore_created_topics(kv_backend: &KvBackendRef) -> Result<Vec<String>> {
kv_backend
.get(CREATED_TOPICS_KEY.as_bytes())
.await?
@@ -213,7 +212,7 @@ impl TopicManager {
)
}
async fn persist_created_topics(topics: &[Topic], kv_backend: &KvBackendRef) -> Result<()> {
async fn persist_created_topics(topics: &[String], kv_backend: &KvBackendRef) -> Result<()> {
let raw_topics = serde_json::to_vec(topics).context(EncodeJsonSnafu)?;
kv_backend
.put(PutRequest {

View File

@@ -19,12 +19,11 @@ use rand::Rng;
use snafu::ensure;
use crate::error::{EmptyTopicPoolSnafu, Result};
use crate::wal::kafka::topic::Topic;
/// Controls topic selection.
pub(crate) trait TopicSelector: Send + Sync {
/// Selects a topic from the topic pool.
fn select<'a>(&self, topic_pool: &'a [Topic]) -> Result<&'a Topic>;
fn select<'a>(&self, topic_pool: &'a [String]) -> Result<&'a String>;
}
/// Arc wrapper of TopicSelector.
@@ -48,7 +47,7 @@ impl RoundRobinTopicSelector {
}
impl TopicSelector for RoundRobinTopicSelector {
fn select<'a>(&self, topic_pool: &'a [Topic]) -> Result<&'a Topic> {
fn select<'a>(&self, topic_pool: &'a [String]) -> Result<&'a String> {
ensure!(!topic_pool.is_empty(), EmptyTopicPoolSnafu);
let which = self.cursor.fetch_add(1, Ordering::Relaxed) % topic_pool.len();
Ok(&topic_pool[which])

View File

@@ -26,6 +26,6 @@ pub mod watcher;
pub use crate::error::{Error, Result};
pub use crate::procedure::{
BoxedProcedure, Context, ContextProvider, LockKey, Procedure, ProcedureId, ProcedureManager,
ProcedureManagerRef, ProcedureState, ProcedureWithId, Status,
ProcedureManagerRef, ProcedureState, ProcedureWithId, Status, StringKey,
};
pub use crate::watcher::Watcher;

View File

@@ -18,3 +18,10 @@ pub use crate::columnar_value::ColumnarValue;
pub use crate::function::*;
pub use crate::logical_plan::{create_udf, AggregateFunction, Expr, ScalarUdf};
pub use crate::signature::{Signature, TypeSignature, Volatility};
/// Default timestamp column name for Prometheus metrics.
pub const GREPTIME_TIMESTAMP: &str = "greptime_timestamp";
/// Default value column name for Prometheus metrics.
pub const GREPTIME_VALUE: &str = "greptime_value";
/// Default counter column name for OTLP metrics.
pub const GREPTIME_COUNT: &str = "greptime_count";

View File

@@ -5,6 +5,9 @@ edition.workspace = true
license.workspace = true
[dependencies]
client.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true
once_cell.workspace = true
rand.workspace = true
tempfile.workspace = true

View File

@@ -19,6 +19,7 @@ use std::process::Command;
use std::sync::LazyLock;
pub mod ports;
pub mod recordbatch;
pub mod temp_dir;
// Rust is working on an env possibly named `CARGO_WORKSPACE_DIR` to find the root path to the

View File

@@ -0,0 +1,46 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use client::Database;
use common_query::Output;
use common_recordbatch::util;
pub enum ExpectedOutput<'a> {
AffectedRows(usize),
QueryResult(&'a str),
}
pub async fn execute_and_check_output(db: &Database, sql: &str, expected: ExpectedOutput<'_>) {
let output = db.sql(sql).await.unwrap();
match (&output, expected) {
(Output::AffectedRows(x), ExpectedOutput::AffectedRows(y)) => {
assert_eq!(*x, y, "actual: \n{}", x)
}
(Output::RecordBatches(_), ExpectedOutput::QueryResult(x))
| (Output::Stream(_), ExpectedOutput::QueryResult(x)) => {
check_output_stream(output, x).await
}
_ => panic!(),
}
}
pub async fn check_output_stream(output: Output, expected: &str) {
let recordbatches = match output {
Output::Stream(stream) => util::collect_batches(stream).await.unwrap(),
Output::RecordBatches(recordbatches) => recordbatches,
_ => unreachable!(),
};
let pretty_print = recordbatches.pretty_print().unwrap();
assert_eq!(pretty_print, expected, "actual: \n{}", pretty_print);
}
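A hedged usage sketch of these helpers as an integration test might call them; the module path (common_test_util::recordbatch), the table contents, and the way the Database handle is obtained are assumptions — only the helper signatures come from the file above:

use client::Database;
// Assumed module path for the helpers defined above.
use common_test_util::recordbatch::{execute_and_check_output, ExpectedOutput};

async fn smoke_test(db: &Database) {
    // A DML statement is checked against its affected-row count.
    execute_and_check_output(db, "INSERT INTO demo VALUES (1, 'a')", ExpectedOutput::AffectedRows(1)).await;

    // A query is checked against the pretty-printed record batches (table content illustrative).
    let expected = "+---+---+\n| a | b |\n+---+---+\n| 1 | a |\n+---+---+";
    execute_and_check_output(db, "SELECT * FROM demo", ExpectedOutput::QueryResult(expected)).await;
}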

View File

@@ -276,7 +276,7 @@ impl Default for DatanodeOptions {
impl DatanodeOptions {
pub fn env_list_keys() -> Option<&'static [&'static str]> {
Some(&["meta_client.metasrv_addrs"])
Some(&["meta_client.metasrv_addrs", "wal.broker_endpoints"])
}
pub fn to_toml_string(&self) -> String {

View File

@@ -42,10 +42,11 @@ use metric_engine::engine::MetricEngine;
use mito2::config::MitoConfig;
use mito2::engine::MitoEngine;
use object_store::manager::{ObjectStoreManager, ObjectStoreManagerRef};
use object_store::util::normalize_dir;
use object_store::util::{join_dir, normalize_dir};
use query::QueryEngineFactory;
use servers::export_metrics::ExportMetricsTask;
use servers::grpc::{GrpcServer, GrpcServerConfig};
use servers::grpc::builder::GrpcServerBuilder;
use servers::grpc::GrpcServerConfig;
use servers::http::HttpServerBuilder;
use servers::metrics_handler::MetricsHandler;
use servers::server::{start_server, ServerHandler, ServerHandlers};
@@ -60,9 +61,9 @@ use tokio::sync::Notify;
use crate::config::{DatanodeOptions, RegionEngineConfig};
use crate::error::{
CreateDirSnafu, GetMetadataSnafu, MissingKvBackendSnafu, MissingNodeIdSnafu, OpenLogStoreSnafu,
ParseAddrSnafu, Result, RuntimeResourceSnafu, ShutdownInstanceSnafu, ShutdownServerSnafu,
StartServerSnafu,
BuildMitoEngineSnafu, CreateDirSnafu, GetMetadataSnafu, MissingKvBackendSnafu,
MissingNodeIdSnafu, OpenLogStoreSnafu, ParseAddrSnafu, Result, RuntimeResourceSnafu,
ShutdownInstanceSnafu, ShutdownServerSnafu, StartServerSnafu,
};
use crate::event_listener::{
new_region_server_event_channel, NoopRegionServerEventListener, RegionServerEventListenerRef,
@@ -328,15 +329,13 @@ impl DatanodeBuilder {
max_send_message_size: opts.rpc_max_send_message_size.as_bytes() as usize,
};
let server = Box::new(GrpcServer::new(
Some(config),
None,
None,
Some(Arc::new(region_server.clone()) as _),
Some(Arc::new(region_server.clone()) as _),
None,
region_server.runtime(),
));
let server = Box::new(
GrpcServerBuilder::new(region_server.runtime())
.config(config)
.flight_handler(Arc::new(region_server.clone()))
.region_server_handler(Arc::new(region_server.clone()))
.build(),
);
let addr: SocketAddr = opts.rpc_addr.parse().context(ParseAddrSnafu {
addr: &opts.rpc_addr,
@@ -458,20 +457,33 @@ impl DatanodeBuilder {
async fn build_mito_engine(
opts: &DatanodeOptions,
object_store_manager: ObjectStoreManagerRef,
config: MitoConfig,
mut config: MitoConfig,
) -> Result<MitoEngine> {
// Sets write cache path if it is empty.
if config.experimental_write_cache_path.is_empty() {
config.experimental_write_cache_path = join_dir(&opts.storage.data_home, "write_cache");
info!(
"Sets write cache path to {}",
config.experimental_write_cache_path
);
}
let mito_engine = match &opts.wal {
WalConfig::RaftEngine(raft_engine_config) => MitoEngine::new(
config,
Self::build_raft_engine_log_store(&opts.storage.data_home, raft_engine_config)
.await?,
object_store_manager,
),
)
.await
.context(BuildMitoEngineSnafu)?,
WalConfig::Kafka(kafka_config) => MitoEngine::new(
config,
Self::build_kafka_log_store(kafka_config).await?,
object_store_manager,
),
)
.await
.context(BuildMitoEngineSnafu)?,
};
Ok(mito_engine)
}

View File

@@ -282,6 +282,12 @@ pub enum Error {
source: metric_engine::error::Error,
location: Location,
},
#[snafu(display("Failed to build mito engine"))]
BuildMitoEngine {
source: mito2::error::Error,
location: Location,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -352,6 +358,7 @@ impl ErrorExt for Error {
StopRegionEngine { source, .. } => source.status_code(),
FindLogicalRegions { source, .. } => source.status_code(),
BuildMitoEngine { source, .. } => source.status_code(),
}
}

View File

@@ -26,10 +26,10 @@ use std::{env, path};
use common_base::readable_size::ReadableSize;
use common_telemetry::logging::info;
use object_store::layers::{LoggingLayer, LruCacheLayer, RetryLayer, TracingLayer};
use object_store::services::Fs as FsBuilder;
use object_store::util::normalize_dir;
use object_store::{util, HttpClient, ObjectStore, ObjectStoreBuilder};
use object_store::layers::{LruCacheLayer, RetryLayer};
use object_store::services::Fs;
use object_store::util::{join_dir, normalize_dir, with_instrument_layers};
use object_store::{HttpClient, ObjectStore, ObjectStoreBuilder};
use snafu::prelude::*;
use crate::config::{ObjectStoreConfig, DEFAULT_OBJECT_STORE_CACHE_SIZE};
@@ -60,16 +60,7 @@ pub(crate) async fn new_object_store(
object_store
};
let store = object_store
.layer(
LoggingLayer::default()
// Print the expected error only in DEBUG level.
// See https://docs.rs/opendal/latest/opendal/layers/struct.LoggingLayer.html#method.with_error_level
.with_error_level(Some("debug"))
.expect("input error level must be valid"),
)
.layer(TracingLayer)
.layer(object_store::layers::PrometheusMetricsLayer);
let store = with_instrument_layers(object_store);
Ok(store)
}
@@ -114,11 +105,10 @@ async fn create_object_store_with_cache(
};
if let Some(path) = cache_path {
let path = util::normalize_dir(path);
let atomic_temp_dir = format!("{path}.tmp/");
let atomic_temp_dir = join_dir(path, ".tmp/");
clean_temp_dir(&atomic_temp_dir)?;
let cache_store = FsBuilder::default()
.root(&path)
let cache_store = Fs::default()
.root(path)
.atomic_write_dir(&atomic_temp_dir)
.build()
.context(error::InitBackendSnafu)?;

View File

@@ -13,7 +13,7 @@
// limitations under the License.
use common_telemetry::logging::info;
use object_store::services::Azblob as AzureBuilder;
use object_store::services::Azblob;
use object_store::{util, ObjectStore};
use secrecy::ExposeSecret;
use snafu::prelude::*;
@@ -30,7 +30,7 @@ pub(crate) async fn new_azblob_object_store(azblob_config: &AzblobConfig) -> Res
azblob_config.container, &root
);
let mut builder = AzureBuilder::default();
let mut builder = Azblob::default();
let _ = builder
.root(&root)
.container(&azblob_config.container)

View File

@@ -15,7 +15,8 @@
use std::{fs, path};
use common_telemetry::logging::info;
use object_store::services::Fs as FsBuilder;
use object_store::services::Fs;
use object_store::util::join_dir;
use object_store::ObjectStore;
use snafu::prelude::*;
@@ -31,10 +32,10 @@ pub(crate) async fn new_fs_object_store(
.context(error::CreateDirSnafu { dir: data_home })?;
info!("The file storage home is: {}", data_home);
let atomic_write_dir = format!("{data_home}.tmp/");
let atomic_write_dir = join_dir(data_home, ".tmp/");
store::clean_temp_dir(&atomic_write_dir)?;
let mut builder = FsBuilder::default();
let mut builder = Fs::default();
let _ = builder.root(data_home).atomic_write_dir(&atomic_write_dir);
let object_store = ObjectStore::new(builder)

View File

@@ -13,7 +13,7 @@
// limitations under the License.
use common_telemetry::logging::info;
use object_store::services::Gcs as GCSBuilder;
use object_store::services::Gcs;
use object_store::{util, ObjectStore};
use secrecy::ExposeSecret;
use snafu::prelude::*;
@@ -29,7 +29,7 @@ pub(crate) async fn new_gcs_object_store(gcs_config: &GcsConfig) -> Result<Objec
gcs_config.bucket, &root
);
let mut builder = GCSBuilder::default();
let mut builder = Gcs::default();
builder
.root(&root)
.bucket(&gcs_config.bucket)

View File

@@ -13,7 +13,7 @@
// limitations under the License.
use common_telemetry::logging::info;
use object_store::services::Oss as OSSBuilder;
use object_store::services::Oss;
use object_store::{util, ObjectStore};
use secrecy::ExposeSecret;
use snafu::prelude::*;
@@ -29,7 +29,7 @@ pub(crate) async fn new_oss_object_store(oss_config: &OssConfig) -> Result<Objec
oss_config.bucket, &root
);
let mut builder = OSSBuilder::default();
let mut builder = Oss::default();
let _ = builder
.root(&root)
.bucket(&oss_config.bucket)

View File

@@ -13,7 +13,7 @@
// limitations under the License.
use common_telemetry::logging::info;
use object_store::services::S3 as S3Builder;
use object_store::services::S3;
use object_store::{util, ObjectStore};
use secrecy::ExposeSecret;
use snafu::prelude::*;
@@ -30,7 +30,7 @@ pub(crate) async fn new_s3_object_store(s3_config: &S3Config) -> Result<ObjectSt
s3_config.bucket, &root
);
let mut builder = S3Builder::default();
let mut builder = S3::default();
let _ = builder
.root(&root)
.bucket(&s3_config.bucket)

View File

@@ -86,7 +86,6 @@ use crate::frontend::{FrontendOptions, TomlSerializable};
use crate::heartbeat::HeartbeatTask;
use crate::metrics;
use crate::script::ScriptExecutor;
use crate::server::Services;
#[async_trait]
pub trait FrontendInstance:
@@ -190,12 +189,13 @@ impl Instance {
pub async fn build_servers(
&mut self,
opts: impl Into<FrontendOptions> + TomlSerializable,
servers: ServerHandlers,
) -> Result<()> {
let opts: FrontendOptions = opts.into();
self.export_metrics_task =
ExportMetricsTask::try_new(&opts.export_metrics, Some(&self.plugins))
.context(StartServerSnafu)?;
let servers = Services::build(opts, Arc::new(self.clone()), self.plugins.clone()).await?;
self.servers = Arc::new(servers);
Ok(())
@@ -442,7 +442,7 @@ pub fn check_permission(
) -> Result<()> {
let need_validate = plugins
.get::<QueryOptions>()
.map(|opts| opts.disallow_cross_schema_query)
.map(|opts| opts.disallow_cross_catalog_query)
.unwrap_or_default();
if !need_validate {
@@ -520,7 +520,7 @@ mod tests {
let query_ctx = QueryContext::arc();
let plugins: Plugins = Plugins::new();
plugins.insert(QueryOptions {
disallow_cross_schema_query: true,
disallow_cross_catalog_query: true,
});
let sql = r#"
@@ -556,8 +556,6 @@ mod tests {
}
let wrong = vec![
("", "wrongschema."),
("greptime.", "wrongschema."),
("wrongcatalog.", "public."),
("wrongcatalog.", "wrongschema."),
];
@@ -607,10 +605,10 @@ mod tests {
let stmt = parse_stmt(sql, &GreptimeDbDialect {}).unwrap();
check_permission(plugins.clone(), &stmt[0], &query_ctx).unwrap();
let sql = "SHOW TABLES FROM wrongschema";
let sql = "SHOW TABLES FROM private";
let stmt = parse_stmt(sql, &GreptimeDbDialect {}).unwrap();
let re = check_permission(plugins.clone(), &stmt[0], &query_ctx);
assert!(re.is_err());
assert!(re.is_ok());
// test describe table
let sql = "DESC TABLE {catalog}{schema}demo;";

View File

@@ -20,5 +20,5 @@ pub mod heartbeat;
pub mod instance;
pub(crate) mod metrics;
mod script;
mod server;
pub mod server;
pub mod service_config;

View File

@@ -19,7 +19,9 @@ use auth::UserProviderRef;
use common_base::Plugins;
use common_runtime::Builder as RuntimeBuilder;
use servers::error::InternalIoSnafu;
use servers::grpc::{GrpcServer, GrpcServerConfig};
use servers::grpc::builder::GrpcServerBuilder;
use servers::grpc::greptime_handler::GreptimeRequestHandler;
use servers::grpc::GrpcServerConfig;
use servers::http::HttpServerBuilder;
use servers::metrics_handler::MetricsHandler;
use servers::mysql::server::{MysqlServer, MysqlSpawnConfig, MysqlSpawnRef};
@@ -33,14 +35,49 @@ use snafu::ResultExt;
use crate::error::{self, Result, StartServerSnafu};
use crate::frontend::{FrontendOptions, TomlSerializable};
use crate::instance::FrontendInstance;
use crate::service_config::GrpcOptions;
pub(crate) struct Services;
pub struct Services {
plugins: Plugins,
}
impl Services {
pub(crate) async fn build<T, U>(
pub fn new(plugins: Plugins) -> Self {
Self { plugins }
}
pub fn grpc_server_builder(opts: &GrpcOptions) -> Result<GrpcServerBuilder> {
let grpc_runtime = Arc::new(
RuntimeBuilder::default()
.worker_threads(opts.runtime_size)
.thread_name("grpc-handlers")
.build()
.context(error::RuntimeResourceSnafu)?,
);
let grpc_config = GrpcServerConfig {
max_recv_message_size: opts.max_recv_message_size.as_bytes() as usize,
max_send_message_size: opts.max_send_message_size.as_bytes() as usize,
};
Ok(GrpcServerBuilder::new(grpc_runtime).config(grpc_config))
}
pub async fn build<T, U>(&self, opts: T, instance: Arc<U>) -> Result<ServerHandlers>
where
T: Into<FrontendOptions> + TomlSerializable + Clone,
U: FrontendInstance,
{
let grpc_options = &opts.clone().into().grpc;
let builder = Self::grpc_server_builder(grpc_options)?;
self.build_with(opts, instance, builder).await
}
pub async fn build_with<T, U>(
&self,
opts: T,
instance: Arc<U>,
plugins: Plugins,
builder: GrpcServerBuilder,
) -> Result<ServerHandlers>
where
T: Into<FrontendOptions> + TomlSerializable,
@@ -48,35 +85,28 @@ impl Services {
{
let toml = opts.to_toml()?;
let opts: FrontendOptions = opts.into();
let mut result = Vec::<ServerHandler>::with_capacity(plugins.len());
let user_provider = plugins.get::<UserProviderRef>();
let mut result = Vec::<ServerHandler>::new();
let user_provider = self.plugins.get::<UserProviderRef>();
{
// Always init GRPC server
let opts = &opts.grpc;
let grpc_addr = parse_addr(&opts.addr)?;
let grpc_runtime = Arc::new(
RuntimeBuilder::default()
.worker_threads(opts.runtime_size)
.thread_name("grpc-handlers")
.build()
.context(error::RuntimeResourceSnafu)?,
);
let grpc_config = GrpcServerConfig {
max_recv_message_size: opts.max_recv_message_size.as_bytes() as usize,
max_send_message_size: opts.max_send_message_size.as_bytes() as usize,
};
let grpc_server = GrpcServer::new(
Some(grpc_config),
Some(ServerGrpcQueryHandlerAdapter::arc(instance.clone())),
Some(instance.clone()),
None,
None,
let greptime_request_handler = GreptimeRequestHandler::new(
ServerGrpcQueryHandlerAdapter::arc(instance.clone()),
user_provider.clone(),
grpc_runtime,
builder.runtime().clone(),
);
let grpc_server = builder
.database_handler(greptime_request_handler.clone())
.prometheus_handler(instance.clone())
.otlp_handler(instance.clone())
.user_provider(user_provider.clone())
.flight_handler(Arc::new(greptime_request_handler))
.build();
result.push((Box::new(grpc_server), grpc_addr));
}
@@ -116,7 +146,7 @@ impl Services {
let http_server = http_server_builder
.with_metrics_handler(MetricsHandler)
.with_script_handler(instance.clone())
.with_plugins(plugins)
.with_plugins(self.plugins.clone())
.with_greptime_config_options(toml)
.build();
result.push((Box::new(http_server), http_addr));
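A sketch of how a caller might drive the reworked Services API; it assumes FrontendOptions satisfies the TomlSerializable bound and that the plugins and frontend instance are constructed elsewhere — only the Services methods themselves are taken from the diff above:

use std::sync::Arc;

use common_base::Plugins;
use servers::server::ServerHandlers;

use crate::error::Result;
use crate::frontend::FrontendOptions;
use crate::instance::FrontendInstance;
use crate::server::Services;

async fn start_servers<U: FrontendInstance>(
    plugins: Plugins,
    opts: FrontendOptions,
    instance: Arc<U>,
) -> Result<ServerHandlers> {
    let services = Services::new(plugins);
    // Either let `build` derive the gRPC builder from `opts.grpc` internally:
    //     services.build(opts.clone(), instance.clone()).await
    // or hand over a pre-configured builder via `build_with`:
    let builder = Services::grpc_server_builder(&opts.grpc)?;
    services.build_with(opts, instance, builder).await
}

The second path presumably exists so that embedders can adjust the gRPC builder (runtime, handlers, config) before the servers are assembled.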

View File

@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod sort;
mod sort_create;
pub mod sort;
pub mod sort_create;
use async_trait::async_trait;

View File

@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod external_provider;
mod external_sort;
pub mod external_provider;
pub mod external_sort;
mod intermediate_rw;
mod merge_stream;

View File

@@ -15,7 +15,7 @@
use std::any::Any;
use std::io::Error as IoError;
use common_error::ext::ErrorExt;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use snafu::{Location, Snafu};
@@ -167,6 +167,12 @@ pub enum Error {
total_row_count: usize,
expected_row_count: usize,
},
#[snafu(display("External error"))]
External {
source: BoxedError,
location: Location,
},
}
impl ErrorExt for Error {
@@ -197,6 +203,8 @@ impl ErrorExt for Error {
| FstInsert { .. }
| InconsistentRowCount { .. }
| IndexNotFound { .. } => StatusCode::InvalidArguments,
External { source, .. } => source.status_code(),
}
}

View File

@@ -28,7 +28,7 @@ use crate::inverted_index::format::reader::InvertedIndexReader;
/// avoiding repeated compilation of fixed predicates such as regex patterns.
#[mockall::automock]
#[async_trait]
pub trait IndexApplier {
pub trait IndexApplier: Send + Sync {
/// Applies the predefined predicates to the data read by the given index reader, returning
/// a list of relevant indices (e.g., post IDs, group IDs, row IDs).
async fn apply<'a>(

View File

@@ -22,6 +22,8 @@ common-macro.workspace = true
common-meta.workspace = true
common-runtime.workspace = true
common-telemetry.workspace = true
common-time.workspace = true
dashmap.workspace = true
futures-util.workspace = true
futures.workspace = true
protobuf = { version = "2", features = ["bytes"] }

View File

@@ -14,12 +14,12 @@
use std::any::Any;
use common_config::wal::KafkaWalTopic;
use common_error::ext::ErrorExt;
use common_macro::stack_trace_debug;
use common_runtime::error::Error as RuntimeError;
use serde_json::error::Error as JsonError;
use snafu::{Location, Snafu};
use store_api::storage::RegionId;
use crate::kafka::NamespaceImpl as KafkaNamespace;
@@ -119,7 +119,7 @@ pub enum Error {
error
))]
GetClient {
topic: KafkaWalTopic,
topic: String,
location: Location,
error: String,
},
@@ -140,7 +140,7 @@ pub enum Error {
limit,
))]
ProduceRecord {
topic: KafkaWalTopic,
topic: String,
size: usize,
limit: usize,
location: Location,
@@ -183,6 +183,18 @@ pub enum Error {
#[snafu(display("The record sequence is not legal, error: {}", error))]
IllegalSequence { location: Location, error: String },
#[snafu(display(
"Attempt to append discontinuous log entry, region: {}, last index: {}, attempt index: {}",
region_id,
last_index,
attempt_index
))]
DiscontinuousLogIndex {
region_id: RegionId,
last_index: u64,
attempt_index: u64,
},
}
impl ErrorExt for Error {

View File

@@ -18,7 +18,6 @@ pub(crate) mod util;
use std::fmt::Display;
use common_meta::wal::KafkaWalTopic as Topic;
use serde::{Deserialize, Serialize};
use store_api::logstore::entry::{Entry, Id as EntryId};
use store_api::logstore::namespace::Namespace;
@@ -29,7 +28,7 @@ use crate::error::Error;
#[derive(Debug, PartialEq, Eq, Hash, Clone, Serialize, Deserialize)]
pub struct NamespaceImpl {
pub region_id: u64,
pub topic: Topic,
pub topic: String,
}
impl Namespace for NamespaceImpl {

View File

@@ -15,7 +15,7 @@
use std::collections::HashMap;
use std::sync::Arc;
use common_config::wal::{KafkaConfig, KafkaWalTopic as Topic};
use common_config::wal::KafkaConfig;
use rskafka::client::partition::{PartitionClient, UnknownTopicHandling};
use rskafka::client::producer::aggregator::RecordAggregator;
use rskafka::client::producer::{BatchProducer, BatchProducerBuilder};
@@ -67,7 +67,7 @@ pub(crate) struct ClientManager {
client_factory: RsKafkaClient,
/// A pool maintaining a collection of clients.
/// Key: a topic. Value: the associated client of the topic.
client_pool: RwLock<HashMap<Topic, Client>>,
client_pool: RwLock<HashMap<String, Client>>,
}
impl ClientManager {
@@ -97,7 +97,7 @@ impl ClientManager {
/// Gets the client associated with the topic. If the client does not exist, a new one will
/// be created and returned.
pub(crate) async fn get_or_insert(&self, topic: &Topic) -> Result<Client> {
pub(crate) async fn get_or_insert(&self, topic: &String) -> Result<Client> {
{
let client_pool = self.client_pool.read().await;
if let Some(client) = client_pool.get(topic) {
@@ -116,7 +116,7 @@ impl ClientManager {
}
}
async fn try_create_client(&self, topic: &Topic) -> Result<Client> {
async fn try_create_client(&self, topic: &String) -> Result<Client> {
// Sets UnknownTopicHandling to Retry so the client keeps reconnecting if the Kafka cluster replies with an UnknownTopic error.
// That's because the topic is believed to exist, as the metasrv is expected to create the required topics upon start.
// Reconnecting won't stop until it succeeds or a different error is returned.
@@ -147,7 +147,7 @@ mod tests {
test_name: &str,
num_topics: usize,
broker_endpoints: Vec<String>,
) -> (ClientManager, Vec<Topic>) {
) -> (ClientManager, Vec<String>) {
let topics = create_topics(
num_topics,
|i| format!("{test_name}_{}_{}", i, uuid::Uuid::new_v4()),

View File

@@ -205,7 +205,11 @@ impl LogStore for KafkaLogStore {
}
// Tries to construct an entry from records consumed so far.
if let Some(entry) = maybe_emit_entry(record, &mut entry_records)? {
if let Some(mut entry) = maybe_emit_entry(record, &mut entry_records)? {
// We don't rely on the EntryId generated by mito2.
// Instead, we use the offset returned from Kafka as the EntryId.
// Therefore, we MUST overwrite the EntryId with the record offset.
entry.id = offset as u64;
yield Ok(vec![entry]);
}
@@ -283,7 +287,6 @@ fn check_termination(
#[cfg(test)]
mod tests {
use common_base::readable_size::ReadableSize;
use common_config::wal::KafkaWalTopic as Topic;
use rand::seq::IteratorRandom;
use super::*;
@@ -304,7 +307,7 @@ mod tests {
test_name: &str,
num_topics: usize,
broker_endpoints: Vec<String>,
) -> (KafkaLogStore, Vec<Topic>) {
) -> (KafkaLogStore, Vec<String>) {
let topics = create_topics(
num_topics,
|i| format!("{test_name}_{}_{}", i, uuid::Uuid::new_v4()),
@@ -424,17 +427,20 @@ mod tests {
// Reads entries for regions and checks for each region that the gotten entries are identical with the expected ones.
for region_id in which {
let ctx = &region_contexts[&region_id];
let ctx = region_contexts.get_mut(&region_id).unwrap();
let stream = logstore
.read(&ctx.ns, ctx.flushed_entry_id + 1)
.await
.unwrap();
let got = stream
let mut got = stream
.collect::<Vec<_>>()
.await
.into_iter()
.flat_map(|x| x.unwrap())
.collect::<Vec<_>>();
//FIXME(weny): https://github.com/GreptimeTeam/greptimedb/issues/3152
ctx.expected.iter_mut().for_each(|entry| entry.id = 0);
got.iter_mut().for_each(|entry| entry.id = 0);
assert_eq!(ctx.expected, got);
}

View File

@@ -12,8 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt::{Debug, Formatter};
use std::sync::atomic::{AtomicI64, Ordering};
use std::sync::Arc;
use async_stream::stream;
@@ -22,15 +24,15 @@ use common_runtime::{RepeatedTask, TaskFunction};
use common_telemetry::{error, info};
use raft_engine::{Config, Engine, LogBatch, MessageExt, ReadableSize, RecoveryMode};
use snafu::{ensure, ResultExt};
use store_api::logstore::entry::{Entry, Id as EntryId};
use store_api::logstore::entry::Id as EntryId;
use store_api::logstore::entry_stream::SendableEntryStream;
use store_api::logstore::namespace::{Id as NamespaceId, Namespace as NamespaceTrait};
use store_api::logstore::{AppendBatchResponse, AppendResponse, LogStore};
use crate::error;
use crate::error::{
AddEntryLogBatchSnafu, Error, FetchEntrySnafu, IllegalNamespaceSnafu, IllegalStateSnafu,
OverrideCompactedEntrySnafu, RaftEngineSnafu, Result, StartGcTaskSnafu, StopGcTaskSnafu,
AddEntryLogBatchSnafu, DiscontinuousLogIndexSnafu, Error, FetchEntrySnafu,
IllegalNamespaceSnafu, IllegalStateSnafu, OverrideCompactedEntrySnafu, RaftEngineSnafu, Result,
StartGcTaskSnafu, StopGcTaskSnafu,
};
use crate::raft_engine::backend::SYSTEM_NAMESPACE;
use crate::raft_engine::protos::logstore::{EntryImpl, NamespaceImpl as Namespace};
@@ -41,6 +43,7 @@ pub struct RaftEngineLogStore {
config: RaftEngineConfig,
engine: Arc<Engine>,
gc_task: RepeatedTask<Error>,
last_sync_time: AtomicI64,
}
pub struct PurgeExpiredFilesFunction {
@@ -80,6 +83,8 @@ impl RaftEngineLogStore {
recovery_mode: RecoveryMode::TolerateTailCorruption,
batch_compression_threshold: ReadableSize::kb(8),
target_file_size: ReadableSize(config.file_size.0),
enable_log_recycle: config.enable_log_recycle,
prefill_for_recycle: config.prefill_log_files,
..Default::default()
};
let engine = Arc::new(Engine::open(raft_engine_config).context(RaftEngineSnafu)?);
@@ -94,6 +99,7 @@ impl RaftEngineLogStore {
config,
engine,
gc_task,
last_sync_time: AtomicI64::new(0),
};
log_store.start()?;
Ok(log_store)
@@ -116,22 +122,65 @@ impl RaftEngineLogStore {
)
}
/// Checks if entry does not override the min index of namespace.
fn check_entry(&self, e: &EntryImpl) -> Result<()> {
if cfg!(debug_assertions) {
/// Converts entries to `LogBatch` and checks if entry ids are valid.
/// Returns the converted `LogBatch` along with the last entry id
/// to append for each namespace (region).
fn entries_to_batch(
&self,
entries: Vec<EntryImpl>,
) -> Result<(LogBatch, HashMap<NamespaceId, EntryId>)> {
// Records the last entry id for each region's entries.
let mut entry_ids: HashMap<NamespaceId, EntryId> = HashMap::with_capacity(entries.len());
let mut batch = LogBatch::with_capacity(entries.len());
for e in entries {
let ns_id = e.namespace_id;
if let Some(first_index) = self.engine.first_index(ns_id) {
ensure!(
e.id() >= first_index,
OverrideCompactedEntrySnafu {
namespace: ns_id,
first_index,
attempt_index: e.id(),
match entry_ids.entry(ns_id) {
Entry::Occupied(mut o) => {
let prev = *o.get();
ensure!(
e.id == prev + 1,
DiscontinuousLogIndexSnafu {
region_id: ns_id,
last_index: prev,
attempt_index: e.id
}
);
o.insert(e.id);
}
Entry::Vacant(v) => {
// This entry is the first in the batch for the given region.
if let Some(first_index) = self.engine.first_index(ns_id) {
// Ensure the first entry in the batch does not override a compacted entry.
ensure!(
e.id > first_index,
OverrideCompactedEntrySnafu {
namespace: ns_id,
first_index,
attempt_index: e.id,
}
);
}
);
// Ensure the first entry in the batch does not form a hole in raft-engine.
if let Some(last_index) = self.engine.last_index(ns_id) {
ensure!(
e.id == last_index + 1,
DiscontinuousLogIndexSnafu {
region_id: ns_id,
last_index,
attempt_index: e.id
}
);
}
v.insert(e.id);
}
}
batch
.add_entries::<MessageType>(ns_id, &[e])
.context(AddEntryLogBatchSnafu)?;
}
Ok(())
Ok((batch, entry_ids))
}
}
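To make the id invariants concrete, here is a self-contained sketch (not the log store's actual code) of the rule entries_to_batch enforces for each namespace's slice of a batch:

// first_index/last_index stand for the raft-engine's current view of the
// namespace; `ids` are the entry ids of one batch for that namespace.
fn ids_are_acceptable(first_index: Option<u64>, last_index: Option<u64>, ids: &[u64]) -> bool {
    let Some((&first, rest)) = ids.split_first() else {
        return true; // an empty batch is trivially fine
    };
    // The first id must not override a compacted entry ...
    if first_index.is_some_and(|fi| first <= fi) {
        return false;
    }
    // ... and must directly follow the last persisted index (no hole).
    if last_index.is_some_and(|li| first != li + 1) {
        return false;
    }
    // Every following id must be exactly the previous id plus one.
    rest.iter().zip(ids.iter()).all(|(&next, &prev)| next == prev + 1)
}

// For example, with first_index = Some(3) and last_index = Some(7):
//   ids_are_acceptable(Some(3), Some(7), &[8, 9, 10]) == true
//   ids_are_acceptable(Some(3), Some(7), &[9, 10])    == false  (hole after index 7)
//   ids_are_acceptable(Some(3), Some(7), &[8, 10])    == false  (discontinuous ids)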
@@ -166,8 +215,8 @@ impl LogStore for RaftEngineLogStore {
if let Some(first_index) = self.engine.first_index(namespace_id) {
ensure!(
entry_id >= first_index,
error::OverrideCompactedEntrySnafu {
entry_id > first_index,
OverrideCompactedEntrySnafu {
namespace: namespace_id,
first_index,
attempt_index: entry_id,
@@ -175,6 +224,17 @@ impl LogStore for RaftEngineLogStore {
);
}
if let Some(last_index) = self.engine.last_index(namespace_id) {
ensure!(
entry_id == last_index + 1,
DiscontinuousLogIndexSnafu {
region_id: namespace_id,
last_index,
attempt_index: entry_id
}
);
}
let _ = self
.engine
.write(&mut batch, self.config.sync_write)
@@ -192,27 +252,21 @@ impl LogStore for RaftEngineLogStore {
return Ok(AppendBatchResponse::default());
}
// Records the last entry id for each region's entries.
let mut last_entry_ids: HashMap<NamespaceId, EntryId> =
HashMap::with_capacity(entries.len());
let mut batch = LogBatch::with_capacity(entries.len());
let (mut batch, last_entry_ids) = self.entries_to_batch(entries)?;
for e in entries {
self.check_entry(&e)?;
// For raft-engine log store, the namespace id is the region id.
let ns_id = e.namespace_id;
last_entry_ids
.entry(ns_id)
.and_modify(|x| *x = (*x).max(e.id))
.or_insert(e.id);
batch
.add_entries::<MessageType>(ns_id, &[e])
.context(AddEntryLogBatchSnafu)?;
let mut sync = self.config.sync_write;
if let Some(sync_period) = &self.config.sync_period {
let now = common_time::util::current_time_millis();
if now - self.last_sync_time.load(Ordering::Relaxed) >= sync_period.as_millis() as i64 {
self.last_sync_time.store(now, Ordering::Relaxed);
sync = true;
}
}
let _ = self
.engine
.write(&mut batch, self.config.sync_write)
.write(&mut batch, sync)
.context(RaftEngineSnafu)?;
Ok(AppendBatchResponse { last_entry_ids })

View File

@@ -15,7 +15,6 @@
use std::sync::atomic::{AtomicU64 as AtomicEntryId, Ordering};
use std::sync::Mutex;
use common_meta::wal::KafkaWalTopic as Topic;
use rand::distributions::Alphanumeric;
use rand::rngs::ThreadRng;
use rand::{thread_rng, Rng};
@@ -29,7 +28,7 @@ pub async fn create_topics<F>(
num_topics: usize,
decorator: F,
broker_endpoints: &[String],
) -> Vec<Topic>
) -> Vec<String>
where
F: Fn(usize) -> String,
{

View File

@@ -33,6 +33,7 @@ etcd-client.workspace = true
futures.workspace = true
h2 = "0.3"
http-body = "0.4"
humantime = "2.1"
humantime-serde.workspace = true
itertools.workspace = true
lazy_static.workspace = true

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::time::Duration;
@@ -22,8 +23,12 @@ use api::v1::meta::{
RangeRequest as PbRangeRequest, RangeResponse as PbRangeResponse, ResponseHeader,
};
use common_grpc::channel_manager::ChannelManager;
use common_meta::kv_backend::ResettableKvBackendRef;
use common_meta::rpc::store::{BatchGetRequest, RangeRequest};
use common_meta::kv_backend::{KvBackend, ResettableKvBackendRef, TxnService};
use common_meta::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
DeleteRangeResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
};
use common_meta::rpc::KeyValue;
use common_meta::util;
use common_telemetry::warn;
@@ -49,11 +54,158 @@ pub struct MetaPeerClient {
retry_interval_ms: u64,
}
#[async_trait::async_trait]
impl TxnService for MetaPeerClient {
type Error = error::Error;
}
#[async_trait::async_trait]
impl KvBackend for MetaPeerClient {
fn name(&self) -> &str {
"MetaPeerClient"
}
fn as_any(&self) -> &dyn Any {
self
}
async fn range(&self, req: RangeRequest) -> Result<RangeResponse> {
if self.is_leader() {
return self
.in_memory
.range(req)
.await
.context(error::KvBackendSnafu);
}
let max_retry_count = self.max_retry_count;
let retry_interval_ms = self.retry_interval_ms;
for _ in 0..max_retry_count {
match self
.remote_range(req.key.clone(), req.range_end.clone(), req.keys_only)
.await
{
Ok(res) => return Ok(res),
Err(e) => {
if need_retry(&e) {
warn!("Encountered an error that need to retry, err: {:?}", e);
tokio::time::sleep(Duration::from_millis(retry_interval_ms)).await;
} else {
return Err(e);
}
}
}
}
error::ExceededRetryLimitSnafu {
func_name: "range",
retry_num: max_retry_count,
}
.fail()
}
// Get kv information from the leader's in_mem kv store
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
if self.is_leader() {
return self
.in_memory
.batch_get(req)
.await
.context(error::KvBackendSnafu);
}
let max_retry_count = self.max_retry_count;
let retry_interval_ms = self.retry_interval_ms;
for _ in 0..max_retry_count {
match self.remote_batch_get(req.keys.clone()).await {
Ok(res) => return Ok(res),
Err(e) => {
if need_retry(&e) {
warn!("Encountered an error that need to retry, err: {:?}", e);
tokio::time::sleep(Duration::from_millis(retry_interval_ms)).await;
} else {
return Err(e);
}
}
}
}
error::ExceededRetryLimitSnafu {
func_name: "batch_get",
retry_num: max_retry_count,
}
.fail()
}
// MetaPeerClient does not support mutable methods listed below.
async fn put(&self, _req: PutRequest) -> Result<PutResponse> {
error::UnsupportedSnafu {
operation: "put".to_string(),
}
.fail()
}
async fn batch_put(&self, _req: BatchPutRequest) -> Result<BatchPutResponse> {
error::UnsupportedSnafu {
operation: "batch put".to_string(),
}
.fail()
}
async fn compare_and_put(&self, _req: CompareAndPutRequest) -> Result<CompareAndPutResponse> {
error::UnsupportedSnafu {
operation: "compare and put".to_string(),
}
.fail()
}
async fn delete_range(&self, _req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
error::UnsupportedSnafu {
operation: "delete range".to_string(),
}
.fail()
}
async fn batch_delete(&self, _req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
error::UnsupportedSnafu {
operation: "batch delete".to_string(),
}
.fail()
}
async fn delete(&self, _key: &[u8], _prev_kv: bool) -> Result<Option<KeyValue>> {
error::UnsupportedSnafu {
operation: "delete".to_string(),
}
.fail()
}
async fn put_conditionally(
&self,
_key: Vec<u8>,
_value: Vec<u8>,
_if_not_exists: bool,
) -> Result<bool> {
error::UnsupportedSnafu {
operation: "put conditionally".to_string(),
}
.fail()
}
}
impl MetaPeerClient {
async fn get_dn_key_value(&self, keys_only: bool) -> Result<Vec<KeyValue>> {
let key = format!("{DN_STAT_PREFIX}-").into_bytes();
let range_end = util::get_prefix_end_key(&key);
self.range(key, range_end, keys_only).await
let range_request = RangeRequest {
key,
range_end,
keys_only,
..Default::default()
};
self.range(range_request).await.map(|res| res.kvs)
}
// Get all datanode stat kvs from leader meta.
@@ -73,70 +225,11 @@ impl MetaPeerClient {
// Get datanode stat kvs from leader meta by input keys.
pub async fn get_dn_stat_kvs(&self, keys: Vec<StatKey>) -> Result<HashMap<StatKey, StatValue>> {
let stat_keys = keys.into_iter().map(|key| key.into()).collect();
let batch_get_req = BatchGetRequest { keys: stat_keys };
let kvs = self.batch_get(stat_keys).await?;
let res = self.batch_get(batch_get_req).await?;
to_stat_kv_map(kvs)
}
// Get kv information from the leader's in_mem kv store.
pub async fn get(&self, key: Vec<u8>) -> Result<Option<KeyValue>> {
let mut kvs = self.range(key, vec![], false).await?;
Ok(if kvs.is_empty() {
None
} else {
debug_assert_eq!(kvs.len(), 1);
Some(kvs.remove(0))
})
}
// Range kv information from the leader's in_mem kv store
pub async fn range(
&self,
key: Vec<u8>,
range_end: Vec<u8>,
keys_only: bool,
) -> Result<Vec<KeyValue>> {
if self.is_leader() {
let request = RangeRequest {
key,
range_end,
..Default::default()
};
return self
.in_memory
.range(request)
.await
.map(|resp| resp.kvs)
.context(error::KvBackendSnafu);
}
let max_retry_count = self.max_retry_count;
let retry_interval_ms = self.retry_interval_ms;
for _ in 0..max_retry_count {
match self
.remote_range(key.clone(), range_end.clone(), keys_only)
.await
{
Ok(kvs) => return Ok(kvs),
Err(e) => {
if need_retry(&e) {
warn!("Encountered an error that need to retry, err: {:?}", e);
tokio::time::sleep(Duration::from_millis(retry_interval_ms)).await;
} else {
return Err(e);
}
}
}
}
error::ExceededRetryLimitSnafu {
func_name: "range",
retry_num: max_retry_count,
}
.fail()
to_stat_kv_map(res.kvs)
}
async fn remote_range(
@@ -144,7 +237,7 @@ impl MetaPeerClient {
key: Vec<u8>,
range_end: Vec<u8>,
keys_only: bool,
) -> Result<Vec<KeyValue>> {
) -> Result<RangeResponse> {
// Safety: when self.is_leader() == false, election must not be empty.
let election = self.election.as_ref().unwrap();
@@ -170,47 +263,13 @@ impl MetaPeerClient {
check_resp_header(&response.header, Context { addr: &leader_addr })?;
Ok(response.kvs.into_iter().map(KeyValue::new).collect())
Ok(RangeResponse {
kvs: response.kvs.into_iter().map(KeyValue::new).collect(),
more: response.more,
})
}
// Get kv information from the leader's in_mem kv store
pub async fn batch_get(&self, keys: Vec<Vec<u8>>) -> Result<Vec<KeyValue>> {
if self.is_leader() {
let request = BatchGetRequest { keys };
return self
.in_memory
.batch_get(request)
.await
.map(|resp| resp.kvs)
.context(error::KvBackendSnafu);
}
let max_retry_count = self.max_retry_count;
let retry_interval_ms = self.retry_interval_ms;
for _ in 0..max_retry_count {
match self.remote_batch_get(keys.clone()).await {
Ok(kvs) => return Ok(kvs),
Err(e) => {
if need_retry(&e) {
warn!("Encountered an error that need to retry, err: {:?}", e);
tokio::time::sleep(Duration::from_millis(retry_interval_ms)).await;
} else {
return Err(e);
}
}
}
}
error::ExceededRetryLimitSnafu {
func_name: "batch_get",
retry_num: max_retry_count,
}
.fail()
}
async fn remote_batch_get(&self, keys: Vec<Vec<u8>>) -> Result<Vec<KeyValue>> {
async fn remote_batch_get(&self, keys: Vec<Vec<u8>>) -> Result<BatchGetResponse> {
// Safety: when self.is_leader() == false, election must not be empty.
let election = self.election.as_ref().unwrap();
@@ -234,7 +293,9 @@ impl MetaPeerClient {
check_resp_header(&response.header, Context { addr: &leader_addr })?;
Ok(response.kvs.into_iter().map(KeyValue::new).collect())
Ok(BatchGetResponse {
kvs: response.kvs.into_iter().map(KeyValue::new).collect(),
})
}
// Check if the meta node is a leader node.

View File

@@ -210,6 +210,12 @@ pub enum Error {
location: Location,
source: servers::error::Error,
},
#[snafu(display("Failed to parse duration {}", duration))]
ParseDuration {
duration: String,
#[snafu(source)]
error: humantime::DurationError,
},
#[snafu(display("Failed to parse address {}", addr))]
ParseAddr {
addr: String,
@@ -534,6 +540,13 @@ pub enum Error {
#[snafu(display("Expected to retry later, reason: {}", reason))]
RetryLater { reason: String, location: Location },
#[snafu(display("Expected to retry later, reason: {}", reason))]
RetryLaterWithSource {
reason: String,
location: Location,
source: BoxedError,
},
#[snafu(display("Failed to update table metadata, err_msg: {}", err_msg))]
UpdateTableMetadata { err_msg: String, location: Location },
@@ -622,6 +635,7 @@ impl Error {
/// Returns `true` if the error is retryable.
pub fn is_retryable(&self) -> bool {
matches!(self, Error::RetryLater { .. })
|| matches!(self, Error::RetryLaterWithSource { .. })
}
}
@@ -652,7 +666,6 @@ impl ErrorExt for Error {
| Error::LockNotConfig { .. }
| Error::ExceededRetryLimit { .. }
| Error::SendShutdownSignal { .. }
| Error::ParseAddr { .. }
| Error::SchemaAlreadyExists { .. }
| Error::PusherNotFound { .. }
| Error::PushMessage { .. }
@@ -660,6 +673,7 @@ impl ErrorExt for Error {
| Error::MailboxTimeout { .. }
| Error::MailboxReceiver { .. }
| Error::RetryLater { .. }
| Error::RetryLaterWithSource { .. }
| Error::StartGrpc { .. }
| Error::UpdateTableMetadata { .. }
| Error::NoEnoughAvailableDatanode { .. }
@@ -678,6 +692,8 @@ impl ErrorExt for Error {
| Error::InvalidStatKey { .. }
| Error::InvalidInactiveRegionKey { .. }
| Error::ParseNum { .. }
| Error::ParseAddr { .. }
| Error::ParseDuration { .. }
| Error::UnsupportedSelectorType { .. }
| Error::InvalidArguments { .. }
| Error::InitExportMetricsTask { .. }

View File

@@ -14,6 +14,7 @@
use std::collections::HashMap;
use common_meta::kv_backend::KvBackend;
use common_meta::peer::Peer;
use common_meta::{util, ClusterId};
use common_time::util as time_util;
@@ -39,7 +40,8 @@ pub async fn lookup_alive_datanode_peer(
cluster_id,
node_id: datanode_id,
};
let Some(kv) = meta_peer_client.get(lease_key.clone().try_into()?).await? else {
let lease_key_bytes: Vec<u8> = lease_key.clone().try_into()?;
let Some(kv) = meta_peer_client.get(&lease_key_bytes).await? else {
return Ok(None);
};
let lease_value: LeaseValue = kv.value.try_into()?;
@@ -74,7 +76,13 @@ where
let key = get_lease_prefix(cluster_id);
let range_end = util::get_prefix_end_key(&key);
let kvs = meta_peer_client.range(key, range_end, false).await?;
let range_req = common_meta::rpc::store::RangeRequest {
key,
range_end,
keys_only: false,
..Default::default()
};
let kvs = meta_peer_client.range(range_req).await?.kvs;
let mut lease_kvs = HashMap::new();
for kv in kvs {
let lease_key: LeaseKey = kv.key.try_into()?;

View File

@@ -79,6 +79,12 @@ pub struct MetaSrvOptions {
pub store_key_prefix: String,
}
impl MetaSrvOptions {
pub fn env_list_keys() -> Option<&'static [&'static str]> {
Some(&["wal.broker_endpoints"])
}
}
impl Default for MetaSrvOptions {
fn default() -> Self {
Self {

View File

@@ -28,6 +28,7 @@ use async_trait::async_trait;
use common_meta::key::datanode_table::DatanodeTableKey;
use common_meta::key::TableMetadataManagerRef;
use common_meta::kv_backend::ResettableKvBackendRef;
use common_meta::lock_key::{RegionLock, TableLock};
use common_meta::{ClusterId, RegionIdent};
use common_procedure::error::{
Error as ProcedureError, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu,
@@ -40,13 +41,12 @@ use common_telemetry::{error, info, warn};
use failover_start::RegionFailoverStart;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use store_api::storage::RegionNumber;
use store_api::storage::{RegionId, RegionNumber};
use table::metadata::TableId;
use crate::error::{Error, RegisterProcedureLoaderSnafu, Result, TableMetadataManagerSnafu};
use crate::error::{RegisterProcedureLoaderSnafu, Result, TableMetadataManagerSnafu};
use crate::lock::DistLockRef;
use crate::metasrv::{SelectorContext, SelectorRef};
use crate::procedure::utils::region_lock_key;
use crate::service::mailbox::MailboxRef;
const OPEN_REGION_MESSAGE_TIMEOUT: Duration = Duration::from_secs(30);
@@ -357,7 +357,7 @@ impl Procedure for RegionFailoverProcedure {
.next(&self.context, &self.node.failed_region)
.await
.map_err(|e| {
if matches!(e, Error::RetryLater { .. }) {
if e.is_retryable() {
ProcedureError::retry_later(e)
} else {
ProcedureError::external(e)
@@ -372,8 +372,17 @@ impl Procedure for RegionFailoverProcedure {
fn lock_key(&self) -> LockKey {
let region_ident = &self.node.failed_region;
let region_key = region_lock_key(region_ident.table_id, region_ident.region_number);
LockKey::single_exclusive(region_key)
// TODO(weny): acquires the catalog, schema read locks.
let lock_key = vec![
TableLock::Read(region_ident.table_id).into(),
RegionLock::Write(RegionId::new(
region_ident.table_id,
region_ident.region_number,
))
.into(),
];
LockKey::new(lock_key)
}
}
@@ -771,7 +780,8 @@ mod tests {
let result = procedure.execute(&ctx).await;
assert!(result.is_err());
assert!(result.unwrap_err().is_retry_later());
let err = result.unwrap_err();
assert!(err.is_retry_later(), "err: {:?}", err);
assert_eq!(
r#"{"region_failover_state":"RegionFailoverStart","failover_candidate":null}"#,
serde_json::to_string(&procedure.node.state).unwrap()
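Editor's note: the lock change above swaps the single exclusive `{table_id}/region-{n}` string for a table read lock plus a region write lock, so failing over two regions of the same table no longer serializes. A dependency-free sketch of the idea (the key encoding here is illustrative, not the real `common_meta::lock_key` format):

type TableId = u32;

/// Illustrative stand-in for the procedure framework's StringKey.
#[derive(Debug, Clone, PartialEq)]
enum StringKey {
    Share(String),     // read lock: many holders allowed
    Exclusive(String), // write lock: single holder
}

enum TableLock {
    Read(TableId),
}

enum RegionLock {
    Write(u64), // a RegionId collapsed to u64 for simplicity
}

impl From<TableLock> for StringKey {
    fn from(lock: TableLock) -> Self {
        match lock {
            TableLock::Read(id) => StringKey::Share(format!("table/{id}")),
        }
    }
}

impl From<RegionLock> for StringKey {
    fn from(lock: RegionLock) -> Self {
        match lock {
            RegionLock::Write(id) => StringKey::Exclusive(format!("region/{id}")),
        }
    }
}

fn lock_key(table_id: TableId, region_id: u64) -> Vec<StringKey> {
    vec![
        TableLock::Read(table_id).into(),
        RegionLock::Write(region_id).into(),
    ]
}

fn main() {
    // Two failovers in the same table share the table read lock but hold
    // distinct region write locks, so their procedures can run concurrently.
    let a = lock_key(1024, 1);
    let b = lock_key(1024, 2);
    assert_eq!(a[0], b[0]);
    assert_ne!(a[1], b[1]);
}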

View File

@@ -13,17 +13,17 @@
// limitations under the License.
use async_trait::async_trait;
use common_error::ext::ErrorExt;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode;
use common_meta::peer::Peer;
use common_meta::RegionIdent;
use common_telemetry::info;
use serde::{Deserialize, Serialize};
use snafu::ensure;
use snafu::{ensure, location, Location};
use super::deactivate_region::DeactivateRegion;
use super::{RegionFailoverContext, State};
use crate::error::{RegionFailoverCandidatesNotFoundSnafu, Result, RetryLaterSnafu};
use crate::error::{self, RegionFailoverCandidatesNotFoundSnafu, Result};
use crate::selector::SelectorOptions;
#[derive(Serialize, Deserialize, Debug)]
@@ -93,10 +93,11 @@ impl State for RegionFailoverStart {
.await
.map_err(|e| {
if e.status_code() == StatusCode::RuntimeResourcesExhausted {
RetryLaterSnafu {
reason: format!("{e}"),
error::Error::RetryLaterWithSource {
reason: format!("Region failover aborted for {failed_region:?}"),
location: location!(),
source: BoxedError::new(e),
}
.build()
} else {
e
}

View File

@@ -15,6 +15,7 @@
use std::collections::HashMap;
use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_meta::key::datanode_table::RegionInfo;
use common_meta::key::table_route::TableRouteKey;
use common_meta::peer::Peer;
@@ -27,7 +28,7 @@ use store_api::storage::RegionNumber;
use super::invalidate_cache::InvalidateCache;
use super::{RegionFailoverContext, State};
use crate::error::{self, Result, RetryLaterSnafu, TableRouteNotFoundSnafu};
use crate::error::{self, Result, TableRouteNotFoundSnafu};
use crate::lock::keys::table_metadata_lock_key;
use crate::lock::Opts;
@@ -172,14 +173,12 @@ impl State for UpdateRegionMetadata {
) -> Result<Box<dyn State>> {
self.update_metadata(ctx, failed_region)
.await
.map_err(|e| {
RetryLaterSnafu {
reason: format!(
"Failed to update metadata for failed region: {}, error: {}",
failed_region, e
),
}
.build()
.map_err(BoxedError::new)
.context(error::RetryLaterWithSourceSnafu {
reason: format!(
"Failed to update metadata for failed region: {}",
failed_region
),
})?;
Ok(Box::new(InvalidateCache))
}
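Editor's note: the rewrite above replaces `RetryLaterSnafu { reason: format!("... error: {e}") }` with `.map_err(BoxedError::new).context(RetryLaterWithSourceSnafu { .. })`, so the cause survives as a structured source rather than being formatted into the reason; this is also why several tests below now match on `format!("{err:?}")` instead of `err.to_string()`. A dependency-free sketch of the propagation (illustrative names, not the snafu-generated selectors):

use std::error::Error as StdError;

type BoxedError = Box<dyn StdError + Send + Sync>;

#[derive(Debug)]
struct RetryLaterWithSource {
    reason: String,
    source: BoxedError,
}

/// Mirrors `.map_err(BoxedError::new).context(RetryLaterWithSourceSnafu { reason })`.
fn retry_later_with_source<T, E>(
    result: Result<T, E>,
    reason: impl Into<String>,
) -> Result<T, RetryLaterWithSource>
where
    E: StdError + Send + Sync + 'static,
{
    result.map_err(|e| RetryLaterWithSource {
        reason: reason.into(),
        source: Box::new(e),
    })
}

fn main() {
    let failed: Result<(), std::io::Error> = Err(std::io::Error::new(
        std::io::ErrorKind::Other,
        "kv write failed",
    ));
    let err = retry_later_with_source(failed, "Failed to update metadata for failed region")
        .unwrap_err();
    // The reason stays short; the Debug output still exposes the underlying cause.
    let debug = format!("{err:?}");
    assert!(debug.contains("kv write failed"));
}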

View File

@@ -30,27 +30,28 @@ use std::fmt::Debug;
use std::time::Duration;
use api::v1::meta::MailboxMessage;
use common_error::ext::BoxedError;
use common_meta::instruction::Instruction;
use common_meta::key::datanode_table::{DatanodeTableKey, DatanodeTableValue};
use common_meta::key::table_info::TableInfoValue;
use common_meta::key::table_route::TableRouteValue;
use common_meta::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
use common_meta::lock_key::{RegionLock, TableLock};
use common_meta::peer::Peer;
use common_meta::region_keeper::{MemoryRegionKeeperRef, OperatingRegionGuard};
use common_meta::ClusterId;
use common_procedure::error::{
Error as ProcedureError, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu,
};
use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status};
use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status, StringKey};
pub use manager::RegionMigrationProcedureTask;
use serde::{Deserialize, Serialize};
use snafu::{location, Location, OptionExt, ResultExt};
use snafu::{OptionExt, ResultExt};
use store_api::storage::RegionId;
use tokio::time::Instant;
use self::migration_start::RegionMigrationStart;
use crate::error::{self, Error, Result};
use crate::procedure::utils::region_lock_key;
use crate::error::{self, Result};
use crate::service::mailbox::{BroadcastChannel, MailboxRef};
/// It's shared in each step and available even after recovering.
@@ -68,11 +69,25 @@ pub struct PersistentContext {
to_peer: Peer,
/// The [RegionId] of migration region.
region_id: RegionId,
/// The timeout of waiting for a candidate to replay the WAL.
#[serde(with = "humantime_serde", default = "default_replay_timeout")]
replay_timeout: Duration,
}
fn default_replay_timeout() -> Duration {
Duration::from_secs(1)
}
impl PersistentContext {
pub fn lock_key(&self) -> String {
region_lock_key(self.region_id.table_id(), self.region_id.region_number())
pub fn lock_key(&self) -> Vec<StringKey> {
let region_id = self.region_id;
// TODO(weny): acquires the catalog, schema read locks.
let lock_key = vec![
TableLock::Read(region_id.table_id()).into(),
RegionLock::Write(region_id).into(),
];
lock_key
}
}
@@ -206,9 +221,9 @@ impl Context {
.get(table_id)
.await
.context(error::TableMetadataManagerSnafu)
.map_err(|e| error::Error::RetryLater {
reason: e.to_string(),
location: location!(),
.map_err(BoxedError::new)
.context(error::RetryLaterWithSourceSnafu {
reason: format!("Failed to get TableRoute: {table_id}"),
})?
.context(error::TableRouteNotFoundSnafu { table_id })?;
@@ -242,9 +257,9 @@ impl Context {
.get(table_id)
.await
.context(error::TableMetadataManagerSnafu)
.map_err(|e| error::Error::RetryLater {
reason: e.to_string(),
location: location!(),
.map_err(BoxedError::new)
.context(error::RetryLaterWithSourceSnafu {
reason: format!("Failed to get TableInfo: {table_id}"),
})?
.context(error::TableInfoNotFoundSnafu { table_id })?;
@@ -275,9 +290,9 @@ impl Context {
})
.await
.context(error::TableMetadataManagerSnafu)
.map_err(|e| error::Error::RetryLater {
reason: e.to_string(),
location: location!(),
.map_err(BoxedError::new)
.context(error::RetryLaterWithSourceSnafu {
reason: format!("Failed to get DatanodeTable: ({datanode_id},{table_id})"),
})?
.context(error::DatanodeTableNotFoundSnafu {
table_id,
@@ -398,7 +413,7 @@ impl Procedure for RegionMigrationProcedure {
let state = &mut self.state;
let (next, status) = state.next(&mut self.context).await.map_err(|e| {
if matches!(e, Error::RetryLater { .. }) {
if e.is_retryable() {
ProcedureError::retry_later(e)
} else {
ProcedureError::external(e)
@@ -418,8 +433,7 @@ impl Procedure for RegionMigrationProcedure {
}
fn lock_key(&self) -> LockKey {
let key = self.context.persistent_ctx.lock_key();
LockKey::single_exclusive(key)
LockKey::new(self.context.persistent_ctx.lock_key())
}
}
@@ -447,7 +461,7 @@ mod tests {
#[test]
fn test_lock_key() {
let persistent_context = new_persistent_context();
let expected_key = persistent_context.lock_key();
let expected_keys = persistent_context.lock_key();
let env = TestingEnv::new();
let context = env.context_factory();
@@ -455,13 +469,11 @@ mod tests {
let procedure = RegionMigrationProcedure::new(persistent_context, context);
let key = procedure.lock_key();
let keys = key
.keys_to_lock()
.cloned()
.map(|s| s.into_string())
.collect::<Vec<_>>();
let keys = key.keys_to_lock().cloned().collect::<Vec<_>>();
assert!(keys.contains(&expected_key));
for key in expected_keys {
assert!(keys.contains(&key));
}
}
#[test]
@@ -475,7 +487,7 @@ mod tests {
let serialized = procedure.dump().unwrap();
let expected = r#"{"persistent_ctx":{"cluster_id":0,"from_peer":{"id":1,"addr":""},"to_peer":{"id":2,"addr":""},"region_id":4398046511105},"state":{"region_migration_state":"RegionMigrationStart"}}"#;
let expected = r#"{"persistent_ctx":{"cluster_id":0,"from_peer":{"id":1,"addr":""},"to_peer":{"id":2,"addr":""},"region_id":4398046511105,"replay_timeout":"1s"},"state":{"region_migration_state":"RegionMigrationStart"}}"#;
assert_eq!(expected, serialized);
}
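Editor's note: the new `replay_timeout` on `PersistentContext` is serialized through humantime (hence `"replay_timeout":"1s"` in the dump test above) and falls back to a default when the field is missing, so procedure state persisted by older versions still deserializes. A sketch of that pattern, assuming `serde` (with derive), `serde_json`, and `humantime_serde` as dependencies:

use std::time::Duration;

use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize)]
struct PersistentContext {
    region_id: u64,
    // Stored as a human-readable duration ("1s"); defaults when absent so
    // state written before this change still loads.
    #[serde(with = "humantime_serde", default = "default_replay_timeout")]
    replay_timeout: Duration,
}

fn default_replay_timeout() -> Duration {
    Duration::from_secs(1)
}

fn main() {
    // Old persisted state without the field still deserializes.
    let old: PersistentContext =
        serde_json::from_str(r#"{"region_id":4398046511105}"#).unwrap();
    assert_eq!(old.replay_timeout, Duration::from_secs(1));

    // And it round-trips with the humantime encoding.
    let json = serde_json::to_string(&old).unwrap();
    assert!(json.contains(r#""replay_timeout":"1s""#));
}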

View File

@@ -55,6 +55,7 @@ impl Default for DowngradeLeaderRegion {
#[typetag::serde]
impl State for DowngradeLeaderRegion {
async fn next(&mut self, ctx: &mut Context) -> Result<(Box<dyn State>, Status)> {
let replay_timeout = ctx.persistent_ctx.replay_timeout;
// Ensures the `leader_region_lease_deadline` must exist after recovering.
ctx.volatile_ctx
.set_leader_region_lease_deadline(Duration::from_secs(REGION_LEASE_SECS));
@@ -69,7 +70,10 @@ impl State for DowngradeLeaderRegion {
}
Ok((
Box::<UpgradeCandidateRegion>::default(),
Box::new(UpgradeCandidateRegion {
replay_timeout,
..Default::default()
}),
Status::executing(false),
))
}
@@ -226,6 +230,7 @@ mod tests {
to_peer: Peer::empty(2),
region_id: RegionId::new(1024, 1),
cluster_id: 0,
replay_timeout: Duration::from_millis(1000),
}
}
@@ -369,7 +374,7 @@ mod tests {
assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
assert!(err.to_string().contains("test mocked"));
assert!(format!("{err:?}").contains("test mocked"), "err: {err:?}",);
}
#[tokio::test]

View File

@@ -16,6 +16,7 @@ use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt::Display;
use std::sync::{Arc, RwLock};
use std::time::Duration;
use common_meta::key::table_route::TableRouteValue;
use common_meta::peer::Peer;
@@ -61,15 +62,23 @@ pub struct RegionMigrationProcedureTask {
pub(crate) region_id: RegionId,
pub(crate) from_peer: Peer,
pub(crate) to_peer: Peer,
pub(crate) replay_timeout: Duration,
}
impl RegionMigrationProcedureTask {
pub fn new(cluster_id: ClusterId, region_id: RegionId, from_peer: Peer, to_peer: Peer) -> Self {
pub fn new(
cluster_id: ClusterId,
region_id: RegionId,
from_peer: Peer,
to_peer: Peer,
replay_timeout: Duration,
) -> Self {
Self {
cluster_id,
region_id,
from_peer,
to_peer,
replay_timeout,
}
}
}
@@ -91,6 +100,7 @@ impl From<RegionMigrationProcedureTask> for PersistentContext {
region_id,
from_peer,
to_peer,
replay_timeout,
}: RegionMigrationProcedureTask,
) -> Self {
PersistentContext {
@@ -98,6 +108,7 @@ impl From<RegionMigrationProcedureTask> for PersistentContext {
from_peer,
to_peer,
region_id,
replay_timeout,
}
}
}
@@ -319,6 +330,7 @@ mod test {
region_id,
from_peer: Peer::empty(2),
to_peer: Peer::empty(1),
replay_timeout: Duration::from_millis(1000),
};
// Inserts one
manager
@@ -342,6 +354,7 @@ mod test {
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(1),
replay_timeout: Duration::from_millis(1000),
};
let err = manager.submit_procedure(task).await.unwrap_err();
@@ -359,6 +372,7 @@ mod test {
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
replay_timeout: Duration::from_millis(1000),
};
let err = manager.submit_procedure(task).await.unwrap_err();
@@ -376,6 +390,7 @@ mod test {
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
replay_timeout: Duration::from_millis(1000),
};
let table_info = new_test_table_info(1024, vec![1]).into();
@@ -403,6 +418,7 @@ mod test {
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
replay_timeout: Duration::from_millis(1000),
};
let table_info = new_test_table_info(1024, vec![1]).into();
@@ -434,6 +450,7 @@ mod test {
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
replay_timeout: Duration::from_millis(1000),
};
let table_info = new_test_table_info(1024, vec![1]).into();
@@ -460,6 +477,7 @@ mod test {
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
replay_timeout: Duration::from_millis(1000),
};
let err = manager

View File

@@ -383,7 +383,7 @@ mod tests {
assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
assert!(err.to_string().contains("test mocked"));
assert!(format!("{err:?}").contains("test mocked"));
}
#[tokio::test]

View File

@@ -16,6 +16,7 @@ use std::assert_matches::assert_matches;
use std::collections::HashMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::Duration;
use api::v1::meta::mailbox_message::Payload;
use api::v1::meta::{HeartbeatResponse, MailboxMessage, RequestHeader};
@@ -281,6 +282,7 @@ pub fn new_persistent_context(from: u64, to: u64, region_id: RegionId) -> Persis
to_peer: Peer::empty(to),
region_id,
cluster_id: 0,
replay_timeout: Duration::from_millis(1000),
}
}

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_error::ext::BoxedError;
use common_meta::rpc::router::RegionStatus;
use snafu::ResultExt;
@@ -60,13 +61,15 @@ impl UpdateMetadata {
.await
.context(error::TableMetadataManagerSnafu)
{
debug_assert!(ctx.remove_table_route_value());
return error::RetryLaterSnafu {
reason: format!("Failed to update the table route during the downgrading leader region, error: {err}")
}.fail();
ctx.remove_table_route_value();
return Err(BoxedError::new(err)).context(error::RetryLaterWithSourceSnafu {
reason: format!(
"Failed to update the table route during the downgrading leader region, region_id: {region_id}, from_peer_id: {from_peer_id}"
),
});
}
debug_assert!(ctx.remove_table_route_value());
ctx.remove_table_route_value();
Ok(())
}
@@ -163,13 +166,9 @@ mod tests {
ctx.volatile_ctx.table_route = Some(original_table_route);
let err = state.downgrade_leader_region(&mut ctx).await.unwrap_err();
assert!(ctx.volatile_ctx.table_route.is_none());
assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
assert!(err.to_string().contains("Failed to update the table route"));
assert!(format!("{err:?}").contains("Failed to update the table route"));
}
#[tokio::test]

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_error::ext::BoxedError;
use snafu::ResultExt;
use crate::error::{self, Result};
@@ -44,13 +45,13 @@ impl UpdateMetadata {
.await
.context(error::TableMetadataManagerSnafu)
{
debug_assert!(ctx.remove_table_route_value());
return error::RetryLaterSnafu {
reason: format!("Failed to update the table route during the rollback downgraded leader region, error: {err}")
}.fail();
ctx.remove_table_route_value();
return Err(BoxedError::new(err)).context(error::RetryLaterWithSourceSnafu {
reason: format!("Failed to update the table route during the rollback downgraded leader region: {region_id}"),
});
}
debug_assert!(ctx.remove_table_route_value());
ctx.remove_table_route_value();
Ok(())
}
@@ -157,9 +158,8 @@ mod tests {
.await
.unwrap_err();
assert!(ctx.volatile_ctx.table_route.is_none());
assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
assert!(err.to_string().contains("Failed to update the table route"));
assert!(format!("{err:?}").contains("Failed to update the table route"));
state.rollback_downgraded_region(&mut ctx).await.unwrap();

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_error::ext::BoxedError;
use common_meta::key::datanode_table::RegionInfo;
use common_meta::rpc::router::{region_distribution, RegionRoute};
use common_telemetry::{info, warn};
@@ -167,13 +168,13 @@ impl UpdateMetadata {
.await
.context(error::TableMetadataManagerSnafu)
{
debug_assert!(ctx.remove_table_route_value());
return error::RetryLaterSnafu {
reason: format!("Failed to update the table route during the upgrading candidate region, error: {err}")
}.fail();
ctx.remove_table_route_value();
return Err(BoxedError::new(err)).context(error::RetryLaterWithSourceSnafu {
reason: format!("Failed to update the table route during the upgrading candidate region: {region_id}"),
});
};
debug_assert!(ctx.remove_table_route_value());
ctx.remove_table_route_value();
// Consumes the guard.
ctx.volatile_ctx.opening_region_guard.take();
@@ -354,15 +355,12 @@ mod tests {
.register(2, RegionId::new(table_id, 1))
.unwrap();
ctx.volatile_ctx.opening_region_guard = Some(guard);
let err = state.upgrade_candidate_region(&mut ctx).await.unwrap_err();
assert!(ctx.volatile_ctx.table_route.is_none());
assert!(ctx.volatile_ctx.opening_region_guard.is_some());
assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
assert!(err.to_string().contains("Failed to update the table route"));
assert!(format!("{err:?}").contains("Failed to update the table route"));
}
#[tokio::test]

View File

@@ -33,14 +33,14 @@ use crate::service::mailbox::Channel;
#[derive(Debug, Serialize, Deserialize)]
pub struct UpgradeCandidateRegion {
// The optimistic retry times.
optimistic_retry: usize,
pub(crate) optimistic_retry: usize,
// The retry initial interval.
retry_initial_interval: Duration,
pub(crate) retry_initial_interval: Duration,
// The replay timeout of an instruction.
replay_timeout: Duration,
pub(crate) replay_timeout: Duration,
// If true, the candidate region must replay the WAL to the latest entry id.
// Otherwise, it will roll back to the old leader region.
require_ready: bool,
pub(crate) require_ready: bool,
}
impl Default for UpgradeCandidateRegion {
@@ -236,6 +236,7 @@ mod tests {
to_peer: Peer::empty(2),
region_id: RegionId::new(1024, 1),
cluster_id: 0,
replay_timeout: Duration::from_millis(1000),
}
}
@@ -335,7 +336,7 @@ mod tests {
assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
assert!(err.to_string().contains("test mocked"));
assert!(format!("{err:?}").contains("test mocked"));
}
#[tokio::test]
@@ -397,7 +398,7 @@ mod tests {
assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
assert!(err.to_string().contains("still replaying the wal"));
assert!(format!("{err:?}").contains("still replaying the wal"));
// Sets the `require_ready` to false.
state.require_ready = false;

View File

@@ -12,13 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use store_api::storage::{RegionNumber, TableId};
pub fn region_lock_key(table_id: TableId, region_number: RegionNumber) -> String {
format!("{}/region-{}", table_id, region_number)
}
#[cfg(feature = "mock")]
#[cfg(any(test, feature = "mock"))]
pub mod mock {
use std::io::Error;
use std::sync::Arc;

View File

@@ -15,9 +15,11 @@
use std::collections::HashMap;
use std::num::ParseIntError;
use std::str::FromStr;
use std::time::Duration;
use common_meta::peer::Peer;
use common_meta::{distributed_time_constants, ClusterId};
use humantime::parse_duration;
use serde::Serialize;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::RegionId;
@@ -43,6 +45,7 @@ struct SubmitRegionMigrationTaskRequest {
region_id: RegionId,
from_peer_id: u64,
to_peer_id: u64,
replay_timeout: Duration,
}
#[derive(Debug, Serialize)]
@@ -71,6 +74,8 @@ where
Ok(parse_result)
}
const DEFAULT_REPLAY_TIMEOUT: Duration = Duration::from_millis(1000);
impl TryFrom<&HashMap<String, String>> for SubmitRegionMigrationTaskRequest {
type Error = Error;
@@ -89,11 +94,18 @@ impl TryFrom<&HashMap<String, String>> for SubmitRegionMigrationTaskRequest {
error::MissingRequiredParameterSnafu { param: key }.fail()
})?;
let replay_timeout = if let Some(duration) = params.get("replay_timeout") {
parse_duration(duration).context(error::ParseDurationSnafu { duration })?
} else {
DEFAULT_REPLAY_TIMEOUT
};
Ok(SubmitRegionMigrationTaskRequest {
cluster_id,
region_id: RegionId::from_u64(region_id),
from_peer_id,
to_peer_id,
replay_timeout,
})
}
}
@@ -131,6 +143,7 @@ impl SubmitRegionMigrationTaskHandler {
region_id,
from_peer_id,
to_peer_id,
replay_timeout,
} = task;
let from_peer = self.lookup_peer(cluster_id, from_peer_id).await?.context(
@@ -150,6 +163,7 @@ impl SubmitRegionMigrationTaskHandler {
region_id,
from_peer,
to_peer,
replay_timeout,
})
.await?;
@@ -187,6 +201,7 @@ mod tests {
use std::collections::HashMap;
use crate::error;
use crate::service::admin::region_migration::DEFAULT_REPLAY_TIMEOUT;
#[test]
fn test_parse_migration_task_req() {
@@ -212,6 +227,7 @@ mod tests {
region_id: RegionId::new(1024, 1),
from_peer_id: 1,
to_peer_id: 2,
replay_timeout: DEFAULT_REPLAY_TIMEOUT
},
task_req
);
@@ -233,6 +249,7 @@ mod tests {
region_id: RegionId::new(1024, 1),
from_peer_id: 1,
to_peer_id: 2,
replay_timeout: DEFAULT_REPLAY_TIMEOUT
},
task_req
);
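Editor's note: the handler above reads an optional `replay_timeout` query parameter with `humantime::parse_duration` and falls back to `DEFAULT_REPLAY_TIMEOUT`. For reference, a tiny standalone check of what that parser accepts (assuming the `humantime` crate; the behavior shown is humantime's, not this handler's):

use std::time::Duration;

fn main() {
    // humantime understands unit-suffixed values.
    assert_eq!(humantime::parse_duration("1s").unwrap(), Duration::from_secs(1));
    assert_eq!(
        humantime::parse_duration("500ms").unwrap(),
        Duration::from_millis(500)
    );
    // Bare numbers without a unit are rejected, which is what surfaces as the
    // handler's ParseDuration error.
    assert!(humantime::parse_duration("1000").is_err());
}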

View File

@@ -15,6 +15,7 @@
mod alter;
mod close;
mod create;
mod drop;
mod open;
mod put;
mod read;
@@ -83,13 +84,15 @@ use crate::utils;
/// | Operations | Logical Region | Physical Region |
/// | ---------- | -------------- | --------------- |
/// | Create | ✅ | ✅ |
/// | Drop | ✅ | |
/// | Drop | ✅ | ❓* |
/// | Write | ✅ | ❌ |
/// | Read | ✅ | ✅ |
/// | Close | ✅ | ✅ |
/// | Open | ✅ | ✅ |
/// | Alter | ✅ | ❌ |
///
/// *: Physical region can be dropped only when all related logical regions are dropped.
///
/// ## Internal Columns
///
/// The physical data region contains two internal columns. Should
@@ -123,7 +126,7 @@ impl RegionEngine for MetricEngine {
RegionRequest::Put(put) => self.inner.put_region(region_id, put).await,
RegionRequest::Delete(_) => todo!(),
RegionRequest::Create(create) => self.inner.create_region(region_id, create).await,
RegionRequest::Drop(_) => todo!(),
RegionRequest::Drop(drop) => self.inner.drop_region(region_id, drop).await,
RegionRequest::Open(open) => self.inner.open_region(region_id, open).await,
RegionRequest::Close(close) => self.inner.close_region(region_id, close).await,
RegionRequest::Alter(alter) => self.inner.alter_region(region_id, alter).await,

View File

@@ -73,8 +73,7 @@ impl MetricEngineInner {
let (data_region_id, metadata_region_id) = Self::transform_region_id(region_id);
// create metadata region
let create_metadata_region_request =
self.create_request_for_metadata_region(&request.region_dir);
let create_metadata_region_request = self.create_request_for_metadata_region(&request);
self.mito
.handle_request(
metadata_region_id,
@@ -287,7 +286,10 @@ impl MetricEngineInner {
/// Build [RegionCreateRequest] for metadata region
///
/// This method will append [METADATA_REGION_SUBDIR] to the `region_dir` of the given request.
pub fn create_request_for_metadata_region(&self, region_dir: &str) -> RegionCreateRequest {
pub fn create_request_for_metadata_region(
&self,
request: &RegionCreateRequest,
) -> RegionCreateRequest {
// ts TIME INDEX DEFAULT 0
let timestamp_column_metadata = ColumnMetadata {
column_id: METADATA_SCHEMA_TIMESTAMP_COLUMN_INDEX as _,
@@ -324,7 +326,7 @@ impl MetricEngineInner {
};
// concat region dir
let metadata_region_dir = join_dir(region_dir, METADATA_REGION_SUBDIR);
let metadata_region_dir = join_dir(&request.region_dir, METADATA_REGION_SUBDIR);
RegionCreateRequest {
engine: MITO_ENGINE_NAME.to_string(),
@@ -334,7 +336,7 @@ impl MetricEngineInner {
value_column_metadata,
],
primary_key: vec![METADATA_SCHEMA_KEY_COLUMN_INDEX as _],
options: HashMap::new(),
options: request.options.clone(),
region_dir: metadata_region_dir,
}
}
@@ -532,7 +534,7 @@ mod test {
],
primary_key: vec![0],
options: HashMap::new(),
region_dir: "test_dir".to_string(),
region_dir: "/test_dir".to_string(),
};
let env = TestEnv::new().await;

View File

@@ -0,0 +1,138 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Drop a metric region
use mito2::engine::MITO_ENGINE_NAME;
use object_store::util::join_dir;
use snafu::{OptionExt, ResultExt};
use store_api::metric_engine_consts::{
DATA_REGION_SUBDIR, METADATA_REGION_SUBDIR, PHYSICAL_TABLE_METADATA_KEY,
};
use store_api::region_engine::RegionEngine;
use store_api::region_request::{
AffectedRows, RegionDropRequest, RegionOpenRequest, RegionRequest,
};
use store_api::storage::RegionId;
use super::MetricEngineInner;
use crate::error::{
CloseMitoRegionSnafu, Error, LogicalRegionNotFoundSnafu, OpenMitoRegionSnafu,
PhysicalRegionBusySnafu, PhysicalRegionNotFoundSnafu, Result,
};
use crate::metrics::PHYSICAL_REGION_COUNT;
use crate::{metadata_region, utils};
impl MetricEngineInner {
pub async fn drop_region(
&self,
region_id: RegionId,
_req: RegionDropRequest,
) -> Result<AffectedRows> {
let data_region_id = utils::to_data_region_id(region_id);
// enclose the guard in a block to prevent the guard from polluting the async context
let (is_physical_region, is_physical_region_busy) = {
if let Some(logical_regions) = self
.state
.read()
.unwrap()
.physical_regions()
.get(&data_region_id)
{
(true, !logical_regions.is_empty())
} else {
// the second argument is not used, just pass in a dummy value
(false, true)
}
};
if is_physical_region {
// check that no logical region is still related to this physical region
if is_physical_region_busy {
// reject if there is any present logical region
return Err(PhysicalRegionBusySnafu {
region_id: data_region_id,
}
.build());
}
self.drop_physical_region(data_region_id).await
} else {
// cannot merge these two `if`s, otherwise the type checker will complain
let metadata_region_id = self
.state
.read()
.unwrap()
.logical_regions()
.get(&region_id)
.copied();
if let Some(metadata_region_id) = metadata_region_id {
self.drop_logical_region(region_id, metadata_region_id)
.await
} else {
Err(LogicalRegionNotFoundSnafu { region_id }.build())
}
}
}
async fn drop_physical_region(&self, region_id: RegionId) -> Result<AffectedRows> {
let data_region_id = utils::to_data_region_id(region_id);
let metadata_region_id = utils::to_metadata_region_id(region_id);
// Drop mito regions.
// Since the physical regions are going to be dropped, we don't need to
// update the contents in metadata region.
self.mito
.handle_request(data_region_id, RegionRequest::Drop(RegionDropRequest {}))
.await
.with_context(|_| CloseMitoRegionSnafu { region_id })?;
self.mito
.handle_request(
metadata_region_id,
RegionRequest::Drop(RegionDropRequest {}),
)
.await
.with_context(|_| CloseMitoRegionSnafu { region_id })?;
PHYSICAL_REGION_COUNT.dec();
// Update engine state
self.state
.write()
.unwrap()
.remove_physical_region(data_region_id)?;
Ok(0)
}
async fn drop_logical_region(
&self,
logical_region_id: RegionId,
physical_region_id: RegionId,
) -> Result<AffectedRows> {
// Update metadata
self.metadata_region
.remove_logical_region(physical_region_id, logical_region_id)
.await?;
// Update engine state
self.state
.write()
.unwrap()
.remove_logical_region(logical_region_id)?;
Ok(0)
}
}

View File

@@ -115,4 +115,20 @@ impl MetricEngineState {
self.physical_columns.remove(&physical_region_id);
Ok(())
}
/// Remove all data that are related to the logical region id.
pub fn remove_logical_region(&mut self, logical_region_id: RegionId) -> Result<()> {
let physical_region_id = self.logical_regions.remove(&logical_region_id).context(
PhysicalRegionNotFoundSnafu {
region_id: logical_region_id,
},
)?;
self.physical_regions
.get_mut(&physical_region_id)
.unwrap() // Safety: physical_region_id is got from physical_regions
.remove(&logical_region_id);
Ok(())
}
}

View File

@@ -146,6 +146,15 @@ pub enum Error {
source: store_api::metadata::MetadataError,
location: Location,
},
#[snafu(display(
"Physical region {} is busy, there are still some logical regions using it",
region_id
))]
PhysicalRegionBusy {
region_id: RegionId,
location: Location,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@@ -158,7 +167,8 @@ impl ErrorExt for Error {
InternalColumnOccupied { .. }
| MissingRegionOption { .. }
| ConflictRegionOption { .. }
| ColumnTypeMismatch { .. } => StatusCode::InvalidArguments,
| ColumnTypeMismatch { .. }
| PhysicalRegionBusy { .. } => StatusCode::InvalidArguments,
ForbiddenPhysicalAlter { .. } => StatusCode::Unsupported,

View File

@@ -29,7 +29,7 @@ use store_api::metric_engine_consts::{
METADATA_SCHEMA_VALUE_COLUMN_NAME,
};
use store_api::region_engine::RegionEngine;
use store_api::region_request::RegionPutRequest;
use store_api::region_request::{RegionDeleteRequest, RegionPutRequest};
use store_api::storage::{RegionId, ScanRequest};
use crate::error::{
@@ -111,6 +111,34 @@ impl MetadataRegion {
.await
}
/// Remove a registered logical region from metadata.
///
/// This method doesn't check if the previous key exists.
pub async fn remove_logical_region(
&self,
physical_region_id: RegionId,
logical_region_id: RegionId,
) -> Result<()> {
// concat region key
let region_id = utils::to_metadata_region_id(physical_region_id);
let region_key = Self::concat_region_key(logical_region_id);
// concat column keys
let logical_columns = self
.logical_columns(physical_region_id, logical_region_id)
.await?;
let mut column_keys = logical_columns
.into_iter()
.map(|(col, _)| Self::concat_column_key(logical_region_id, &col))
.collect::<Vec<_>>();
// remove region key and column keys
column_keys.push(region_key);
self.delete(region_id, &column_keys).await?;
Ok(())
}
/// Check if the given logical region exists.
pub async fn is_logical_region_exists(
&self,
@@ -354,6 +382,20 @@ impl MetadataRegion {
Ok(result)
}
/// Deletes the given keys. For performance reasons, this method
/// doesn't check whether those keys exist.
async fn delete(&self, region_id: RegionId, keys: &[String]) -> Result<()> {
let delete_request = Self::build_delete_request(keys);
self.mito
.handle_request(
region_id,
store_api::region_request::RegionRequest::Delete(delete_request),
)
.await
.context(MitoWriteOperationSnafu)?;
Ok(())
}
/// Builds a [ScanRequest] to read metadata for a given key.
/// The request will contain an EQ filter on the key column.
///
@@ -409,6 +451,39 @@ impl MetadataRegion {
RegionPutRequest { rows }
}
fn build_delete_request(keys: &[String]) -> RegionDeleteRequest {
let cols = vec![
ColumnSchema {
column_name: METADATA_SCHEMA_TIMESTAMP_COLUMN_NAME.to_string(),
datatype: ColumnDataType::TimestampMillisecond as _,
semantic_type: SemanticType::Timestamp as _,
..Default::default()
},
ColumnSchema {
column_name: METADATA_SCHEMA_KEY_COLUMN_NAME.to_string(),
datatype: ColumnDataType::String as _,
semantic_type: SemanticType::Tag as _,
..Default::default()
},
];
let rows = keys
.iter()
.map(|key| Row {
values: vec![
Value {
value_data: Some(ValueData::TimestampMillisecondValue(0)),
},
Value {
value_data: Some(ValueData::StringValue(key.to_string())),
},
],
})
.collect();
let rows = Rows { schema: cols, rows };
RegionDeleteRequest { rows }
}
}
#[cfg(test)]

View File

@@ -14,15 +14,17 @@
use std::sync::Arc;
use object_store::services::Fs;
use object_store::util::{join_dir, with_instrument_layers};
use object_store::ObjectStore;
use snafu::ResultExt;
use store_api::metadata::RegionMetadataRef;
use crate::cache::write_cache::SstUploadRequest;
use crate::cache::CacheManagerRef;
use crate::error::{DeleteSstSnafu, Result};
use crate::error::{CleanDirSnafu, DeleteIndexSnafu, DeleteSstSnafu, OpenDalSnafu, Result};
use crate::read::Source;
use crate::sst::file::{FileHandle, FileId};
use crate::sst::file::{FileHandle, FileId, FileMeta};
use crate::sst::location;
use crate::sst::parquet::reader::ParquetReaderBuilder;
use crate::sst::parquet::writer::ParquetWriter;
@@ -64,13 +66,27 @@ impl AccessLayer {
&self.object_store
}
/// Deletes a SST file with given file id.
pub(crate) async fn delete_sst(&self, file_id: FileId) -> Result<()> {
let path = location::sst_file_path(&self.region_dir, file_id);
/// Deletes a SST file (and its index file if it has one) with given file id.
pub(crate) async fn delete_sst(&self, file_meta: &FileMeta) -> Result<()> {
let path = location::sst_file_path(&self.region_dir, file_meta.file_id);
self.object_store
.delete(&path)
.await
.context(DeleteSstSnafu { file_id })
.context(DeleteSstSnafu {
file_id: file_meta.file_id,
})?;
if file_meta.inverted_index_available() {
let path = location::index_file_path(&self.region_dir, file_meta.file_id);
self.object_store
.delete(&path)
.await
.context(DeleteIndexSnafu {
file_id: file_meta.file_id,
})?;
}
Ok(())
}
/// Returns a reader builder for specific `file`.
@@ -86,28 +102,45 @@ impl AccessLayer {
request: SstWriteRequest,
write_opts: &WriteOptions,
) -> Result<Option<SstInfo>> {
let path = location::sst_file_path(&self.region_dir, request.file_id);
let file_path = location::sst_file_path(&self.region_dir, request.file_id);
let index_file_path = location::index_file_path(&self.region_dir, request.file_id);
let region_id = request.metadata.region_id;
if let Some(write_cache) = request.cache_manager.write_cache() {
let sst_info = if let Some(write_cache) = request.cache_manager.write_cache() {
// Write to the write cache.
return write_cache
write_cache
.write_and_upload_sst(
SstUploadRequest {
file_id: request.file_id,
metadata: request.metadata,
source: request.source,
storage: request.storage,
upload_path: path,
upload_path: file_path,
index_upload_path: index_file_path,
remote_store: self.object_store.clone(),
},
write_opts,
)
.await;
.await?
} else {
// Write cache is disabled.
let mut writer =
ParquetWriter::new(file_path, request.metadata, self.object_store.clone());
writer.write_all(request.source, write_opts).await?
};
// Put parquet metadata to cache manager.
if let Some(sst_info) = &sst_info {
if let Some(parquet_metadata) = &sst_info.file_metadata {
request.cache_manager.put_parquet_meta_data(
region_id,
request.file_id,
parquet_metadata.clone(),
)
}
}
// Write cache is disabled.
let mut writer = ParquetWriter::new(path, request.metadata, self.object_store.clone());
writer.write_all(request.source, write_opts).await
Ok(sst_info)
}
}
@@ -119,3 +152,31 @@ pub(crate) struct SstWriteRequest {
pub(crate) cache_manager: CacheManagerRef,
pub(crate) storage: Option<String>,
}
/// Creates a fs object store with atomic write dir.
pub(crate) async fn new_fs_object_store(root: &str) -> Result<ObjectStore> {
let atomic_write_dir = join_dir(root, ".tmp/");
clean_dir(&atomic_write_dir).await?;
let mut builder = Fs::default();
builder.root(root).atomic_write_dir(&atomic_write_dir);
let object_store = ObjectStore::new(builder).context(OpenDalSnafu)?.finish();
// Add layers.
let object_store = with_instrument_layers(object_store);
Ok(object_store)
}
/// Clean the directory.
async fn clean_dir(dir: &str) -> Result<()> {
if tokio::fs::try_exists(dir)
.await
.context(CleanDirSnafu { dir })?
{
tokio::fs::remove_dir_all(dir)
.await
.context(CleanDirSnafu { dir })?;
}
Ok(())
}

View File

@@ -47,9 +47,10 @@ const PAGE_TYPE: &str = "page";
// Metrics type key for files on the local store.
const FILE_TYPE: &str = "file";
// TODO(yingwen): Builder for cache manager.
/// Manages cached data for the engine.
///
/// All caches are disabled by default.
#[derive(Default)]
pub struct CacheManager {
/// Cache for SST metadata.
sst_meta_cache: Option<SstMetaCache>,
@@ -58,70 +59,15 @@ pub struct CacheManager {
/// Cache for SST pages.
page_cache: Option<PageCache>,
/// A Cache for writing files to object stores.
// TODO(yingwen): Remove this once the cache is ready.
#[allow(unused)]
write_cache: Option<WriteCacheRef>,
}
pub type CacheManagerRef = Arc<CacheManager>;
impl CacheManager {
/// Creates a new manager with specific cache size in bytes.
pub fn new(
sst_meta_cache_size: u64,
vector_cache_size: u64,
page_cache_size: u64,
) -> CacheManager {
let sst_meta_cache = if sst_meta_cache_size == 0 {
None
} else {
let cache = Cache::builder()
.max_capacity(sst_meta_cache_size)
.weigher(meta_cache_weight)
.eviction_listener(|k, v, _cause| {
let size = meta_cache_weight(&k, &v);
CACHE_BYTES
.with_label_values(&[SST_META_TYPE])
.sub(size.into());
})
.build();
Some(cache)
};
let vector_cache = if vector_cache_size == 0 {
None
} else {
let cache = Cache::builder()
.max_capacity(vector_cache_size)
.weigher(vector_cache_weight)
.eviction_listener(|k, v, _cause| {
let size = vector_cache_weight(&k, &v);
CACHE_BYTES
.with_label_values(&[VECTOR_TYPE])
.sub(size.into());
})
.build();
Some(cache)
};
let page_cache = if page_cache_size == 0 {
None
} else {
let cache = Cache::builder()
.max_capacity(page_cache_size)
.weigher(page_cache_weight)
.eviction_listener(|k, v, _cause| {
let size = page_cache_weight(&k, &v);
CACHE_BYTES.with_label_values(&[PAGE_TYPE]).sub(size.into());
})
.build();
Some(cache)
};
CacheManager {
sst_meta_cache,
vector_cache,
page_cache,
write_cache: None,
}
/// Returns a builder to build the cache.
pub fn builder() -> CacheManagerBuilder {
CacheManagerBuilder::default()
}
/// Gets cached [ParquetMetaData].
@@ -201,6 +147,86 @@ impl CacheManager {
}
}
/// Builder to construct a [CacheManager].
#[derive(Default)]
pub struct CacheManagerBuilder {
sst_meta_cache_size: u64,
vector_cache_size: u64,
page_cache_size: u64,
write_cache: Option<WriteCacheRef>,
}
impl CacheManagerBuilder {
/// Sets meta cache size.
pub fn sst_meta_cache_size(mut self, bytes: u64) -> Self {
self.sst_meta_cache_size = bytes;
self
}
/// Sets vector cache size.
pub fn vector_cache_size(mut self, bytes: u64) -> Self {
self.vector_cache_size = bytes;
self
}
/// Sets page cache size.
pub fn page_cache_size(mut self, bytes: u64) -> Self {
self.page_cache_size = bytes;
self
}
/// Sets write cache.
pub fn write_cache(mut self, cache: Option<WriteCacheRef>) -> Self {
self.write_cache = cache;
self
}
/// Builds the [CacheManager].
pub fn build(self) -> CacheManager {
let sst_meta_cache = (self.sst_meta_cache_size != 0).then(|| {
Cache::builder()
.max_capacity(self.sst_meta_cache_size)
.weigher(meta_cache_weight)
.eviction_listener(|k, v, _cause| {
let size = meta_cache_weight(&k, &v);
CACHE_BYTES
.with_label_values(&[SST_META_TYPE])
.sub(size.into());
})
.build()
});
let vector_cache = (self.vector_cache_size != 0).then(|| {
Cache::builder()
.max_capacity(self.vector_cache_size)
.weigher(vector_cache_weight)
.eviction_listener(|k, v, _cause| {
let size = vector_cache_weight(&k, &v);
CACHE_BYTES
.with_label_values(&[VECTOR_TYPE])
.sub(size.into());
})
.build()
});
let page_cache = (self.page_cache_size != 0).then(|| {
Cache::builder()
.max_capacity(self.page_cache_size)
.weigher(page_cache_weight)
.eviction_listener(|k, v, _cause| {
let size = page_cache_weight(&k, &v);
CACHE_BYTES.with_label_values(&[PAGE_TYPE]).sub(size.into());
})
.build()
});
CacheManager {
sst_meta_cache,
vector_cache,
page_cache,
write_cache: self.write_cache,
}
}
}
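Editor's note: with the fixed three-argument constructor gone, callers opt into each cache through the builder; any size left at the default of zero keeps that cache disabled, as the updated tests below show. A typical construction inside the crate might look like:

// Enable only what is needed; sizes left at 0 stay disabled.
let cache_manager = CacheManager::builder()
    .sst_meta_cache_size(32 * 1024 * 1024)
    .vector_cache_size(16 * 1024 * 1024)
    .page_cache_size(128 * 1024 * 1024)
    .build();
// The write cache stays disabled unless explicitly provided via `.write_cache(...)`.
assert!(cache_manager.write_cache().is_none());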
fn meta_cache_weight(k: &SstMetaKey, v: &Arc<ParquetMetaData>) -> u32 {
// We ignore the size of `Arc`.
(k.estimated_size() + parquet_meta_size(v)) as u32
@@ -293,7 +319,7 @@ mod tests {
#[test]
fn test_disable_cache() {
let cache = CacheManager::new(0, 0, 0);
let cache = CacheManager::default();
assert!(cache.sst_meta_cache.is_none());
assert!(cache.vector_cache.is_none());
assert!(cache.page_cache.is_none());
@@ -318,11 +344,13 @@ mod tests {
let pages = Arc::new(PageValue::new(Vec::new()));
cache.put_pages(key.clone(), pages);
assert!(cache.get_pages(&key).is_none());
assert!(cache.write_cache().is_none());
}
#[test]
fn test_parquet_meta_cache() {
let cache = CacheManager::new(2000, 0, 0);
let cache = CacheManager::builder().sst_meta_cache_size(2000).build();
let region_id = RegionId::new(1, 1);
let file_id = FileId::random();
assert!(cache.get_parquet_meta_data(region_id, file_id).is_none());
@@ -335,7 +363,7 @@ mod tests {
#[test]
fn test_repeated_vector_cache() {
let cache = CacheManager::new(0, 4096, 0);
let cache = CacheManager::builder().vector_cache_size(4096).build();
let value = Value::Int64(10);
assert!(cache.get_repeated_vector(&value).is_none());
let vector: VectorRef = Arc::new(Int64Vector::from_slice([10, 10, 10, 10]));
@@ -346,7 +374,7 @@ mod tests {
#[test]
fn test_page_cache() {
let cache = CacheManager::new(0, 0, 1000);
let cache = CacheManager::builder().page_cache_size(1000).build();
let region_id = RegionId::new(1, 1);
let file_id = FileId::random();
let key = PageKey {

View File

@@ -14,9 +14,11 @@
//! A cache for files.
use std::ops::{Range, RangeBounds};
use std::sync::Arc;
use std::time::Instant;
use bytes::Bytes;
use common_base::readable_size::ReadableSize;
use common_telemetry::{info, warn};
use futures::{FutureExt, TryStreamExt};
@@ -31,6 +33,7 @@ use crate::cache::FILE_TYPE;
use crate::error::{OpenDalSnafu, Result};
use crate::metrics::{CACHE_BYTES, CACHE_HIT, CACHE_MISS};
use crate::sst::file::FileId;
use crate::sst::parquet::helper::fetch_byte_ranges;
/// Subdirectory of cached files.
const FILE_DIR: &str = "files/";
@@ -68,7 +71,7 @@ impl FileCache {
// The cache is replaced by another file. This is unexpected, we don't remove the same
// file but updates the metrics as the file is already replaced by users.
CACHE_BYTES.with_label_values(&[FILE_TYPE]).sub(value.file_size.into());
warn!("Replace existing cache {} for region {} unexpectedly", file_path, key.0);
warn!("Replace existing cache {} for region {} unexpectedly", file_path, key.region_id);
return;
}
@@ -77,7 +80,7 @@ impl FileCache {
CACHE_BYTES.with_label_values(&[FILE_TYPE]).sub(value.file_size.into());
}
Err(e) => {
warn!(e; "Failed to delete cached file {} for region {}", file_path, key.0);
warn!(e; "Failed to delete cached file {} for region {}", file_path, key.region_id);
}
}
}
@@ -100,17 +103,11 @@ impl FileCache {
self.memory_index.insert(key, value).await;
}
async fn get_reader(&self, file_path: &str) -> object_store::Result<Option<Reader>> {
if self.local_store.is_exist(file_path).await? {
Ok(Some(self.local_store.reader(file_path).await?))
} else {
Ok(None)
}
}
/// Reads a file from the cache.
pub(crate) async fn reader(&self, key: IndexKey) -> Option<Reader> {
if !self.memory_index.contains_key(&key) {
// We must use `get()` to update the estimator of the cache.
// See https://docs.rs/moka/latest/moka/future/struct.Cache.html#method.contains_key
if self.memory_index.get(&key).await.is_none() {
CACHE_MISS.with_label_values(&[FILE_TYPE]).inc();
return None;
}
@@ -135,6 +132,39 @@ impl FileCache {
None
}
/// Reads ranges from the cache.
pub(crate) async fn read_ranges(
&self,
key: IndexKey,
ranges: &[Range<u64>],
) -> Option<Vec<Bytes>> {
if self.memory_index.get(&key).await.is_none() {
CACHE_MISS.with_label_values(&[FILE_TYPE]).inc();
return None;
}
let file_path = self.cache_file_path(key);
// In most cases this uses blocking reads,
// because the FileCache is normally backed by a local file system, which supports blocking reads.
let bytes_result = fetch_byte_ranges(&file_path, self.local_store.clone(), ranges).await;
match bytes_result {
Ok(bytes) => {
CACHE_HIT.with_label_values(&[FILE_TYPE]).inc();
Some(bytes)
}
Err(e) => {
if e.kind() != ErrorKind::NotFound {
warn!("Failed to get file for key {:?}, err: {}", key, e);
}
// We remove the file from the index.
self.memory_index.remove(&key).await;
CACHE_MISS.with_label_values(&[FILE_TYPE]).inc();
None
}
}
}
/// Removes a file from the cache explicitly.
pub(crate) async fn remove(&self, key: IndexKey) {
let file_path = self.cache_file_path(key);
@@ -194,10 +224,68 @@ impl FileCache {
pub(crate) fn local_store(&self) -> ObjectStore {
self.local_store.clone()
}
async fn get_reader(&self, file_path: &str) -> object_store::Result<Option<Reader>> {
if self.local_store.is_exist(file_path).await? {
Ok(Some(self.local_store.reader(file_path).await?))
} else {
Ok(None)
}
}
/// Checks if the key is in the file cache.
#[cfg(test)]
pub(crate) fn contains_key(&self, key: &IndexKey) -> bool {
self.memory_index.contains_key(key)
}
}
/// Key of file cache index.
pub(crate) type IndexKey = (RegionId, FileId);
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) struct IndexKey {
pub region_id: RegionId,
pub file_id: FileId,
pub file_type: FileType,
}
impl IndexKey {
/// Creates a new index key.
pub fn new(region_id: RegionId, file_id: FileId, file_type: FileType) -> IndexKey {
IndexKey {
region_id,
file_id,
file_type,
}
}
}
/// Type of the file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FileType {
/// Parquet file.
Parquet,
/// Puffin file.
Puffin,
}
impl FileType {
/// Parses the file type from string.
fn parse(s: &str) -> Option<FileType> {
match s {
"parquet" => Some(FileType::Parquet),
"puffin" => Some(FileType::Puffin),
_ => None,
}
}
/// Converts the file type to string.
fn as_str(&self) -> &'static str {
match self {
FileType::Parquet => "parquet",
FileType::Puffin => "puffin",
}
}
}
/// An entity that describes the file in the file cache.
///
@@ -205,26 +293,35 @@ pub(crate) type IndexKey = (RegionId, FileId);
#[derive(Debug, Clone)]
pub(crate) struct IndexValue {
/// Size of the file in bytes.
file_size: u32,
pub(crate) file_size: u32,
}
/// Generates the path to the cached file.
///
/// The file name format is `{region_id}.{file_id}`
/// The file name format is `{region_id}.{file_id}.{file_type}`
fn cache_file_path(cache_file_dir: &str, key: IndexKey) -> String {
join_path(cache_file_dir, &format!("{}.{}", key.0.as_u64(), key.1))
join_path(
cache_file_dir,
&format!(
"{}.{}.{}",
key.region_id.as_u64(),
key.file_id,
key.file_type.as_str()
),
)
}
/// Parse index key from the file name.
fn parse_index_key(name: &str) -> Option<IndexKey> {
let mut splited = name.splitn(2, '.');
let region_id = splited.next().and_then(|s| {
let mut split = name.splitn(3, '.');
let region_id = split.next().and_then(|s| {
let id = s.parse::<u64>().ok()?;
Some(RegionId::from_u64(id))
})?;
let file_id = splited.next().and_then(|s| FileId::parse_str(s).ok())?;
let file_id = split.next().and_then(|s| FileId::parse_str(s).ok())?;
let file_type = split.next().and_then(FileType::parse)?;
Some((region_id, file_id))
Some(IndexKey::new(region_id, file_id, file_type))
}
#[cfg(test)]
@@ -249,7 +346,7 @@ mod tests {
let cache = FileCache::new(local_store.clone(), ReadableSize::mb(10));
let region_id = RegionId::new(2000, 0);
let file_id = FileId::random();
let key = (region_id, file_id);
let key = IndexKey::new(region_id, file_id, FileType::Parquet);
let file_path = cache.cache_file_path(key);
// Get an empty file.
@@ -262,7 +359,10 @@ mod tests {
.unwrap();
// Add to the cache.
cache
.put((region_id, file_id), IndexValue { file_size: 5 })
.put(
IndexKey::new(region_id, file_id, FileType::Parquet),
IndexValue { file_size: 5 },
)
.await;
// Read file content.
@@ -271,6 +371,10 @@ mod tests {
reader.read_to_string(&mut buf).await.unwrap();
assert_eq!("hello", buf);
// Get weighted size.
cache.memory_index.run_pending_tasks().await;
assert_eq!(5, cache.memory_index.weighted_size());
// Remove the file.
cache.remove(key).await;
assert!(cache.reader(key).await.is_none());
@@ -280,6 +384,7 @@ mod tests {
// The file also not exists.
assert!(!local_store.is_exist(&file_path).await.unwrap());
assert_eq!(0, cache.memory_index.weighted_size());
}
#[tokio::test]
@@ -290,7 +395,7 @@ mod tests {
let cache = FileCache::new(local_store.clone(), ReadableSize::mb(10));
let region_id = RegionId::new(2000, 0);
let file_id = FileId::random();
let key = (region_id, file_id);
let key = IndexKey::new(region_id, file_id, FileType::Parquet);
let file_path = cache.cache_file_path(key);
// Write a file.
@@ -300,7 +405,10 @@ mod tests {
.unwrap();
// Add to the cache.
cache
.put((region_id, file_id), IndexValue { file_size: 5 })
.put(
IndexKey::new(region_id, file_id, FileType::Parquet),
IndexValue { file_size: 5 },
)
.await;
// Remove the file but keep the index.
@@ -319,10 +427,12 @@ mod tests {
let cache = FileCache::new(local_store.clone(), ReadableSize::mb(10));
let region_id = RegionId::new(2000, 0);
let file_type = FileType::Parquet;
// Write N files.
let file_ids: Vec<_> = (0..10).map(|_| FileId::random()).collect();
let mut total_size = 0;
for (i, file_id) in file_ids.iter().enumerate() {
let key = (region_id, *file_id);
let key = IndexKey::new(region_id, *file_id, file_type);
let file_path = cache.cache_file_path(key);
let bytes = i.to_string().into_bytes();
local_store.write(&file_path, bytes.clone()).await.unwrap();
@@ -330,22 +440,30 @@ mod tests {
// Add to the cache.
cache
.put(
(region_id, *file_id),
IndexKey::new(region_id, *file_id, file_type),
IndexValue {
file_size: bytes.len() as u32,
},
)
.await;
total_size += bytes.len();
}
// Recover the cache.
let cache = FileCache::new(local_store.clone(), ReadableSize::mb(10));
// No entry before recovery.
assert!(cache.reader((region_id, file_ids[0])).await.is_none());
assert!(cache
.reader(IndexKey::new(region_id, file_ids[0], file_type))
.await
.is_none());
cache.recover().await.unwrap();
// Check size.
cache.memory_index.run_pending_tasks().await;
assert_eq!(total_size, cache.memory_index.weighted_size() as usize);
for (i, file_id) in file_ids.iter().enumerate() {
let key = (region_id, *file_id);
let key = IndexKey::new(region_id, *file_id, file_type);
let mut reader = cache.reader(key).await.unwrap();
let mut buf = String::new();
reader.read_to_string(&mut buf).await.unwrap();
@@ -353,16 +471,50 @@ mod tests {
}
}
#[tokio::test]
async fn test_file_cache_read_ranges() {
let dir = create_temp_dir("");
let local_store = new_fs_store(dir.path().to_str().unwrap());
let file_cache = FileCache::new(local_store.clone(), ReadableSize::mb(10));
let region_id = RegionId::new(2000, 0);
let file_id = FileId::random();
let key = IndexKey::new(region_id, file_id, FileType::Parquet);
let file_path = file_cache.cache_file_path(key);
// Write a file.
let data = b"hello greptime database";
local_store
.write(&file_path, data.as_slice())
.await
.unwrap();
// Add to the cache.
file_cache.put(key, IndexValue { file_size: 5 }).await;
// Ranges
let ranges = vec![0..5, 6..10, 15..19, 0..data.len() as u64];
let bytes = file_cache.read_ranges(key, &ranges).await.unwrap();
assert_eq!(4, bytes.len());
assert_eq!(b"hello", bytes[0].as_ref());
assert_eq!(b"grep", bytes[1].as_ref());
assert_eq!(b"data", bytes[2].as_ref());
assert_eq!(data, bytes[3].as_ref());
}
#[test]
fn test_cache_file_path() {
let file_id = FileId::parse_str("3368731b-a556-42b8-a5df-9c31ce155095").unwrap();
assert_eq!(
"test_dir/5299989643269.3368731b-a556-42b8-a5df-9c31ce155095",
cache_file_path("test_dir", (RegionId::new(1234, 5), file_id))
"test_dir/5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.parquet",
cache_file_path(
"test_dir",
IndexKey::new(RegionId::new(1234, 5), file_id, FileType::Parquet)
)
);
assert_eq!(
"test_dir/5299989643269.3368731b-a556-42b8-a5df-9c31ce155095",
cache_file_path("test_dir/", (RegionId::new(1234, 5), file_id))
"test_dir/5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.parquet",
cache_file_path(
"test_dir/",
IndexKey::new(RegionId::new(1234, 5), file_id, FileType::Parquet)
)
);
}
@@ -371,8 +523,8 @@ mod tests {
let file_id = FileId::parse_str("3368731b-a556-42b8-a5df-9c31ce155095").unwrap();
let region_id = RegionId::new(1234, 5);
assert_eq!(
(region_id, file_id),
parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095").unwrap()
IndexKey::new(region_id, file_id, FileType::Parquet),
parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.parquet").unwrap()
);
assert!(parse_index_key("").is_none());
assert!(parse_index_key(".").is_none());
@@ -381,8 +533,13 @@ mod tests {
assert!(parse_index_key(".5299989643269").is_none());
assert!(parse_index_key("5299989643269.").is_none());
assert!(parse_index_key("5299989643269.3368731b-a556-42b8-a5df").is_none());
assert!(parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095").is_none());
assert!(
parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.parquet").is_none()
parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.parque").is_none()
);
assert!(parse_index_key(
"5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.parquet.puffin"
)
.is_none());
}
}
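Editor's note: to summarize the cache-key change in this file, the old `(RegionId, FileId)` tuple becomes an `IndexKey` that also carries a `FileType`, and cached file names gain a type suffix so parquet data files and puffin index files of the same SST can coexist. A quick illustration of the round trip using the helpers above (module-internal usage; the cache directory is hypothetical):

// Inside the file_cache module:
let file_id = FileId::parse_str("3368731b-a556-42b8-a5df-9c31ce155095").unwrap();
let key = IndexKey::new(RegionId::new(1234, 5), file_id, FileType::Puffin);

// RegionId(1234, 5) encodes to 5299989643269; the type suffix distinguishes the
// puffin index file from the parquet data file of the same SST.
let path = cache_file_path("cache_dir", key);
assert_eq!(
    "cache_dir/5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.puffin",
    path
);

// recover() relies on the reverse mapping when it scans the cache directory.
assert_eq!(
    Some(key),
    parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.puffin")
);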

View File

@@ -19,6 +19,8 @@ use std::sync::Arc;
use bytes::Bytes;
use datatypes::arrow::array::{ArrayRef, Int64Array};
use datatypes::arrow::record_batch::RecordBatch;
use object_store::services::Fs;
use object_store::ObjectStore;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use parquet::arrow::ArrowWriter;
use parquet::file::metadata::ParquetMetaData;
@@ -42,3 +44,9 @@ fn parquet_file_data() -> Vec<u8> {
buffer
}
pub(crate) fn new_fs_store(path: &str) -> ObjectStore {
let mut builder = Fs::default();
builder.root(path);
ObjectStore::new(builder).unwrap().finish()
}

View File

@@ -14,19 +14,28 @@
//! A write-through cache for remote object stores.
use std::ops::Range;
use std::sync::Arc;
use api::v1::region;
use bytes::Bytes;
use common_base::readable_size::ReadableSize;
use common_telemetry::{debug, info};
use object_store::manager::ObjectStoreManagerRef;
use object_store::ObjectStore;
use snafu::ResultExt;
use store_api::metadata::RegionMetadataRef;
use store_api::storage::RegionId;
use crate::cache::file_cache::{FileCache, FileCacheRef};
use crate::error::Result;
use crate::access_layer::new_fs_object_store;
use crate::cache::file_cache::{FileCache, FileCacheRef, FileType, IndexKey, IndexValue};
use crate::error::{self, Result};
use crate::metrics::{FLUSH_ELAPSED, UPLOAD_BYTES_TOTAL};
use crate::read::Source;
use crate::sst::file::FileId;
use crate::sst::parquet::writer::ParquetWriter;
use crate::sst::parquet::{SstInfo, WriteOptions};
use crate::sst::DEFAULT_WRITE_BUFFER_SIZE;
/// A cache for uploading files to remote object stores.
///
@@ -43,20 +52,35 @@ pub type WriteCacheRef = Arc<WriteCache>;
impl WriteCache {
/// Create the cache with a `local_store` to cache files and a
/// `object_store_manager` for all object stores.
pub fn new(
pub async fn new(
local_store: ObjectStore,
object_store_manager: ObjectStoreManagerRef,
cache_capacity: ReadableSize,
) -> Self {
Self {
file_cache: Arc::new(FileCache::new(local_store, cache_capacity)),
) -> Result<Self> {
let file_cache = FileCache::new(local_store, cache_capacity);
file_cache.recover().await?;
Ok(Self {
file_cache: Arc::new(file_cache),
object_store_manager,
}
})
}
/// Recovers the write cache from local store.
pub async fn recover(&self) -> Result<()> {
self.file_cache.recover().await
/// Creates a write cache based on local fs.
pub async fn new_fs(
cache_dir: &str,
object_store_manager: ObjectStoreManagerRef,
cache_capacity: ReadableSize,
) -> Result<Self> {
info!("Init write cache on {cache_dir}, capacity: {cache_capacity}");
let local_store = new_fs_object_store(cache_dir).await?;
Self::new(local_store, object_store_manager, cache_capacity).await
}
/// Returns the file cache of the write cache.
pub(crate) fn file_cache(&self) -> FileCacheRef {
self.file_cache.clone()
}
/// Writes SST to the cache and then uploads it to the remote object store.
@@ -65,11 +89,105 @@ impl WriteCache {
request: SstUploadRequest,
write_opts: &WriteOptions,
) -> Result<Option<SstInfo>> {
// TODO(yingwen): Write to the local store and then upload.
// Now we write to the remote and ignore local cache.
let mut writer =
ParquetWriter::new(request.upload_path, request.metadata, request.remote_store);
writer.write_all(request.source, write_opts).await
let timer = FLUSH_ELAPSED
.with_label_values(&["write_sst"])
.start_timer();
let region_id = request.metadata.region_id;
let file_id = request.file_id;
let parquet_key = IndexKey::new(region_id, file_id, FileType::Parquet);
// Write to FileCache.
let mut writer = ParquetWriter::new(
self.file_cache.cache_file_path(parquet_key),
request.metadata,
self.file_cache.local_store(),
);
let sst_info = writer.write_all(request.source, write_opts).await?;
timer.stop_and_record();
// Upload sst file to remote object store.
let Some(sst_info) = sst_info else {
// No data needs to be uploaded.
return Ok(None);
};
let parquet_path = &request.upload_path;
let remote_store = &request.remote_store;
self.upload(parquet_key, parquet_path, remote_store).await?;
if sst_info.inverted_index_available {
let puffin_key = IndexKey::new(region_id, file_id, FileType::Puffin);
let puffin_path = &request.index_upload_path;
self.upload(puffin_key, puffin_path, remote_store).await?;
}
Ok(Some(sst_info))
}
/// Uploads a Parquet file or a Puffin file to the remote object store.
async fn upload(
&self,
index_key: IndexKey,
upload_path: &str,
remote_store: &ObjectStore,
) -> Result<()> {
let region_id = index_key.region_id;
let file_id = index_key.file_id;
let file_type = index_key.file_type;
let cache_path = self.file_cache.cache_file_path(index_key);
let timer = FLUSH_ELAPSED
.with_label_values(&[match file_type {
FileType::Parquet => "upload_parquet",
FileType::Puffin => "upload_puffin",
}])
.start_timer();
let reader = self
.file_cache
.local_store()
.reader(&cache_path)
.await
.context(error::OpenDalSnafu)?;
let mut writer = remote_store
.writer_with(upload_path)
.buffer(DEFAULT_WRITE_BUFFER_SIZE.as_bytes() as usize)
.await
.context(error::OpenDalSnafu)?;
let bytes_written =
futures::io::copy(reader, &mut writer)
.await
.context(error::UploadSnafu {
region_id,
file_id,
file_type,
})?;
// Must close to upload all data.
writer.close().await.context(error::OpenDalSnafu)?;
UPLOAD_BYTES_TOTAL.inc_by(bytes_written);
debug!(
"Successfully upload file to remote, region: {}, file: {}, upload_path: {}, cost: {:?}s",
region_id,
file_id,
upload_path,
timer.stop_and_record()
);
let index_value = IndexValue {
file_size: bytes_written as _,
};
// Register to file cache
self.file_cache.put(index_key, index_value).await;
Ok(())
}
}
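// Illustrative sketch (not part of this patch): a minimal way to construct the
// write cache through `new_fs`, assuming an `ObjectStoreManagerRef` is already
// available from the engine. The cache directory and capacity below are
// made-up example values.
async fn build_write_cache_example(
    object_store_manager: ObjectStoreManagerRef,
) -> Result<WriteCacheRef> {
    // `new_fs` builds the local file-system store itself and recovers any
    // previously cached files before returning, so no separate recover step is needed.
    let cache = WriteCache::new_fs(
        "/tmp/greptimedb/write_cache", // hypothetical cache directory
        object_store_manager,
        ReadableSize::mb(512),
    )
    .await?;
    Ok(Arc::new(cache))
}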
@@ -81,6 +199,95 @@ pub struct SstUploadRequest {
pub storage: Option<String>,
/// Path to upload the file.
pub upload_path: String,
/// Path to upload the index file.
pub index_upload_path: String,
/// Remote object store to upload.
pub remote_store: ObjectStore,
}
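// Illustrative sketch (not part of this patch): once an SST has gone through
// `write_and_upload_sst`, a reader can probe the local file cache before
// falling back to the remote store. `region_id` and `file_id` are assumed to
// come from the caller.
async fn read_sst_from_cache_example(
    cache: &WriteCache,
    region_id: RegionId,
    file_id: FileId,
) -> Result<Option<Vec<u8>>> {
    let key = IndexKey::new(region_id, file_id, FileType::Parquet);
    let file_cache = cache.file_cache();
    if !file_cache.contains_key(&key) {
        // Not cached locally; the caller should read from the remote store instead.
        return Ok(None);
    }
    let path = file_cache.cache_file_path(key);
    let data = file_cache
        .local_store()
        .read(&path)
        .await
        .context(error::OpenDalSnafu)?;
    Ok(Some(data))
}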
#[cfg(test)]
mod tests {
use api::v1::OpType;
use common_base::readable_size::ReadableSize;
use common_test_util::temp_dir::create_temp_dir;
use object_store::manager::ObjectStoreManager;
use object_store::services::Fs;
use object_store::ObjectStore;
use store_api::storage::RegionId;
use super::*;
use crate::cache::file_cache::{self, FileCache};
use crate::cache::test_util::new_fs_store;
use crate::sst::file::FileId;
use crate::sst::location::{index_file_path, sst_file_path};
use crate::test_util::sst_util::{
new_batch_by_range, new_source, sst_file_handle, sst_region_metadata,
};
use crate::test_util::{build_rows, new_batch_builder, CreateRequestBuilder, TestEnv};
#[tokio::test]
async fn test_write_and_upload_sst() {
// TODO(QuenKar): maybe find a way to create some object server for testing,
// and now just use local file system to mock.
let mut env = TestEnv::new();
let mock_store = env.init_object_store_manager();
let file_id = FileId::random();
let upload_path = sst_file_path("test", file_id);
let index_upload_path = index_file_path("test", file_id);
// Create WriteCache
let local_dir = create_temp_dir("");
let local_store = new_fs_store(local_dir.path().to_str().unwrap());
let object_store_manager = env.get_object_store_manager().unwrap();
let write_cache = WriteCache::new(
local_store.clone(),
object_store_manager,
ReadableSize::mb(10),
)
.await
.unwrap();
// Create Source
let metadata = Arc::new(sst_region_metadata());
let region_id = metadata.region_id;
let source = new_source(&[
new_batch_by_range(&["a", "d"], 0, 60),
new_batch_by_range(&["b", "f"], 0, 40),
new_batch_by_range(&["b", "h"], 100, 200),
]);
let request = SstUploadRequest {
file_id,
metadata,
source,
storage: None,
upload_path: upload_path.clone(),
index_upload_path,
remote_store: mock_store.clone(),
};
let write_opts = WriteOptions {
row_group_size: 512,
..Default::default()
};
// Write to cache and upload sst to mock remote store
let sst_info = write_cache
.write_and_upload_sst(request, &write_opts)
.await
.unwrap()
.unwrap();
// Check write cache contains the key
let key = IndexKey::new(region_id, file_id, FileType::Parquet);
assert!(write_cache.file_cache.contains_key(&key));
// Check file data
let remote_data = mock_store.read(&upload_path).await.unwrap();
let cache_data = local_store
.read(&write_cache.file_cache.cache_file_path(key))
.await
.unwrap();
assert_eq!(remote_data, cache_data);
}
}
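// Illustrative sketch (not part of this patch): the generic shape of the upload
// step in `upload` above, namely streaming a locally cached file into the
// remote store through a buffered writer and closing the writer to commit the
// upload. Error conversion is simplified to `std::io::Error` for brevity.
async fn copy_local_to_remote_example(
    local: &ObjectStore,
    remote: &ObjectStore,
    cache_path: &str,
    upload_path: &str,
) -> std::io::Result<u64> {
    let to_io = |e| std::io::Error::new(std::io::ErrorKind::Other, e);
    let reader = local.reader(cache_path).await.map_err(to_io)?;
    let mut writer = remote
        .writer_with(upload_path)
        .buffer(DEFAULT_WRITE_BUFFER_SIZE.as_bytes() as usize)
        .await
        .map_err(to_io)?;
    let written = futures::io::copy(reader, &mut writer).await?;
    // The object only becomes visible remotely after the writer is closed.
    writer.close().await.map_err(to_io)?;
    Ok(written)
}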


@@ -35,6 +35,8 @@ pub fn new_file_handle(
),
level,
file_size: 0,
available_indexes: Default::default(),
index_file_size: 0,
},
file_purger,
)


@@ -22,6 +22,7 @@ use common_telemetry::{debug, error, info};
use common_time::timestamp::TimeUnit;
use common_time::timestamp_millis::BucketAligned;
use common_time::Timestamp;
use smallvec::SmallVec;
use snafu::ResultExt;
use store_api::metadata::RegionMetadataRef;
use store_api::storage::RegionId;
@@ -39,7 +40,7 @@ use crate::read::{BoxedBatchReader, Source};
use crate::request::{
BackgroundNotify, CompactionFailed, CompactionFinished, OutputTx, WorkerRequest,
};
-use crate::sst::file::{FileHandle, FileId, FileMeta, Level};
+use crate::sst::file::{FileHandle, FileId, FileMeta, IndexType, Level};
use crate::sst::file_purger::FilePurgerRef;
use crate::sst::parquet::WriteOptions;
use crate::sst::version::LevelMeta;
@@ -306,6 +307,7 @@ impl TwcsCompactionTask {
let metadata = self.metadata.clone();
let sst_layer = self.sst_layer.clone();
let region_id = self.region_id;
let file_id = output.output_file_id;
let cache_manager = self.cache_manager.clone();
let storage = self.storage.clone();
futs.push(async move {
@@ -314,7 +316,7 @@ impl TwcsCompactionTask {
let file_meta_opt = sst_layer
.write_sst(
SstWriteRequest {
-file_id: output.output_file_id,
+file_id,
metadata,
source: Source::Reader(reader),
cache_manager,
@@ -325,10 +327,15 @@ impl TwcsCompactionTask {
.await?
.map(|sst_info| FileMeta {
region_id,
-file_id: output.output_file_id,
+file_id,
time_range: sst_info.time_range,
level: output.output_level,
file_size: sst_info.file_size,
available_indexes: sst_info
.inverted_index_available
.then(|| SmallVec::from_iter([IndexType::InvertedIndex]))
.unwrap_or_default(),
index_file_size: sst_info.index_file_size,
});
Ok(file_meta_opt)
});


@@ -19,6 +19,9 @@ use std::time::Duration;
use common_base::readable_size::ReadableSize;
use common_telemetry::warn;
use serde::{Deserialize, Serialize};
use snafu::ensure;
use crate::error::{InvalidConfigSnafu, Result};
/// Default max running background job.
const DEFAULT_MAX_BG_JOB: usize = 4;
@@ -67,6 +70,12 @@ pub struct MitoConfig {
pub vector_cache_size: ReadableSize,
/// Cache size for pages of SST row groups (default 512MB). Setting it to 0 to disable the cache.
pub page_cache_size: ReadableSize,
/// Whether to enable the experimental write cache.
pub enable_experimental_write_cache: bool,
/// Path for write cache.
pub experimental_write_cache_path: String,
/// Capacity for write cache.
pub experimental_write_cache_size: ReadableSize,
// Other configs:
/// Buffer size for SST writing.
@@ -78,6 +87,8 @@ pub struct MitoConfig {
pub scan_parallelism: usize,
/// Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
pub parallel_scan_channel_size: usize,
/// Whether to allow stale entries read during replay.
pub allow_stale_entries: bool,
}
impl Default for MitoConfig {
@@ -95,16 +106,22 @@ impl Default for MitoConfig {
sst_meta_cache_size: ReadableSize::mb(128),
vector_cache_size: ReadableSize::mb(512),
page_cache_size: ReadableSize::mb(512),
enable_experimental_write_cache: false,
experimental_write_cache_path: String::new(),
experimental_write_cache_size: ReadableSize::mb(512),
sst_write_buffer_size: ReadableSize::mb(8),
scan_parallelism: divide_num_cpus(4),
parallel_scan_channel_size: DEFAULT_SCAN_CHANNEL_SIZE,
allow_stale_entries: false,
}
}
}
impl MitoConfig {
/// Sanitize incorrect configurations.
-pub(crate) fn sanitize(&mut self) {
+///
+/// Returns an error if there is a configuration that cannot be sanitized.
+pub(crate) fn sanitize(&mut self) -> Result<()> {
// Use default value if `num_workers` is 0.
if self.num_workers == 0 {
self.num_workers = divide_num_cpus(2);
@@ -149,6 +166,17 @@ impl MitoConfig {
self.parallel_scan_channel_size
);
}
if self.enable_experimental_write_cache {
ensure!(
!self.experimental_write_cache_path.is_empty(),
InvalidConfigSnafu {
reason: "experimental_write_cache_path should not be empty",
}
);
}
Ok(())
}
}
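// Illustrative sketch (not part of this patch): enabling the experimental write
// cache requires a non-empty cache path; with the change above, `sanitize` now
// reports this as an error instead of silently accepting it. The path below is
// a made-up example value.
fn write_cache_config_example() -> Result<MitoConfig> {
    let mut config = MitoConfig {
        enable_experimental_write_cache: true,
        experimental_write_cache_path: "/tmp/greptimedb/write_cache".to_string(),
        experimental_write_cache_size: ReadableSize::mb(512),
        ..MitoConfig::default()
    };
    // An empty `experimental_write_cache_path` would make this return an
    // `InvalidConfig` error.
    config.sanitize()?;
    Ok(config)
}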

Some files were not shown because too many files have changed in this diff.