Compare commits

...

35 Commits

Author SHA1 Message Date
Lei, HUANG
c9c2b3c91f fix: revert memtable pk rb cache to rwlock (#2565)
* fix: revert memtable pk rb cache to rwlock

* feat: refine
2023-10-10 20:51:05 +08:00
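
A minimal sketch of the pattern this commit reverts to: a primary-key record-batch cache guarded by std::sync::RwLock rather than arc-swap. The key and value types below are illustrative placeholders, not the actual mito2 memtable types.

use std::collections::HashMap;
use std::sync::{Arc, RwLock};

/// Illustrative cache: many concurrent readers, occasional writers.
#[derive(Default)]
struct PkRecordBatchCache {
    // The value type is a stand-in for a decoded record batch.
    inner: RwLock<HashMap<Vec<u8>, Arc<String>>>,
}

impl PkRecordBatchCache {
    fn get(&self, pk: &[u8]) -> Option<Arc<String>> {
        // Read lock: concurrent lookups do not block each other.
        self.inner.read().unwrap().get(pk).cloned()
    }

    fn insert(&self, pk: Vec<u8>, batch: Arc<String>) {
        // Write lock: taken only when a newly decoded batch is cached.
        self.inner.write().unwrap().insert(pk, batch);
    }
}

fn main() {
    let cache = PkRecordBatchCache::default();
    cache.insert(b"host=web-1".to_vec(), Arc::new("decoded batch".to_string()));
    assert!(cache.get(b"host=web-1").is_some());
}
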
Yingwen
7f75190fce chore: update Cargo.lock (#2564) 2023-10-10 16:28:50 +08:00
Yingwen
0a394c73a2 chore: bump version to 0.4.0 (#2563) 2023-10-10 16:16:15 +08:00
JeremyHi
ae95f23e05 feat: add metrics for region server (#2552)
* feat: add metrics for region server

* fix: add comment and remove unused code
2023-10-10 07:40:16 +00:00
Lei, HUANG
6b39f5923d feat: add compaction metrics (#2560)
* feat: add compaction metrics

* feat: add compaction request total count

* fix: CR comments
2023-10-10 07:38:39 +00:00
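
A hedged sketch of what compaction metrics like these typically look like with the metrics 0.20 crate pinned in this workspace; the metric names below are made up for illustration and are not the ones added by the PR.

use std::time::Instant;

// Assumes the workspace's `metrics = "0.20"` dependency; names are illustrative.
fn compact_region() {
    metrics::increment_counter!("example_compaction_requests_total");

    let start = Instant::now();
    // ... run the actual compaction here ...
    metrics::histogram!(
        "example_compaction_elapsed_seconds",
        start.elapsed().as_secs_f64()
    );
}

fn main() {
    // Without an installed recorder the macros are cheap no-ops,
    // so this compiles and runs even outside a real server.
    compact_region();
}
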
JeremyHi
ed725d030f fix: support multi addrs while using etcd (#2562)
fix: support multi addrs while using etcd
2023-10-10 07:30:48 +00:00
Wei
4fe7e162af fix: human_time mismatch (#2558)
* fix: human_time mismatch.

* fix: add comment
2023-10-10 07:22:12 +00:00
Yingwen
8a5ef826b9 fix(mito): Do not write to memtables if writing to the WAL fails (#2561)
* feat: add writes total metrics

* fix: don't write to memtable if the write context failed

* feat: write rows metrics
2023-10-10 06:55:57 +00:00
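
The ordering fix in 8a5ef826b9 boils down to "persist to the WAL first, only then mutate the memtable". A generic sketch of that control flow with stand-in types; the real engine uses its own WAL and memtable implementations.

use std::io;

// Stand-in types for illustration only.
struct Wal;
struct Memtable(Vec<String>);

impl Wal {
    fn append(&mut self, entry: &str) -> io::Result<()> {
        // Pretend this persists the entry; failure here must abort the write.
        if entry.is_empty() {
            return Err(io::Error::new(io::ErrorKind::InvalidInput, "empty entry"));
        }
        Ok(())
    }
}

fn write(wal: &mut Wal, memtable: &mut Memtable, entry: &str) -> io::Result<()> {
    // Append to the WAL first; if it fails, the memtable is left untouched,
    // so readers never see rows that were never made durable.
    wal.append(entry)?;
    memtable.0.push(entry.to_string());
    Ok(())
}

fn main() {
    let (mut wal, mut memtable) = (Wal, Memtable(Vec::new()));
    assert!(write(&mut wal, &mut memtable, "row-1").is_ok());
    assert!(write(&mut wal, &mut memtable, "").is_err());
    assert_eq!(memtable.0.len(), 1); // the failed write left no trace in the memtable
}
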
Ruihang Xia
07be50403e feat: add basic metrics to query (#2559)
* add metrics to merge scan

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* count series in promql

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* tweak label name

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* tweak label name

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* document metric label

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-10-10 06:55:25 +00:00
Lei, HUANG
8bdef9a348 feat: memtable filter push down (#2539)
* feat: memtable support filter pushdown to prune primary keys

* fix: switch to next time series when pk not selected

* fix: allow predicate evaluation failure

* fix: some clippy warnings

* fix: panic when no primary key in schema

* feat: cache decoded record batch for primary key

* refactor: use arcswap instead of rwlock

* fix: format toml
2023-10-10 04:03:10 +00:00
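
A rough illustration of the pruning idea in #2539: evaluate the pushed-down filter against each decoded primary key and skip whole time series whose key is not selected, keeping the series if evaluation fails. The types here are simplified stand-ins; the real code evaluates DataFusion predicates, not closures.

type PrimaryKey = (String, String); // e.g. (host, region) as a stand-in

fn key_selected(pk: &PrimaryKey, predicate: impl Fn(&PrimaryKey) -> Result<bool, String>) -> bool {
    // If the predicate cannot be evaluated, keep the series rather than
    // silently dropping data (mirrors "allow predicate evaluation failure").
    predicate(pk).unwrap_or(true)
}

fn main() {
    let keys = vec![
        ("web-1".to_string(), "us".to_string()),
        ("web-2".to_string(), "eu".to_string()),
    ];
    let selected: Vec<_> = keys
        .iter()
        .filter(|pk| key_selected(pk, |(_, region)| Ok(region == "us")))
        .collect();
    assert_eq!(selected.len(), 1); // only the series matching the filter survives
}
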
Yingwen
d4577e7372 feat(mito): add metrics to mito engine (#2556)
* feat: allow discarding a timer

* feat: flush metrics

* feat: flush bytes and region count metrics

* refactor: add as_str to get static string

* feat: add handle request elapsed metrics

* feat: add some write related metrics

* style: fix clippy
2023-10-10 03:53:17 +00:00
dennis zhuang
88f26673f0 fix: adds back http_timeout for frontend subcommand (#2555) 2023-10-10 03:05:16 +00:00
Baasit
19f300fc5a feat: renaming kv directory to metadata (#2549)
* fix: renamed kv directory to metadata directory

* fix: changed function name

* fix: changed function name
2023-10-09 11:43:17 +00:00
Weny Xu
cc83764331 fix: check table exists before allocating table id (#2546)
* fix: check table exists before allocating table_id

* chore: apply suggestions from CR
2023-10-09 11:40:10 +00:00
Yingwen
81aa7a4caf chore(mito): change default batch size/row group size (#2550) 2023-10-09 11:10:12 +00:00
Yingwen
d68dd1f3eb fix: schema validation is skipped once we need to fill a column (#2548)
* test: test different order

* test: add tests for missing and invalid columns

* fix: do not skip schema validation while missing columns

* chore: use field_columns()

* test: add tests for different column order
2023-10-09 09:20:51 +00:00
Lei, HUANG
9b3470b049 feat: android image builder dockerfile (#2541)
* feat: android image builder dockerfile

* feat: add building android dev-builder to ci config file

* fix: add build arg

* feat: use makefile to build image and add strip command
2023-10-09 09:10:14 +00:00
Weny Xu
8cc862ff8a refactor: refactor cache invalidator (#2540) 2023-10-09 08:19:18 +00:00
Weny Xu
81ccb58fb4 refactor!: compare with origin bytes during the transactions (#2538)
* refactor: compare with origin bytes during the transaction

* refactor: use serialize_str instead

* Update src/common/meta/src/key.rs

Co-authored-by: JeremyHi <jiachun_feng@proton.me>

* chore: apply suggestions from CR

---------

Co-authored-by: JeremyHi <jiachun_feng@proton.me>
2023-10-09 08:17:19 +00:00
Weny Xu
ce3c10a86e refactor: de/encode protobuf-encoded byte array with base64 (#2545) 2023-10-09 05:31:44 +00:00
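
Commit ce3c10a86e stores protobuf-encoded bytes as base64 text. With the base64 0.21 dependency this compare adds to common-meta, the round trip looks roughly like this; the payload is a stand-in, not a real metadata value.

use base64::engine::general_purpose::STANDARD;
use base64::Engine as _; // brings `encode`/`decode` into scope (base64 0.21 API)

fn main() {
    // Stand-in for a protobuf-encoded value.
    let raw: Vec<u8> = vec![0x08, 0x96, 0x01];

    // Encode the opaque bytes as base64 text so they survive text-based storage.
    let text = STANDARD.encode(&raw);

    // Decoding restores the exact original byte array.
    let decoded = STANDARD.decode(&text).expect("valid base64");
    assert_eq!(decoded, raw);
}
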
shuiyisong
007f7ba03c refactor: extract plugins crate (#2487)
* chore: move frontend plugins fn

* chore: move datanode plugins to fn

* chore: add opt plugins

* chore: add plugins to meta-srv

* chore: setup meta plugins, wait for router extension

* chore: try use configurator for grpc too

* chore: minor fix fmt

* chore: minor fix fmt

* chore: add start meta_srv for hook

* chore: merge develop

* chore: minor fix

* chore: replace Arc<Plugins> with PluginsRef

* chore: fix header

* chore: remove empty file

* chore: modify comments

* chore: remove PluginsRef type alias

* chore: remove `OptPlugins`
2023-10-09 04:54:27 +00:00
Weny Xu
dfe68a7e0b refactor: check push result out of loop (#2511)
* refactor: check push result out of loop

* chore: apply suggestions from CR
2023-10-09 02:49:48 +00:00
Ruihang Xia
d5e4fcaaff feat: dist plan optimize part 2 (#2543)
* allow udf and scalar fn

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* put CountWildcardRule before dist planner

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* bump datafusion to fix first_value/last_value

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update sqlness result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* use retain instead

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-10-09 02:18:36 +00:00
Yingwen
17b385a985 fix: compiler errors under pprof and mem-prof features (#2537)
* fix: compiler errors under pprof feature

* fix: compiler errors under mem-prof feature
2023-10-08 08:28:45 +00:00
shuiyisong
067917845f fix: carry dbname from frontend to datanode (#2520)
* chore: add dbname in region request header for tracking purpose

* chore: fix handle read

* chore: add write meter

* chore: add meter-core to dep

* chore: add converter between RegionRequestHeader and QueryContext & update proto version
2023-10-08 06:30:23 +00:00
Weny Xu
a680133acc feat: enable no delay for mysql, opentsdb, http (#2530)
* refactor: enable no delay for mysql, opentsdb, http

* Apply suggestions from code review

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2023-10-08 06:19:52 +00:00
Yingwen
0593c3bde3 fix(mito): pruning for mito2 (#2525)
* fix: pruning for mito2

* chore: refactor projection parameters; add some tests; customize row group size for each flush task.

* chore: pass whole RegionFlushRequest

---------

Co-authored-by: Lei, HUANG <mrsatangel@gmail.com>
2023-10-08 03:45:15 +00:00
Lei, HUANG
0292445476 fix: timestamp range filter (#2533)
* fix: timestamp range filter

* fix: rebase develop

* fix: some style issues
2023-10-08 03:29:02 +00:00
dennis zhuang
ff15bc41d6 feat: improve object storage cache (#2522)
* feat: refactor object storage cache with moka

* chore: minor fixes

* fix: concurrent issues and invalidate cache after write/delete

* chore: minor changes

* fix: cargo lock

* refactor: rename

* chore: change DEFAULT_OBJECT_STORE_CACHE_SIZE to 256Mib

* fix: typo

* chore: style

* fix: toml format

* chore: toml

* fix: toml format

* Update src/object-store/src/layers/lru_cache/read_cache.rs

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* chore: update Cargo.toml

Co-authored-by: Yingwen <realevenyag@gmail.com>

* chore: update src/object-store/Cargo.toml

Co-authored-by: Yingwen <realevenyag@gmail.com>

* chore: refactor and apply suggestions

* fix: typo

* feat: adds back allow list for caching

* chore: cr suggestion

Co-authored-by: Yingwen <realevenyag@gmail.com>

* chore: cr suggestion

Co-authored-by: Yingwen <realevenyag@gmail.com>

* refactor: wrap inner Accessor with Arc

* chore: remove run_pending_task in read and write path

* chore: the arc is unnecessary

---------

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2023-10-08 03:27:49 +00:00
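
The read cache in #2522 is built on moka, which this compare bumps to 0.12. A minimal sketch of a size-bounded async cache with that crate, assuming its `future` feature and a tokio runtime; the key and value types are placeholders rather than the real object-store types.

use moka::future::Cache;

#[tokio::main]
async fn main() {
    let cache: Cache<String, Vec<u8>> = Cache::builder()
        .max_capacity(10_000) // entries beyond this are evicted
        .build();

    cache.insert("data/file.parquet".to_string(), vec![1, 2, 3]).await;

    // In moka 0.12 `get` on the async cache is itself async.
    if let Some(bytes) = cache.get("data/file.parquet").await {
        println!("cache hit: {} bytes", bytes.len());
    }
}
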
Yingwen
657542c0b8 feat(mito): Cache repeated vector for tags (#2523)
* feat: add vector_cache to CacheManager

* feat: cache repeated vectors

* feat: skip decoding pk if output doesn't contain tags

* test: add TestRegionMetadataBuilder

* test: test ProjectionMapper

* test: test vector cache

* test: test projection mapper convert

* style: fix clippy

* feat: do not cache vector if it is too large

* docs: update comment
2023-10-07 11:36:00 +00:00

Ning Sun
0ad3fb6040 fix: mysql timezone settings (#2534)
* fix: restore time zone settings for mysql

* test: add integration test for time zone

* test: fix unit test for check
2023-10-07 10:21:32 +00:00
Bamboo1
b44e39f897 feat: the schema of RegionMetadata is not output during debug (#2498)
* feat: the schema of RegionMetadata is not output during debug because column_metadatas contains duplicate information

Signed-off-by: ZhuZiyi <zyzhu2001@gmail.com>

* feat: the id_to_index of RegionMetadata is not output during debug

Signed-off-by: ZhuZiyi <zyzhu2001@gmail.com>

* feat: add debug trait

Signed-off-by: ZhuZiyi <zyzhu2001@gmail.com>

* feat: use default debug in ConcreteDataType

Signed-off-by: ZhuZiyi <zyzhu2001@gmail.com>

* chore: add std::fmt

Signed-off-by: ZhuZiyi <zyzhu2001@gmail.com>

* test: add debug trait test

Signed-off-by: ZhuZiyi <zyzhu2001@gmail.com>

* chore: typo

Signed-off-by: ZhuZiyi <zyzhu2001@gmail.com>

* chore: resolve conversation

Signed-off-by: ZhuZiyi <zyzhu2001@gmail.com>

* chore: format

Signed-off-by: ZhuZiyi <zyzhu2001@gmail.com>

* fix: test bug

Signed-off-by: ZhuZiyi <zyzhu2001@gmail.com>

---------

Signed-off-by: ZhuZiyi <zyzhu2001@gmail.com>
2023-10-07 08:01:54 +00:00
Weny Xu
f50f2a84a9 fix: open region missing options (#2473)
* fix: open region missing options

* refactor: remove redundant clone

* chore: apply suggestions from CR

* chore: apply suggestions

* chore: apply suggestions

* test: add test for initialize_region_server

* feat: introduce RegionInfo
2023-10-07 07:17:16 +00:00
Yingwen
fe783c7c1f perf(mito): Use a heap to merge batches for the same key (#2521)
* feat: merge by heap

* fix: fix heap order

* feat: avoid pop/push next and refactor some functions

* feat: replace merge_batches and fix tests

* test: add test that a key is deleted

* fix: skip empty batch

* style: clippy

* chore: fix typos
2023-10-07 02:56:08 +00:00
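
#2521 replaces pairwise merging with a heap. A self-contained sketch of k-way merging sorted runs with std's BinaryHeap (wrapped in Reverse for min-heap order); the real reader merges timestamp-sorted batches per primary key.

use std::cmp::Reverse;
use std::collections::BinaryHeap;

/// Merge already-sorted input runs into one sorted output using a min-heap.
fn merge_sorted(runs: Vec<Vec<i64>>) -> Vec<i64> {
    // Heap entries: (next value, which run it came from, index of that value).
    let mut heap = BinaryHeap::new();
    for (run_idx, run) in runs.iter().enumerate() {
        if let Some(&first) = run.first() {
            heap.push(Reverse((first, run_idx, 0usize)));
        }
    }

    let mut out = Vec::new();
    while let Some(Reverse((value, run_idx, elem_idx))) = heap.pop() {
        out.push(value);
        // Push the next element of the run the popped value came from.
        if let Some(&next) = runs[run_idx].get(elem_idx + 1) {
            heap.push(Reverse((next, run_idx, elem_idx + 1)));
        }
    }
    out
}

fn main() {
    let merged = merge_sorted(vec![vec![1, 4, 9], vec![2, 3], vec![5]]);
    assert_eq!(merged, vec![1, 2, 3, 4, 5, 9]);
}
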
Weny Xu
00fe7d104e feat: enable tcp no_delay by default for internal services (#2527) 2023-10-07 02:35:28 +00:00
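
#2527 and #2530 turn on TCP_NODELAY for the servers. With tokio that is one call per accepted connection; this sketch assumes a tokio runtime and uses an example bind address.

use tokio::net::TcpListener;

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let listener = TcpListener::bind("127.0.0.1:0").await?;
    loop {
        let (stream, _peer) = listener.accept().await?;
        // Disable Nagle's algorithm so small request/response frames are
        // flushed immediately instead of being batched.
        stream.set_nodelay(true)?;
        // ... hand the stream to the protocol handler here ...
    }
}
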
161 changed files with 4292 additions and 1212 deletions

View File

@@ -62,6 +62,16 @@ runs:
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
- name: Build and push android dev builder image to dockerhub
shell: bash
run:
make dev-builder \
BASE_IMAGE=android \
BUILDX_MULTI_PLATFORM_BUILD=true \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
- name: Login to ACR
uses: docker/login-action@v2
continue-on-error: true

View File

@@ -91,7 +91,7 @@ env:
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
NIGHTLY_RELEASE_PREFIX: nightly
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
NEXT_RELEASE_VERSION: v0.4.0
NEXT_RELEASE_VERSION: v0.5.0
jobs:
allocate-runners:

267
Cargo.lock generated
View File

@@ -204,7 +204,7 @@ checksum = "8f1f8f5a6f3d50d89e3797d7593a50f96bb2aaa20ca0cc7be1fb673232c91d72"
[[package]]
name = "api"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"common-base",
"common-error",
@@ -579,26 +579,6 @@ dependencies = [
"zstd-safe 6.0.6",
]
[[package]]
name = "async-io"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fc5b45d93ef0529756f812ca52e44c221b35341892d3dcc34132ac02f3dd2af"
dependencies = [
"async-lock",
"autocfg",
"cfg-if 1.0.0",
"concurrent-queue",
"futures-lite",
"log",
"parking",
"polling",
"rustix 0.37.23",
"slab",
"socket2 0.4.9",
"waker-fn",
]
[[package]]
name = "async-lock"
version = "2.8.0"
@@ -686,7 +666,7 @@ dependencies = [
[[package]]
name = "auth"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"async-trait",
@@ -859,7 +839,7 @@ dependencies = [
[[package]]
name = "benchmarks"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"arrow",
"clap 4.4.1",
@@ -1240,7 +1220,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "catalog"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"arc-swap",
@@ -1524,7 +1504,7 @@ checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961"
[[package]]
name = "client"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"arrow-flight",
@@ -1554,7 +1534,7 @@ dependencies = [
"rand",
"session",
"snafu",
"substrait 0.4.0-nightly",
"substrait 0.4.0",
"substrait 0.7.5",
"tokio",
"tokio-stream",
@@ -1591,7 +1571,7 @@ dependencies = [
[[package]]
name = "cmd"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"anymap",
"async-trait",
@@ -1627,6 +1607,7 @@ dependencies = [
"mito2",
"nu-ansi-term",
"partition",
"plugins",
"prost",
"query",
"rand",
@@ -1638,7 +1619,7 @@ dependencies = [
"servers",
"session",
"snafu",
"substrait 0.4.0-nightly",
"substrait 0.4.0",
"table",
"temp-env",
"tikv-jemallocator",
@@ -1671,7 +1652,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"
[[package]]
name = "common-base"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"anymap",
"bitvec",
@@ -1686,7 +1667,7 @@ dependencies = [
[[package]]
name = "common-catalog"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"chrono",
"common-error",
@@ -1699,7 +1680,7 @@ dependencies = [
[[package]]
name = "common-config"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"common-base",
"humantime-serde",
@@ -1708,7 +1689,7 @@ dependencies = [
[[package]]
name = "common-datasource"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"arrow",
"arrow-schema",
@@ -1737,7 +1718,7 @@ dependencies = [
[[package]]
name = "common-error"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"snafu",
"strum 0.25.0",
@@ -1745,7 +1726,7 @@ dependencies = [
[[package]]
name = "common-function"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"arc-swap",
"chrono-tz 0.6.3",
@@ -1768,7 +1749,7 @@ dependencies = [
[[package]]
name = "common-greptimedb-telemetry"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"async-trait",
"common-error",
@@ -1787,7 +1768,7 @@ dependencies = [
[[package]]
name = "common-grpc"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"arrow-flight",
@@ -1817,7 +1798,7 @@ dependencies = [
[[package]]
name = "common-grpc-expr"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"async-trait",
@@ -1836,7 +1817,7 @@ dependencies = [
[[package]]
name = "common-macro"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"arc-swap",
"backtrace",
@@ -1853,7 +1834,7 @@ dependencies = [
[[package]]
name = "common-mem-prof"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"common-error",
"common-macro",
@@ -1866,12 +1847,14 @@ dependencies = [
[[package]]
name = "common-meta"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"arrow-flight",
"async-stream",
"async-trait",
"base64 0.21.3",
"bytes",
"chrono",
"common-catalog",
"common-error",
@@ -1902,7 +1885,7 @@ dependencies = [
[[package]]
name = "common-procedure"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"async-stream",
"async-trait",
@@ -1926,7 +1909,7 @@ dependencies = [
[[package]]
name = "common-procedure-test"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"async-trait",
"common-procedure",
@@ -1934,7 +1917,7 @@ dependencies = [
[[package]]
name = "common-query"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"async-trait",
@@ -1957,7 +1940,7 @@ dependencies = [
[[package]]
name = "common-recordbatch"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"common-error",
"common-macro",
@@ -1974,7 +1957,7 @@ dependencies = [
[[package]]
name = "common-runtime"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"async-trait",
"common-error",
@@ -1991,7 +1974,7 @@ dependencies = [
[[package]]
name = "common-telemetry"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"backtrace",
"common-error",
@@ -2018,7 +2001,7 @@ dependencies = [
[[package]]
name = "common-test-util"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"once_cell",
"rand",
@@ -2027,7 +2010,7 @@ dependencies = [
[[package]]
name = "common-time"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"arrow",
"chrono",
@@ -2042,7 +2025,7 @@ dependencies = [
[[package]]
name = "common-version"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"build-data",
]
@@ -2496,7 +2479,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"ahash 0.8.3",
"arrow",
@@ -2544,7 +2527,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"arrow",
"arrow-array",
@@ -2558,7 +2541,7 @@ dependencies = [
[[package]]
name = "datafusion-execution"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"dashmap",
"datafusion-common",
@@ -2575,7 +2558,7 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"ahash 0.8.3",
"arrow",
@@ -2589,7 +2572,7 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"arrow",
"async-trait",
@@ -2606,7 +2589,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"ahash 0.8.3",
"arrow",
@@ -2641,7 +2624,7 @@ dependencies = [
[[package]]
name = "datafusion-row"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"arrow",
"datafusion-common",
@@ -2652,7 +2635,7 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"arrow",
"arrow-schema",
@@ -2665,7 +2648,7 @@ dependencies = [
[[package]]
name = "datafusion-substrait"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"async-recursion",
"chrono",
@@ -2680,7 +2663,7 @@ dependencies = [
[[package]]
name = "datanode"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"arrow-flight",
@@ -2739,7 +2722,7 @@ dependencies = [
"sql",
"storage",
"store-api",
"substrait 0.4.0-nightly",
"substrait 0.4.0",
"table",
"tokio",
"tokio-stream",
@@ -2753,7 +2736,7 @@ dependencies = [
[[package]]
name = "datatypes"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"arrow",
"arrow-array",
@@ -3216,7 +3199,7 @@ dependencies = [
[[package]]
name = "file-engine"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"async-trait",
@@ -3326,7 +3309,7 @@ dependencies = [
[[package]]
name = "frontend"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"arc-swap",
@@ -3390,7 +3373,7 @@ dependencies = [
"storage",
"store-api",
"strfmt",
"substrait 0.4.0-nightly",
"substrait 0.4.0",
"table",
"tokio",
"toml 0.7.6",
@@ -3526,21 +3509,6 @@ version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964"
[[package]]
name = "futures-lite"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce"
dependencies = [
"fastrand 1.9.0",
"futures-core",
"futures-io",
"memchr",
"parking",
"pin-project-lite",
"waker-fn",
]
[[package]]
name = "futures-macro"
version = "0.3.28"
@@ -4210,7 +4178,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=693128abe9adc70ba636010a172c9da55b206bba#693128abe9adc70ba636010a172c9da55b206bba"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=1f1dd532a111e3834cc3019c5605e2993ffb9dc3#1f1dd532a111e3834cc3019c5605e2993ffb9dc3"
dependencies = [
"prost",
"serde",
@@ -5012,12 +4980,6 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4"
[[package]]
name = "linux-raw-sys"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
[[package]]
name = "linux-raw-sys"
version = "0.4.5"
@@ -5042,7 +5004,7 @@ checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "log-store"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"async-stream",
"async-trait",
@@ -5122,15 +5084,6 @@ dependencies = [
"vob",
]
[[package]]
name = "lru"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71e7d46de488603ffdd5f30afbc64fbba2378214a2c3a2fb83abf3d33126df17"
dependencies = [
"hashbrown 0.13.2",
]
[[package]]
name = "lru"
version = "0.10.1"
@@ -5321,7 +5274,7 @@ dependencies = [
[[package]]
name = "meta-client"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"async-trait",
@@ -5350,7 +5303,7 @@ dependencies = [
[[package]]
name = "meta-srv"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"anymap",
"api",
@@ -5542,11 +5495,12 @@ dependencies = [
[[package]]
name = "mito2"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"anymap",
"api",
"aquamarine",
"arc-swap",
"async-channel",
"async-compat",
"async-stream",
@@ -5598,12 +5552,12 @@ dependencies = [
[[package]]
name = "moka"
version = "0.11.3"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa6e72583bf6830c956235bff0d5afec8cf2952f579ebad18ae7821a917d950f"
checksum = "8dc65d4615c08c8a13d91fd404b5a2a4485ba35b4091e3315cf8798d280c2f29"
dependencies = [
"async-io",
"async-lock",
"async-trait",
"crossbeam-channel",
"crossbeam-epoch",
"crossbeam-utils",
@@ -5612,7 +5566,6 @@ dependencies = [
"parking_lot 0.12.1",
"quanta 0.11.1",
"rustc_version",
"scheduled-thread-pool",
"skeptic",
"smallvec",
"tagptr",
@@ -5666,7 +5619,7 @@ dependencies = [
"futures-sink",
"futures-util",
"lazy_static",
"lru 0.10.1",
"lru",
"mio",
"mysql_common",
"once_cell",
@@ -6004,19 +5957,19 @@ dependencies = [
[[package]]
name = "object-store"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"anyhow",
"async-trait",
"bytes",
"common-runtime",
"common-telemetry",
"common-test-util",
"futures",
"lru 0.9.0",
"md5",
"metrics",
"moka",
"opendal",
"pin-project",
"tokio",
"uuid",
]
@@ -6228,7 +6181,7 @@ dependencies = [
[[package]]
name = "operator"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"async-compat",
@@ -6273,7 +6226,7 @@ dependencies = [
"sqlparser 0.34.0",
"storage",
"store-api",
"substrait 0.4.0-nightly",
"substrait 0.4.0",
"table",
"tokio",
"tonic 0.9.2",
@@ -6397,12 +6350,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "parking"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14f2252c834a40ed9bb5422029649578e63aa341ac401f74e719dd1afda8394e"
[[package]]
name = "parking_lot"
version = "0.11.2"
@@ -6499,7 +6446,7 @@ dependencies = [
[[package]]
name = "partition"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"async-trait",
@@ -6823,6 +6770,18 @@ dependencies = [
"plotters-backend",
]
[[package]]
name = "plugins"
version = "0.4.0"
dependencies = [
"auth",
"common-base",
"datanode",
"frontend",
"meta-srv",
"snafu",
]
[[package]]
name = "pmutil"
version = "0.5.3"
@@ -6834,22 +6793,6 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "polling"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b2d323e8ca7996b3e23126511a523f7e62924d93ecd5ae73b333815b0eb3dce"
dependencies = [
"autocfg",
"bitflags 1.3.2",
"cfg-if 1.0.0",
"concurrent-queue",
"libc",
"log",
"pin-project-lite",
"windows-sys 0.48.0",
]
[[package]]
name = "portable-atomic"
version = "0.3.20"
@@ -7079,7 +7022,7 @@ dependencies = [
[[package]]
name = "promql"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"async-recursion",
"async-trait",
@@ -7093,6 +7036,7 @@ dependencies = [
"datatypes",
"futures",
"greptime-proto",
"metrics",
"promql-parser",
"prost",
"query",
@@ -7340,7 +7284,7 @@ dependencies = [
[[package]]
name = "query"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"ahash 0.8.3",
"api",
@@ -7397,7 +7341,7 @@ dependencies = [
"stats-cli",
"store-api",
"streaming-stats",
"substrait 0.4.0-nightly",
"substrait 0.4.0",
"table",
"tokio",
"tokio-stream",
@@ -8058,20 +8002,6 @@ dependencies = [
"windows-sys 0.45.0",
]
[[package]]
name = "rustix"
version = "0.37.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d69718bf81c6127a49dc64e44a742e8bb9213c0ff8869a22c308f84c1d4ab06"
dependencies = [
"bitflags 1.3.2",
"errno 0.3.3",
"io-lifetimes",
"libc",
"linux-raw-sys 0.3.8",
"windows-sys 0.48.0",
]
[[package]]
name = "rustix"
version = "0.38.10"
@@ -8577,15 +8507,6 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "scheduled-thread-pool"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19"
dependencies = [
"parking_lot 0.12.1",
]
[[package]]
name = "schemars"
version = "0.8.13"
@@ -8619,7 +8540,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "script"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"arc-swap",
@@ -8899,7 +8820,7 @@ dependencies = [
[[package]]
name = "servers"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"aide",
"api",
@@ -8993,8 +8914,9 @@ dependencies = [
[[package]]
name = "session"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"arc-swap",
"auth",
"common-catalog",
@@ -9270,7 +9192,7 @@ dependencies = [
[[package]]
name = "sql"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"common-base",
@@ -9321,7 +9243,7 @@ dependencies = [
[[package]]
name = "sqlness-runner"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"async-trait",
"clap 4.4.1",
@@ -9527,7 +9449,7 @@ dependencies = [
[[package]]
name = "storage"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"arc-swap",
@@ -9581,7 +9503,7 @@ dependencies = [
[[package]]
name = "store-api"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"aquamarine",
@@ -9600,6 +9522,7 @@ dependencies = [
"serde",
"serde_json",
"snafu",
"strum 0.25.0",
"tokio",
]
@@ -9718,7 +9641,7 @@ dependencies = [
[[package]]
name = "substrait"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"async-recursion",
"async-trait",
@@ -9876,7 +9799,7 @@ dependencies = [
[[package]]
name = "table"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"anymap",
"async-trait",
@@ -9982,7 +9905,7 @@ dependencies = [
[[package]]
name = "tests-integration"
version = "0.4.0-nightly"
version = "0.4.0"
dependencies = [
"api",
"async-trait",
@@ -10035,7 +9958,7 @@ dependencies = [
"sql",
"sqlx",
"store-api",
"substrait 0.4.0-nightly",
"substrait 0.4.0",
"table",
"tempfile",
"tokio",
@@ -11196,12 +11119,6 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8e76fae08f03f96e166d2dfda232190638c10e0383841252416f9cfe2ae60e6"
[[package]]
name = "waker-fn"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca"
[[package]]
name = "walkdir"
version = "2.3.3"

View File

@@ -39,6 +39,7 @@ members = [
"src/object-store",
"src/operator",
"src/partition",
"src/plugins",
"src/promql",
"src/query",
"src/script",
@@ -54,40 +55,43 @@ members = [
resolver = "2"
[workspace.package]
version = "0.4.0-nightly"
version = "0.4.0"
edition = "2021"
license = "Apache-2.0"
[workspace.dependencies]
aquamarine = "0.3"
arrow = { version = "43.0" }
etcd-client = "0.11"
arrow-array = "43.0"
arrow-flight = "43.0"
arrow-schema = { version = "43.0", features = ["serde"] }
async-stream = "0.3"
async-trait = "0.1"
chrono = { version = "0.4", features = ["serde"] }
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
derive_builder = "0.12"
etcd-client = "0.11"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "693128abe9adc70ba636010a172c9da55b206bba" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "1f1dd532a111e3834cc3019c5605e2993ffb9dc3" }
humantime-serde = "1.1"
itertools = "0.10"
lazy_static = "1.4"
moka = { version = "0.11" }
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "abbd357c1e193cd270ea65ee7652334a150b628f" }
metrics = "0.20"
moka = "0.12"
once_cell = "1.18"
opentelemetry-proto = { version = "0.2", features = ["gen-tonic", "metrics"] }
parquet = "43.0"
paste = "1.0"
prost = "0.11"
raft-engine = { git = "https://github.com/tikv/raft-engine.git", rev = "22dfb426cd994602b57725ef080287d3e53db479" }
rand = "0.8"
regex = "1.8"
reqwest = { version = "0.11", default-features = false, features = [
@@ -109,8 +113,6 @@ tokio-util = { version = "0.7", features = ["io-util", "compat"] }
toml = "0.7"
tonic = { version = "0.9", features = ["tls"] }
uuid = { version = "1", features = ["serde", "v4", "fast-rng"] }
metrics = "0.20"
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "abbd357c1e193cd270ea65ee7652334a150b628f" }
## workspaces members
api = { path = "src/api" }
auth = { path = "src/auth" }
@@ -123,19 +125,18 @@ common-config = { path = "src/common/config" }
common-datasource = { path = "src/common/datasource" }
common-error = { path = "src/common/error" }
common-function = { path = "src/common/function" }
common-macro = { path = "src/common/macro" }
common-greptimedb-telemetry = { path = "src/common/greptimedb-telemetry" }
common-grpc = { path = "src/common/grpc" }
common-grpc-expr = { path = "src/common/grpc-expr" }
common-macro = { path = "src/common/macro" }
common-mem-prof = { path = "src/common/mem-prof" }
common-meta = { path = "src/common/meta" }
common-pprof = { path = "src/common/pprof" }
common-procedure = { path = "src/common/procedure" }
common-procedure-test = { path = "src/common/procedure-test" }
common-pprof = { path = "src/common/pprof" }
common-query = { path = "src/common/query" }
common-recordbatch = { path = "src/common/recordbatch" }
common-runtime = { path = "src/common/runtime" }
substrait = { path = "src/common/substrait" }
common-telemetry = { path = "src/common/telemetry" }
common-test-util = { path = "src/common/test-util" }
common-time = { path = "src/common/time" }
@@ -149,20 +150,20 @@ meta-client = { path = "src/meta-client" }
meta-srv = { path = "src/meta-srv" }
mito = { path = "src/mito" }
mito2 = { path = "src/mito2" }
operator = { path = "src/operator" }
object-store = { path = "src/object-store" }
operator = { path = "src/operator" }
partition = { path = "src/partition" }
plugins = { path = "src/plugins" }
promql = { path = "src/promql" }
query = { path = "src/query" }
raft-engine = { git = "https://github.com/tikv/raft-engine.git", rev = "22dfb426cd994602b57725ef080287d3e53db479" }
script = { path = "src/script" }
servers = { path = "src/servers" }
session = { path = "src/session" }
sql = { path = "src/sql" }
storage = { path = "src/storage" }
store-api = { path = "src/store-api" }
substrait = { path = "src/common/substrait" }
table = { path = "src/table" }
table-procedure = { path = "src/table-procedure" }
[workspace.dependencies.meter-macros]
git = "https://github.com/GreptimeTeam/greptime-meter.git"

View File

@@ -55,11 +55,15 @@ else
BUILDX_MULTI_PLATFORM_BUILD_OPTS := -o type=docker
endif
ifneq ($(strip $(CARGO_BUILD_EXTRA_OPTS)),)
CARGO_BUILD_OPTS += ${CARGO_BUILD_EXTRA_OPTS}
endif
##@ Build
.PHONY: build
build: ## Build debug version greptime.
cargo build ${CARGO_BUILD_OPTS}
cargo ${CARGO_EXTENSION} build ${CARGO_BUILD_OPTS}
.POHNY: build-by-dev-builder
build-by-dev-builder: ## Build greptime by dev-builder.
@@ -67,11 +71,34 @@ build-by-dev-builder: ## Build greptime by dev-builder.
-v ${PWD}:/greptimedb -v ${CARGO_REGISTRY_CACHE}:/root/.cargo/registry \
-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-${BASE_IMAGE}:latest \
make build \
CARGO_EXTENSION="${CARGO_EXTENSION}" \
CARGO_PROFILE=${CARGO_PROFILE} \
FEATURES=${FEATURES} \
TARGET_DIR=${TARGET_DIR} \
TARGET=${TARGET} \
RELEASE=${RELEASE}
RELEASE=${RELEASE} \
CARGO_BUILD_EXTRA_OPTS="${CARGO_BUILD_EXTRA_OPTS}"
.PHONY: build-android-bin
build-android-bin: ## Build greptime binary for android.
docker run --network=host \
-v ${PWD}:/greptimedb -v ${CARGO_REGISTRY_CACHE}:/root/.cargo/registry \
-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-android:latest \
make build \
CARGO_EXTENSION="ndk --platform 23 -t aarch64-linux-android" \
CARGO_PROFILE=release \
FEATURES="${FEATURES}" \
TARGET_DIR="${TARGET_DIR}" \
TARGET="${TARGET}" \
RELEASE="${RELEASE}" \
CARGO_BUILD_EXTRA_OPTS="--bin greptime --no-default-features"
.PHONY: strip-android-bin
strip-android-bin: ## Strip greptime binary for android.
docker run --network=host \
-v ${PWD}:/greptimedb \
-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-android:latest \
bash -c '$${NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-strip /greptimedb/target/aarch64-linux-android/release/greptime'
.PHONY: clean
clean: ## Clean the project.

View File

@@ -47,6 +47,12 @@ type = "File"
# TTL for all tables. Disabled by default.
# global_ttl = "7d"
# Cache configuration for object storage such as 'S3' etc.
# The local file cache directory
# cache_path = "/path/local_cache"
# The local file cache capacity in bytes.
# cache_capacity = "256Mib"
# Compaction options, see `standalone.example.toml`.
[storage.compaction]
max_inflight_tasks = 4

View File

@@ -115,6 +115,10 @@ data_home = "/tmp/greptimedb/"
type = "File"
# TTL for all tables. Disabled by default.
# global_ttl = "7d"
# Cache configuration for object storage such as 'S3' etc.
# cache_path = "/path/local_cache"
# The local file cache capacity in bytes.
# cache_capacity = "256Mib"
# Compaction options.
[storage.compaction]

View File

@@ -0,0 +1,41 @@
FROM --platform=linux/amd64 saschpe/android-ndk:34-jdk17.0.8_7-ndk25.2.9519653-cmake3.22.1
ENV LANG en_US.utf8
WORKDIR /greptimedb
# Rename libunwind to libgcc
RUN cp ${NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/14.0.7/lib/linux/aarch64/libunwind.a ${NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/14.0.7/lib/linux/aarch64/libgcc.a
# Install dependencies.
RUN apt-get update && apt-get install -y \
libssl-dev \
protobuf-compiler \
curl \
git \
build-essential \
pkg-config \
python3 \
python3-dev \
python3-pip \
&& pip3 install --upgrade pip \
&& pip3 install pyarrow
# Trust workdir
RUN git config --global --add safe.directory /greptimedb
# Install Rust.
SHELL ["/bin/bash", "-c"]
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
ENV PATH /root/.cargo/bin/:$PATH
# Add android toolchains
ARG RUST_TOOLCHAIN
RUN rustup toolchain install ${RUST_TOOLCHAIN}
RUN rustup target add aarch64-linux-android
# Install cargo-ndk
RUN cargo install cargo-ndk
ENV ANDROID_NDK_HOME $NDK_ROOT
# Builder entrypoint.
CMD ["cargo", "ndk", "--platform", "23", "-t", "aarch64-linux-android", "build", "--bin", "greptime", "--profile", "release", "--no-default-features"]

View File

@@ -4,8 +4,6 @@ version.workspace = true
edition.workspace = true
license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
default = []
testing = []

View File

@@ -18,9 +18,7 @@ use std::sync::{Arc, Weak};
use common_catalog::consts::{DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, NUMBERS_TABLE_ID};
use common_error::ext::BoxedError;
use common_meta::cache_invalidator::{
CacheInvalidator, Context, KvCacheInvalidatorRef, TableMetadataCacheInvalidator,
};
use common_meta::cache_invalidator::{CacheInvalidator, CacheInvalidatorRef, Context};
use common_meta::datanode_manager::DatanodeManagerRef;
use common_meta::error::Result as MetaResult;
use common_meta::key::catalog_name::CatalogNameKey;
@@ -54,7 +52,7 @@ pub struct KvBackendCatalogManager {
// TODO(LFC): Maybe use a real implementation for Standalone mode.
// Now we use `NoopKvCacheInvalidator` for Standalone mode. In Standalone mode, the KV backend
// is implemented by RaftEngine. Maybe we need a cache for it?
table_metadata_cache_invalidator: TableMetadataCacheInvalidator,
cache_invalidator: CacheInvalidatorRef,
partition_manager: PartitionRuleManagerRef,
table_metadata_manager: TableMetadataManagerRef,
datanode_manager: DatanodeManagerRef,
@@ -65,13 +63,13 @@ pub struct KvBackendCatalogManager {
#[async_trait::async_trait]
impl CacheInvalidator for KvBackendCatalogManager {
async fn invalidate_table_name(&self, ctx: &Context, table_name: TableName) -> MetaResult<()> {
self.table_metadata_cache_invalidator
self.cache_invalidator
.invalidate_table_name(ctx, table_name)
.await
}
async fn invalidate_table_id(&self, ctx: &Context, table_id: TableId) -> MetaResult<()> {
self.table_metadata_cache_invalidator
self.cache_invalidator
.invalidate_table_id(ctx, table_id)
.await
}
@@ -80,15 +78,13 @@ impl CacheInvalidator for KvBackendCatalogManager {
impl KvBackendCatalogManager {
pub fn new(
backend: KvBackendRef,
backend_cache_invalidator: KvCacheInvalidatorRef,
cache_invalidator: CacheInvalidatorRef,
datanode_manager: DatanodeManagerRef,
) -> Arc<Self> {
Arc::new_cyclic(|me| Self {
partition_manager: Arc::new(PartitionRuleManager::new(backend.clone())),
table_metadata_manager: Arc::new(TableMetadataManager::new(backend)),
table_metadata_cache_invalidator: TableMetadataCacheInvalidator::new(
backend_cache_invalidator.clone(),
),
cache_invalidator,
datanode_manager,
system_catalog: SystemCatalog {
catalog_manager: me.clone(),
@@ -107,12 +103,6 @@ impl KvBackendCatalogManager {
pub fn datanode_manager(&self) -> DatanodeManagerRef {
self.datanode_manager.clone()
}
pub async fn invalidate_schema(&self, catalog: &str, schema: &str) {
self.table_metadata_cache_invalidator
.invalidate_schema(catalog, schema)
.await
}
}
#[async_trait::async_trait]
@@ -229,6 +219,7 @@ impl CatalogManager for KvBackendCatalogManager {
.get(table_id)
.await
.context(TableMetadataManagerSnafu)?
.map(|v| v.into_inner())
else {
return Ok(None);
};

View File

@@ -49,6 +49,7 @@ metrics.workspace = true
mito2 = { workspace = true }
nu-ansi-term = "0.46"
partition = { workspace = true }
plugins.workspace = true
prost.workspace = true
query = { workspace = true }
rand.workspace = true

View File

@@ -257,7 +257,7 @@ async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
cached_meta_backend.clone(),
datanode_clients,
);
let plugins: Arc<Plugins> = Default::default();
let plugins: Plugins = Default::default();
let state = Arc::new(QueryEngineState::new(
catalog_list,
None,

View File

@@ -20,7 +20,7 @@ use client::api::v1::meta::TableRouteValue;
use common_meta::ddl::utils::region_storage_path;
use common_meta::error as MetaError;
use common_meta::key::catalog_name::{CatalogNameKey, CatalogNameValue};
use common_meta::key::datanode_table::{DatanodeTableKey, DatanodeTableValue};
use common_meta::key::datanode_table::{DatanodeTableKey, DatanodeTableValue, RegionInfo};
use common_meta::key::schema_name::{SchemaNameKey, SchemaNameValue};
use common_meta::key::table_info::{TableInfoKey, TableInfoValue};
use common_meta::key::table_name::{TableNameKey, TableNameValue};
@@ -405,8 +405,11 @@ impl MigrateTableMetadata {
DatanodeTableValue::new(
table_id,
regions,
engine.to_string(),
region_storage_path.clone(),
RegionInfo {
engine: engine.to_string(),
region_storage_path: region_storage_path.clone(),
region_options: (&value.table_info.meta.options).into(),
},
),
)
})

View File

@@ -31,6 +31,10 @@ pub struct Instance {
impl Instance {
pub async fn start(&mut self) -> Result<()> {
plugins::start_datanode_plugins(self.datanode.plugins())
.await
.context(StartDatanodeSnafu)?;
self.datanode.start().await.context(StartDatanodeSnafu)
}
@@ -159,11 +163,15 @@ impl StartCommand {
Ok(Options::Datanode(Box::new(opts)))
}
async fn build(self, opts: DatanodeOptions) -> Result<Instance> {
async fn build(self, mut opts: DatanodeOptions) -> Result<Instance> {
let plugins = plugins::setup_datanode_plugins(&mut opts)
.await
.context(StartDatanodeSnafu)?;
logging::info!("Datanode start command: {:#?}", self);
logging::info!("Datanode options: {:#?}", opts);
let datanode = DatanodeBuilder::new(opts, None, Default::default())
let datanode = DatanodeBuilder::new(opts, None, plugins)
.build()
.await
.context(StartDatanodeSnafu)?;

View File

@@ -85,12 +85,6 @@ pub enum Error {
#[snafu(display("Illegal config: {}", msg))]
IllegalConfig { msg: String, location: Location },
#[snafu(display("Illegal auth config"))]
IllegalAuthConfig {
location: Location,
source: auth::error::Error,
},
#[snafu(display("Unsupported selector type: {}", selector_type))]
UnsupportedSelectorType {
selector_type: String,
@@ -208,7 +202,6 @@ impl ErrorExt for Error {
| Error::LoadLayeredConfig { .. }
| Error::IllegalConfig { .. }
| Error::InvalidReplCommand { .. }
| Error::IllegalAuthConfig { .. }
| Error::ConnectEtcd { .. } => StatusCode::InvalidArguments,
Error::ReplCreation { .. } | Error::Readline { .. } => StatusCode::Internal,

View File

@@ -12,11 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use std::time::Duration;
use auth::UserProviderRef;
use clap::Parser;
use common_base::Plugins;
use common_telemetry::logging;
use frontend::frontend::FrontendOptions;
use frontend::instance::{FrontendInstance, Instance as FeInstance};
@@ -25,7 +23,7 @@ use servers::tls::{TlsMode, TlsOption};
use servers::Mode;
use snafu::ResultExt;
use crate::error::{self, IllegalAuthConfigSnafu, Result};
use crate::error::{self, Result, StartFrontendSnafu};
use crate::options::{Options, TopLevelOptions};
pub struct Instance {
@@ -34,10 +32,11 @@ pub struct Instance {
impl Instance {
pub async fn start(&mut self) -> Result<()> {
self.frontend
.start()
plugins::start_frontend_plugins(self.frontend.plugins().clone())
.await
.context(error::StartFrontendSnafu)
.context(StartFrontendSnafu)?;
self.frontend.start().await.context(StartFrontendSnafu)
}
pub async fn stop(&self) -> Result<()> {
@@ -88,6 +87,8 @@ pub struct StartCommand {
#[clap(long)]
http_addr: Option<String>,
#[clap(long)]
http_timeout: Option<u64>,
#[clap(long)]
grpc_addr: Option<String>,
#[clap(long)]
mysql_addr: Option<String>,
@@ -141,6 +142,10 @@ impl StartCommand {
opts.http.addr = addr.clone()
}
if let Some(http_timeout) = self.http_timeout {
opts.http.timeout = Duration::from_secs(http_timeout)
}
if let Some(disable_dashboard) = self.disable_dashboard {
opts.http.disable_dashboard = disable_dashboard;
}
@@ -177,38 +182,32 @@ impl StartCommand {
opts.mode = Mode::Distributed;
}
opts.user_provider = self.user_provider.clone();
Ok(Options::Frontend(Box::new(opts)))
}
async fn build(self, opts: FrontendOptions) -> Result<Instance> {
async fn build(self, mut opts: FrontendOptions) -> Result<Instance> {
let plugins = plugins::setup_frontend_plugins(&mut opts)
.await
.context(StartFrontendSnafu)?;
logging::info!("Frontend start command: {:#?}", self);
logging::info!("Frontend options: {:#?}", opts);
let plugins = Arc::new(load_frontend_plugins(&self.user_provider)?);
let mut instance = FeInstance::try_new_distributed(&opts, plugins.clone())
.await
.context(error::StartFrontendSnafu)?;
.context(StartFrontendSnafu)?;
instance
.build_servers(&opts)
.await
.context(error::StartFrontendSnafu)?;
.context(StartFrontendSnafu)?;
Ok(Instance { frontend: instance })
}
}
pub fn load_frontend_plugins(user_provider: &Option<String>) -> Result<Plugins> {
let plugins = Plugins::new();
if let Some(provider) = user_provider {
let provider = auth::user_provider_from_option(provider).context(IllegalAuthConfigSnafu)?;
plugins.insert::<UserProviderRef>(provider);
}
Ok(plugins)
}
#[cfg(test)]
mod tests {
use std::io::Write;
@@ -218,6 +217,7 @@ mod tests {
use common_base::readable_size::ReadableSize;
use common_test_util::temp_dir::create_named_temp_file;
use frontend::service_config::GrpcOptions;
use servers::http::HttpOptions;
use super::*;
use crate::options::ENV_VAR_SEP;
@@ -303,14 +303,17 @@ mod tests {
#[tokio::test]
async fn test_try_from_start_command_to_anymap() {
let command = StartCommand {
let mut fe_opts = FrontendOptions {
http: HttpOptions {
disable_dashboard: false,
..Default::default()
},
user_provider: Some("static_user_provider:cmd:test=test".to_string()),
disable_dashboard: Some(false),
..Default::default()
};
let plugins = load_frontend_plugins(&command.user_provider);
let plugins = plugins.unwrap();
let plugins = plugins::setup_frontend_plugins(&mut fe_opts).await.unwrap();
let provider = plugins.get::<UserProviderRef>().unwrap();
let result = provider
.authenticate(

View File

@@ -20,7 +20,7 @@ use meta_srv::bootstrap::MetaSrvInstance;
use meta_srv::metasrv::MetaSrvOptions;
use snafu::ResultExt;
use crate::error::{self, Result};
use crate::error::{self, Result, StartMetaServerSnafu};
use crate::options::{Options, TopLevelOptions};
pub struct Instance {
@@ -29,10 +29,10 @@ pub struct Instance {
impl Instance {
pub async fn start(&mut self) -> Result<()> {
self.instance
.start()
plugins::start_meta_srv_plugins(self.instance.plugins())
.await
.context(error::StartMetaServerSnafu)
.context(StartMetaServerSnafu)?;
self.instance.start().await.context(StartMetaServerSnafu)
}
pub async fn stop(&self) -> Result<()> {
@@ -158,12 +158,15 @@ impl StartCommand {
Ok(Options::Metasrv(Box::new(opts)))
}
async fn build(self, opts: MetaSrvOptions) -> Result<Instance> {
logging::info!("MetaSrv start command: {:#?}", self);
async fn build(self, mut opts: MetaSrvOptions) -> Result<Instance> {
let plugins = plugins::setup_meta_srv_plugins(&mut opts)
.await
.context(StartMetaServerSnafu)?;
logging::info!("MetaSrv start command: {:#?}", self);
logging::info!("MetaSrv options: {:#?}", opts);
let instance = MetaSrvInstance::new(opts)
let instance = MetaSrvInstance::new(opts, plugins)
.await
.context(error::BuildMetaServerSnafu)?;

View File

@@ -18,7 +18,7 @@ use catalog::kvbackend::KvBackendCatalogManager;
use catalog::CatalogManagerRef;
use clap::Parser;
use common_base::Plugins;
use common_config::{kv_store_dir, KvStoreConfig, WalConfig};
use common_config::{metadata_store_dir, KvStoreConfig, WalConfig};
use common_meta::cache_invalidator::DummyKvCacheInvalidator;
use common_meta::kv_backend::KvBackendRef;
use common_procedure::ProcedureManagerRef;
@@ -44,7 +44,6 @@ use crate::error::{
IllegalConfigSnafu, InitMetadataSnafu, Result, ShutdownDatanodeSnafu, ShutdownFrontendSnafu,
StartDatanodeSnafu, StartFrontendSnafu,
};
use crate::frontend::load_frontend_plugins;
use crate::options::{MixOptions, Options, TopLevelOptions};
#[derive(Parser)]
@@ -298,8 +297,11 @@ impl StartCommand {
#[allow(unused_variables)]
#[allow(clippy::diverging_sub_expression)]
async fn build(self, opts: MixOptions) -> Result<Instance> {
let plugins = Arc::new(load_frontend_plugins(&self.user_provider)?);
let fe_opts = opts.frontend;
let mut fe_opts = opts.frontend;
let fe_plugins = plugins::setup_frontend_plugins(&mut fe_opts)
.await
.context(StartFrontendSnafu)?;
let dn_opts = opts.datanode;
info!("Standalone start command: {:#?}", self);
@@ -308,14 +310,17 @@ impl StartCommand {
fe_opts, dn_opts
);
let kv_dir = kv_store_dir(&opts.data_home);
let (kv_store, procedure_manager) =
FeInstance::try_build_standalone_components(kv_dir, opts.kv_store, opts.procedure)
.await
.context(StartFrontendSnafu)?;
let metadata_dir = metadata_store_dir(&opts.data_home);
let (kv_store, procedure_manager) = FeInstance::try_build_standalone_components(
metadata_dir,
opts.kv_store,
opts.procedure,
)
.await
.context(StartFrontendSnafu)?;
let datanode =
DatanodeBuilder::new(dn_opts.clone(), Some(kv_store.clone()), plugins.clone())
DatanodeBuilder::new(dn_opts.clone(), Some(kv_store.clone()), Default::default())
.build()
.await
.context(StartDatanodeSnafu)?;
@@ -335,7 +340,7 @@ impl StartCommand {
// TODO: build frontend instance like in distributed mode
let mut frontend = build_frontend(
plugins,
fe_plugins,
kv_store,
procedure_manager,
catalog_manager,
@@ -354,7 +359,7 @@ impl StartCommand {
/// Build frontend instance in standalone mode
async fn build_frontend(
plugins: Arc<Plugins>,
plugins: Plugins,
kv_store: KvBackendRef,
procedure_manager: ProcedureManagerRef,
catalog_manager: CatalogManagerRef,
@@ -388,13 +393,13 @@ mod tests {
#[tokio::test]
async fn test_try_from_start_command_to_anymap() {
let command = StartCommand {
let mut fe_opts = FrontendOptions {
user_provider: Some("static_user_provider:cmd:test=test".to_string()),
..Default::default()
};
let plugins = load_frontend_plugins(&command.user_provider);
let plugins = plugins.unwrap();
let plugins = plugins::setup_frontend_plugins(&mut fe_opts).await.unwrap();
let provider = plugins.get::<UserProviderRef>().unwrap();
let result = provider
.authenticate(

View File

@@ -23,6 +23,8 @@ use std::sync::{Arc, Mutex, MutexGuard};
pub use bit_vec::BitVec;
/// [`Plugins`] is a wrapper of Arc contents.
/// Make it Cloneable and we can treat it like an Arc struct.
#[derive(Default, Clone)]
pub struct Plugins {
inner: Arc<Mutex<anymap::Map<dyn Any + Send + Sync>>>,
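
The Plugins change above makes the container itself cloneable by wrapping the type-indexed map in an Arc. A std-only sketch of the same idea (cloning shares the underlying registry), without the anymap dependency used in the real code; the method names are illustrative.

use std::any::{Any, TypeId};
use std::collections::HashMap;
use std::sync::{Arc, Mutex};

/// Cloning shares the same underlying map, mirroring the new `Plugins` semantics.
#[derive(Default, Clone)]
struct Plugins {
    inner: Arc<Mutex<HashMap<TypeId, Box<dyn Any + Send + Sync>>>>,
}

impl Plugins {
    fn insert<T: Any + Send + Sync>(&self, value: T) {
        self.inner
            .lock()
            .unwrap()
            .insert(TypeId::of::<T>(), Box::new(value));
    }

    fn get<T: Any + Send + Sync + Clone>(&self) -> Option<T> {
        self.inner
            .lock()
            .unwrap()
            .get(&TypeId::of::<T>())
            .and_then(|boxed| boxed.downcast_ref::<T>().cloned())
    }
}

fn main() {
    let plugins = Plugins::default();
    plugins.insert(Arc::new("user-provider".to_string()));

    let cloned = plugins.clone(); // shares the same map
    assert!(cloned.get::<Arc<String>>().is_some());
}
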

View File

@@ -45,8 +45,8 @@ impl Default for WalConfig {
}
}
pub fn kv_store_dir(store_dir: &str) -> String {
format!("{store_dir}/kv")
pub fn metadata_store_dir(store_dir: &str) -> String {
format!("{store_dir}/metadata")
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]

View File

@@ -12,6 +12,8 @@ api = { workspace = true }
arrow-flight.workspace = true
async-stream.workspace = true
async-trait.workspace = true
base64 = "0.21"
bytes = "1.4"
common-catalog = { workspace = true }
common-error = { workspace = true }
common-grpc-expr.workspace = true

View File

@@ -17,7 +17,6 @@ use std::sync::Arc;
use table::metadata::TableId;
use crate::error::Result;
use crate::key::schema_name::SchemaNameKey;
use crate::key::table_info::TableInfoKey;
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteKey;
@@ -68,36 +67,25 @@ impl CacheInvalidator for DummyCacheInvalidator {
}
}
#[derive(Clone)]
pub struct TableMetadataCacheInvalidator(KvCacheInvalidatorRef);
impl TableMetadataCacheInvalidator {
pub fn new(kv_cache_invalidator: KvCacheInvalidatorRef) -> Self {
Self(kv_cache_invalidator)
}
pub async fn invalidate_schema(&self, catalog: &str, schema: &str) {
let key = SchemaNameKey::new(catalog, schema).as_raw_key();
self.0.invalidate_key(&key).await;
}
}
#[async_trait::async_trait]
impl CacheInvalidator for TableMetadataCacheInvalidator {
impl<T> CacheInvalidator for T
where
T: KvCacheInvalidator,
{
async fn invalidate_table_name(&self, _ctx: &Context, table_name: TableName) -> Result<()> {
let key: TableNameKey = (&table_name).into();
self.0.invalidate_key(&key.as_raw_key()).await;
self.invalidate_key(&key.as_raw_key()).await;
Ok(())
}
async fn invalidate_table_id(&self, _ctx: &Context, table_id: TableId) -> Result<()> {
let key = TableInfoKey::new(table_id);
self.0.invalidate_key(&key.as_raw_key()).await;
self.invalidate_key(&key.as_raw_key()).await;
let key = &TableRouteKey { table_id };
self.0.invalidate_key(&key.as_raw_key()).await;
self.invalidate_key(&key.as_raw_key()).await;
Ok(())
}
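
The hunk above replaces the TableMetadataCacheInvalidator wrapper with a blanket impl. A tiny standalone illustration of that pattern, with made-up trait names and a made-up key scheme: every type that can invalidate raw keys automatically gets the higher-level trait.

// Made-up traits to illustrate the blanket-impl pattern used above.
trait KvCacheInvalidator {
    fn invalidate_key(&self, key: &[u8]);
}

trait CacheInvalidator {
    fn invalidate_table_id(&self, table_id: u32);
}

// Blanket impl: anything that can invalidate raw keys can invalidate table
// metadata, so no dedicated wrapper type is needed.
impl<T: KvCacheInvalidator> CacheInvalidator for T {
    fn invalidate_table_id(&self, table_id: u32) {
        self.invalidate_key(format!("table_info/{table_id}").as_bytes());
    }
}

struct NoopInvalidator;
impl KvCacheInvalidator for NoopInvalidator {
    fn invalidate_key(&self, _key: &[u8]) {}
}

fn main() {
    // `NoopInvalidator` never implemented `CacheInvalidator` directly.
    NoopInvalidator.invalidate_table_id(42);
}
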

View File

@@ -45,6 +45,7 @@ use crate::error::{
use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteValue;
use crate::key::DeserializedValueWithBytes;
use crate::metrics;
use crate::rpc::ddl::AlterTableTask;
use crate::rpc::router::{find_leader_regions, find_leaders};
@@ -63,7 +64,7 @@ impl AlterTableProcedure {
pub fn new(
cluster_id: u64,
task: AlterTableTask,
table_info_value: TableInfoValue,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
context: DdlContext,
) -> Result<Self> {
let alter_kind = task
@@ -191,7 +192,8 @@ impl AlterTableProcedure {
.await?
.with_context(|| TableRouteNotFoundSnafu {
table_name: table_ref.to_string(),
})?;
})?
.into_inner();
let leaders = find_leaders(&region_routes);
let mut alter_region_tasks = Vec::with_capacity(leaders.len());
@@ -413,7 +415,7 @@ pub struct AlterTableData {
state: AlterTableState,
task: AlterTableTask,
/// Table info value before alteration.
table_info_value: TableInfoValue,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
cluster_id: u64,
/// Next column id of the table if the task adds columns to the table.
next_column_id: Option<ColumnId>,
@@ -422,7 +424,7 @@ pub struct AlterTableData {
impl AlterTableData {
pub fn new(
task: AlterTableTask,
table_info_value: TableInfoValue,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
cluster_id: u64,
next_column_id: Option<ColumnId>,
) -> Self {

View File

@@ -39,6 +39,7 @@ use crate::error::{self, Result};
use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteValue;
use crate::key::DeserializedValueWithBytes;
use crate::metrics;
use crate::rpc::ddl::DropTableTask;
use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute};
@@ -55,8 +56,8 @@ impl DropTableProcedure {
pub fn new(
cluster_id: u64,
task: DropTableTask,
table_route_value: TableRouteValue,
table_info_value: TableInfoValue,
table_route_value: DeserializedValueWithBytes<TableRouteValue>,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
context: DdlContext,
) -> Self {
Self {
@@ -231,16 +232,16 @@ pub struct DropTableData {
pub state: DropTableState,
pub cluster_id: u64,
pub task: DropTableTask,
pub table_route_value: TableRouteValue,
pub table_info_value: TableInfoValue,
pub table_route_value: DeserializedValueWithBytes<TableRouteValue>,
pub table_info_value: DeserializedValueWithBytes<TableInfoValue>,
}
impl DropTableData {
pub fn new(
cluster_id: u64,
task: DropTableTask,
table_route_value: TableRouteValue,
table_info_value: TableInfoValue,
table_route_value: DeserializedValueWithBytes<TableRouteValue>,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
) -> Self {
Self {
state: DropTableState::Prepare,

View File

@@ -35,6 +35,7 @@ use crate::ddl::DdlContext;
use crate::error::{Result, TableNotFoundSnafu};
use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::DeserializedValueWithBytes;
use crate::metrics;
use crate::rpc::ddl::TruncateTableTask;
use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute};
@@ -90,7 +91,7 @@ impl TruncateTableProcedure {
pub(crate) fn new(
cluster_id: u64,
task: TruncateTableTask,
table_info_value: TableInfoValue,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
region_routes: Vec<RegionRoute>,
context: DdlContext,
) -> Self {
@@ -188,7 +189,7 @@ pub struct TruncateTableData {
state: TruncateTableState,
cluster_id: u64,
task: TruncateTableTask,
table_info_value: TableInfoValue,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
region_routes: Vec<RegionRoute>,
}
@@ -196,7 +197,7 @@ impl TruncateTableData {
pub fn new(
cluster_id: u64,
task: TruncateTableTask,
table_info_value: TableInfoValue,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
region_routes: Vec<RegionRoute>,
) -> Self {
Self {

View File

@@ -35,7 +35,7 @@ use crate::error::{
use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteValue;
use crate::key::TableMetadataManagerRef;
use crate::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
use crate::rpc::ddl::DdlTask::{AlterTable, CreateTable, DropTable, TruncateTable};
use crate::rpc::ddl::{
AlterTableTask, CreateTableTask, DropTableTask, SubmitDdlTaskRequest, SubmitDdlTaskResponse,
@@ -144,7 +144,7 @@ impl DdlManager {
&self,
cluster_id: u64,
alter_table_task: AlterTableTask,
table_info_value: TableInfoValue,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
) -> Result<ProcedureId> {
let context = self.create_context();
@@ -176,8 +176,8 @@ impl DdlManager {
&self,
cluster_id: u64,
drop_table_task: DropTableTask,
table_info_value: TableInfoValue,
table_route_value: TableRouteValue,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
table_route_value: DeserializedValueWithBytes<TableRouteValue>,
) -> Result<ProcedureId> {
let context = self.create_context();
@@ -198,7 +198,7 @@ impl DdlManager {
&self,
cluster_id: u64,
truncate_table_task: TruncateTableTask,
table_info_value: TableInfoValue,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
region_routes: Vec<RegionRoute>,
) -> Result<ProcedureId> {
let context = self.create_context();
@@ -252,7 +252,7 @@ async fn handle_truncate_table_task(
table_name: table_ref.to_string(),
})?;
let table_route = table_route_value.region_routes;
let table_route = table_route_value.into_inner().region_routes;
let id = ddl_manager
.submit_truncate_table_task(

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::fmt::{Display, Formatter};
use serde::{Deserialize, Serialize};
@@ -73,13 +74,15 @@ impl Display for OpenRegion {
pub struct OpenRegion {
pub region_ident: RegionIdent,
pub region_storage_path: String,
pub options: HashMap<String, String>,
}
impl OpenRegion {
pub fn new(region_ident: RegionIdent, path: &str) -> Self {
pub fn new(region_ident: RegionIdent, path: &str, options: HashMap<String, String>) -> Self {
Self {
region_ident,
region_storage_path: path.to_string(),
options,
}
}
}
@@ -127,12 +130,13 @@ mod tests {
engine: "mito2".to_string(),
},
"test/foo",
HashMap::new(),
));
let serialized = serde_json::to_string(&open_region).unwrap();
assert_eq!(
r#"{"OpenRegion":{"region_ident":{"cluster_id":1,"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo"}}"#,
r#"{"OpenRegion":{"region_ident":{"cluster_id":1,"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","options":{}}}"#,
serialized
);

View File

@@ -55,13 +55,18 @@ pub mod table_region;
#[allow(deprecated)]
pub mod table_route;
use std::collections::BTreeMap;
use std::collections::{BTreeMap, HashMap};
use std::fmt::Debug;
use std::ops::Deref;
use std::sync::Arc;
use bytes::Bytes;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use datanode_table::{DatanodeTableKey, DatanodeTableManager, DatanodeTableValue};
use lazy_static::lazy_static;
use regex::Regex;
use serde::de::DeserializeOwned;
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::RegionNumber;
use table::metadata::{RawTableInfo, TableId};
@@ -69,6 +74,7 @@ use table_info::{TableInfoKey, TableInfoManager, TableInfoValue};
use table_name::{TableNameKey, TableNameManager, TableNameValue};
use self::catalog_name::{CatalogManager, CatalogNameKey, CatalogNameValue};
use self::datanode_table::RegionInfo;
use self::schema_name::{SchemaManager, SchemaNameKey, SchemaNameValue};
use self::table_route::{TableRouteManager, TableRouteValue};
use crate::ddl::utils::region_storage_path;
@@ -154,6 +160,116 @@ macro_rules! ensure_values {
};
}
/// A struct containing a deserialized value (`inner`) and the original bytes it was deserialized from.
///
/// - Serialize behaviors:
///
/// The `inner` field will be ignored.
///
/// - Deserialize behaviors:
///
/// The `inner` field will be deserialized from the `bytes` field.
pub struct DeserializedValueWithBytes<T: DeserializeOwned + Serialize> {
// The original bytes of the inner.
bytes: Bytes,
// The value was deserialized from the original bytes.
inner: T,
}
impl<T: DeserializeOwned + Serialize> Deref for DeserializedValueWithBytes<T> {
type Target = T;
fn deref(&self) -> &Self::Target {
&self.inner
}
}
impl<T: DeserializeOwned + Serialize + Debug> Debug for DeserializedValueWithBytes<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"DeserializedValueWithBytes(inner: {:?}, bytes: {:?})",
self.inner, self.bytes
)
}
}
impl<T: DeserializeOwned + Serialize> Serialize for DeserializedValueWithBytes<T> {
/// - Serialize behaviors:
///
/// The `inner` field will be ignored.
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
// Safety: The original bytes are always JSON encoded.
// Serializing them as a string is more efficient than `serialize_bytes`.
serializer.serialize_str(&String::from_utf8_lossy(&self.bytes))
}
}
impl<'de, T: DeserializeOwned + Serialize> Deserialize<'de> for DeserializedValueWithBytes<T> {
/// - Deserialize behaviors:
///
/// The `inner` field will be deserialized from the `bytes` field.
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let buf = String::deserialize(deserializer)?;
let bytes = Bytes::from(buf);
let value = DeserializedValueWithBytes::from_inner_bytes(bytes)
.map_err(|err| serde::de::Error::custom(err.to_string()))?;
Ok(value)
}
}
impl<T: Serialize + DeserializeOwned + Clone> Clone for DeserializedValueWithBytes<T> {
fn clone(&self) -> Self {
Self {
bytes: self.bytes.clone(),
inner: self.inner.clone(),
}
}
}
impl<T: Serialize + DeserializeOwned> DeserializedValueWithBytes<T> {
/// Returns a struct containing a deserialized value and the original `bytes`.
/// It accepts the original bytes of the inner value.
pub fn from_inner_bytes(bytes: Bytes) -> Result<Self> {
let inner = serde_json::from_slice(&bytes).context(error::SerdeJsonSnafu)?;
Ok(Self { bytes, inner })
}
/// Returns a struct containing a deserialized value and the original `bytes`.
/// It accepts a slice of the inner value's original bytes.
pub fn from_inner_slice(bytes: &[u8]) -> Result<Self> {
Self::from_inner_bytes(Bytes::copy_from_slice(bytes))
}
pub fn into_inner(self) -> T {
self.inner
}
/// Returns a copy of the original `bytes`.
pub fn into_bytes(&self) -> Vec<u8> {
self.bytes.to_vec()
}
#[cfg(feature = "testing")]
/// Note: used for test purposes only.
pub fn from_inner(inner: T) -> Self {
let bytes = serde_json::to_vec(&inner).unwrap();
Self {
bytes: Bytes::from(bytes),
inner,
}
}
}
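`DeserializedValueWithBytes` exists so that compare-and-swap style transactions can compare against the exact bytes read from the KV backend, instead of re-serializing the decoded value (which is not guaranteed to be byte-identical). A stripped-down, hypothetical sketch of the same pattern (`DemoValue` and `WithBytes` are illustrative names, not the real types):

use bytes::Bytes;
use serde::{Deserialize, Serialize};

// Hypothetical, simplified value type standing in for `TableInfoValue` etc.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct DemoValue {
    name: String,
    version: u64,
}

// A minimal stand-in for `DeserializedValueWithBytes`: keep the decoded value
// together with the exact bytes it came from.
struct WithBytes<T> {
    bytes: Bytes,
    inner: T,
}

impl<T: serde::de::DeserializeOwned> WithBytes<T> {
    fn from_inner_slice(raw: &[u8]) -> serde_json::Result<Self> {
        Ok(Self {
            inner: serde_json::from_slice(raw)?,
            bytes: Bytes::copy_from_slice(raw),
        })
    }
}

fn main() -> serde_json::Result<()> {
    // Pretend these bytes came back from the KV backend.
    let raw = br#"{"name":"demo","version":1}"#;
    let value: WithBytes<DemoValue> = WithBytes::from_inner_slice(raw)?;

    // Re-serializing the inner value is NOT guaranteed to be byte-identical
    // (field order, whitespace, defaults), so compare-and-swap transactions
    // must use the original bytes instead.
    assert_eq!(value.bytes.as_ref(), raw.as_slice());
    assert_eq!(value.inner, DemoValue { name: "demo".into(), version: 1 });
    Ok(())
}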
impl TableMetadataManager {
pub fn new(kv_backend: KvBackendRef) -> Self {
TableMetadataManager {
@@ -211,7 +327,10 @@ impl TableMetadataManager {
pub async fn get_full_table_info(
&self,
table_id: TableId,
) -> Result<(Option<TableInfoValue>, Option<TableRouteValue>)> {
) -> Result<(
Option<DeserializedValueWithBytes<TableInfoValue>>,
Option<DeserializedValueWithBytes<TableRouteValue>>,
)> {
let (get_table_route_txn, table_route_decoder) =
self.table_route_manager.build_get_txn(table_id);
@@ -256,6 +375,7 @@ impl TableMetadataManager {
.table_name_manager()
.build_create_txn(&table_name, table_id)?;
let region_options = (&table_info.meta.options).into();
// Creates table info.
let table_info_value = TableInfoValue::new(table_info);
let (create_table_info_txn, on_create_table_info_failure) = self
@@ -268,6 +388,7 @@ impl TableMetadataManager {
table_id,
&engine,
&region_storage_path,
region_options,
distribution,
)?;
@@ -288,15 +409,17 @@ impl TableMetadataManager {
// Checks whether metadata was already created.
if !r.succeeded {
let remote_table_info =
on_create_table_info_failure(&r.responses)?.context(error::UnexpectedSnafu {
let remote_table_info = on_create_table_info_failure(&r.responses)?
.context(error::UnexpectedSnafu {
err_msg: "Reads the empty table info during the create table metadata",
})?;
})?
.into_inner();
let remote_table_route =
on_create_table_route_failure(&r.responses)?.context(error::UnexpectedSnafu {
let remote_table_route = on_create_table_route_failure(&r.responses)?
.context(error::UnexpectedSnafu {
err_msg: "Reads the empty table route during the create table metadata",
})?;
})?
.into_inner();
let op_name = "the creating table metadata";
ensure_values!(remote_table_info, table_info_value, op_name);
@@ -310,8 +433,8 @@ impl TableMetadataManager {
/// The caller MUST ensure it has exclusive access to `TableNameKey`.
pub async fn delete_table_metadata(
&self,
table_info_value: &TableInfoValue,
table_route_value: &TableRouteValue,
table_info_value: &DeserializedValueWithBytes<TableInfoValue>,
table_route_value: &DeserializedValueWithBytes<TableRouteValue>,
) -> Result<()> {
let table_info = &table_info_value.table_info;
let table_id = table_info.ident.table_id;
@@ -361,7 +484,7 @@ impl TableMetadataManager {
/// and the new `TableNameKey` MUST be empty.
pub async fn rename_table(
&self,
current_table_info_value: TableInfoValue,
current_table_info_value: DeserializedValueWithBytes<TableInfoValue>,
new_table_name: String,
) -> Result<()> {
let current_table_info = &current_table_info_value.table_info;
@@ -386,9 +509,11 @@ impl TableMetadataManager {
table_id,
)?;
let new_table_info_value = current_table_info_value.with_update(move |table_info| {
table_info.name = new_table_name;
});
let new_table_info_value = current_table_info_value
.inner
.with_update(move |table_info| {
table_info.name = new_table_name;
});
// Updates table info.
let (update_table_info_txn, on_update_table_info_failure) = self
@@ -401,10 +526,11 @@ impl TableMetadataManager {
// Checks whether metadata was already updated.
if !r.succeeded {
let remote_table_info =
on_update_table_info_failure(&r.responses)?.context(error::UnexpectedSnafu {
let remote_table_info = on_update_table_info_failure(&r.responses)?
.context(error::UnexpectedSnafu {
err_msg: "Reads the empty table info during the rename table metadata",
})?;
})?
.into_inner();
let op_name = "the renaming table metadata";
ensure_values!(remote_table_info, new_table_info_value, op_name);
@@ -416,7 +542,7 @@ impl TableMetadataManager {
/// Updates table info and returns an error if different metadata exists.
pub async fn update_table_info(
&self,
current_table_info_value: TableInfoValue,
current_table_info_value: DeserializedValueWithBytes<TableInfoValue>,
new_table_info: RawTableInfo,
) -> Result<()> {
let table_id = current_table_info_value.table_info.ident.table_id;
@@ -432,10 +558,11 @@ impl TableMetadataManager {
// Checks whether metadata was already updated.
if !r.succeeded {
let remote_table_info =
on_update_table_info_failure(&r.responses)?.context(error::UnexpectedSnafu {
let remote_table_info = on_update_table_info_failure(&r.responses)?
.context(error::UnexpectedSnafu {
err_msg: "Reads the empty table info during the updating table info",
})?;
})?
.into_inner();
let op_name = "the updating table info";
ensure_values!(remote_table_info, new_table_info_value, op_name);
@@ -446,10 +573,10 @@ impl TableMetadataManager {
pub async fn update_table_route(
&self,
table_id: TableId,
engine: &str,
region_storage_path: &str,
current_table_route_value: TableRouteValue,
region_info: RegionInfo,
current_table_route_value: DeserializedValueWithBytes<TableRouteValue>,
new_region_routes: Vec<RegionRoute>,
new_region_options: &HashMap<String, String>,
) -> Result<()> {
// Updates the datanode table key value pairs.
let current_region_distribution =
@@ -458,10 +585,10 @@ impl TableMetadataManager {
let update_datanode_table_txn = self.datanode_table_manager().build_update_txn(
table_id,
engine,
region_storage_path,
region_info,
current_region_distribution,
new_region_distribution,
new_region_options,
)?;
// Updates the table_route.
@@ -477,10 +604,11 @@ impl TableMetadataManager {
// Checks whether metadata was already updated.
if !r.succeeded {
let remote_table_route =
on_update_table_route_failure(&r.responses)?.context(error::UnexpectedSnafu {
let remote_table_route = on_update_table_route_failure(&r.responses)?
.context(error::UnexpectedSnafu {
err_msg: "Reads the empty table route during the updating table route",
})?;
})?
.into_inner();
let op_name = "the updating table route";
ensure_values!(remote_table_route, new_table_route_value, op_name);
@@ -553,9 +681,10 @@ impl_optional_meta_value! {
#[cfg(test)]
mod tests {
use std::collections::BTreeMap;
use std::collections::{BTreeMap, HashMap};
use std::sync::Arc;
use bytes::Bytes;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, SchemaBuilder};
use futures::TryStreamExt;
@@ -563,14 +692,43 @@ mod tests {
use super::datanode_table::DatanodeTableKey;
use crate::ddl::utils::region_storage_path;
use crate::key::datanode_table::RegionInfo;
use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteValue;
use crate::key::{to_removed_key, TableMetadataManager};
use crate::key::{to_removed_key, DeserializedValueWithBytes, TableMetadataManager};
use crate::kv_backend::memory::MemoryKvBackend;
use crate::peer::Peer;
use crate::rpc::router::{region_distribution, Region, RegionRoute};
#[test]
fn test_deserialized_value_with_bytes() {
let region_route = new_test_region_route();
let region_routes = vec![region_route.clone()];
let expected_region_routes =
TableRouteValue::new(vec![region_route.clone(), region_route.clone()]);
let expected = serde_json::to_vec(&expected_region_routes).unwrap();
// Serialize behaviors:
// The inner field will be ignored.
let value = DeserializedValueWithBytes {
// ignored
inner: TableRouteValue::new(region_routes.clone()),
bytes: Bytes::from(expected.clone()),
};
let encoded = serde_json::to_vec(&value).unwrap();
// Deserialize behaviors:
// The inner field will be deserialized from the bytes field.
let decoded: DeserializedValueWithBytes<TableRouteValue> =
serde_json::from_slice(&encoded).unwrap();
assert_eq!(decoded.inner, expected_region_routes);
assert_eq!(decoded.bytes, expected);
}
#[test]
fn test_to_removed_key() {
let key = "test_key";
@@ -660,8 +818,14 @@ mod tests {
.await
.unwrap();
assert_eq!(remote_table_info.unwrap().table_info, table_info);
assert_eq!(remote_table_route.unwrap().region_routes, region_routes);
assert_eq!(
remote_table_info.unwrap().into_inner().table_info,
table_info
);
assert_eq!(
remote_table_route.unwrap().into_inner().region_routes,
region_routes
);
}
#[tokio::test]
@@ -674,7 +838,8 @@ mod tests {
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
let table_id = table_info.ident.table_id;
let datanode_id = 2;
let table_route_value = TableRouteValue::new(region_routes.clone());
let table_route_value =
DeserializedValueWithBytes::from_inner(TableRouteValue::new(region_routes.clone()));
// creates metadata.
table_metadata_manager
@@ -682,7 +847,8 @@ mod tests {
.await
.unwrap();
let table_info_value = TableInfoValue::new(table_info.clone());
let table_info_value =
DeserializedValueWithBytes::from_inner(TableInfoValue::new(table_info.clone()));
// deletes metadata.
table_metadata_manager
@@ -723,7 +889,8 @@ mod tests {
.get_removed(table_id)
.await
.unwrap()
.unwrap();
.unwrap()
.into_inner();
assert_eq!(removed_table_info.table_info, table_info);
let removed_table_route = table_metadata_manager
@@ -731,7 +898,8 @@ mod tests {
.get_removed(table_id)
.await
.unwrap()
.unwrap();
.unwrap()
.into_inner();
assert_eq!(removed_table_route.region_routes, region_routes);
}
@@ -750,7 +918,9 @@ mod tests {
.await
.unwrap();
let new_table_name = "another_name".to_string();
let table_info_value = TableInfoValue::new(table_info.clone());
let table_info_value =
DeserializedValueWithBytes::from_inner(TableInfoValue::new(table_info.clone()));
table_metadata_manager
.rename_table(table_info_value.clone(), new_table_name.clone())
.await
@@ -762,7 +932,8 @@ mod tests {
.unwrap();
let mut modified_table_info = table_info.clone();
modified_table_info.name = "hi".to_string();
let modified_table_info_value = table_info_value.update(modified_table_info);
let modified_table_info_value =
DeserializedValueWithBytes::from_inner(table_info_value.update(modified_table_info));
// if the table_info_value is wrong, it should return an error.
// The ABA problem.
assert!(table_metadata_manager
@@ -816,7 +987,8 @@ mod tests {
.unwrap();
let mut new_table_info = table_info.clone();
new_table_info.name = "hi".to_string();
let current_table_info_value = TableInfoValue::new(table_info.clone());
let current_table_info_value =
DeserializedValueWithBytes::from_inner(TableInfoValue::new(table_info.clone()));
// should be ok.
table_metadata_manager
.update_table_info(current_table_info_value.clone(), new_table_info.clone())
@@ -834,12 +1006,15 @@ mod tests {
.get(table_id)
.await
.unwrap()
.unwrap();
.unwrap()
.into_inner();
assert_eq!(updated_table_info.table_info, new_table_info);
let mut wrong_table_info = table_info.clone();
wrong_table_info.name = "wrong".to_string();
let wrong_table_info_value = current_table_info_value.update(wrong_table_info);
let wrong_table_info_value = DeserializedValueWithBytes::from_inner(
current_table_info_value.update(wrong_table_info),
);
// if the current_table_info_value is wrong, it should return an error.
// The ABA problem.
assert!(table_metadata_manager
@@ -878,7 +1053,8 @@ mod tests {
let engine = table_info.meta.engine.as_str();
let region_storage_path =
region_storage_path(&table_info.catalog_name, &table_info.schema_name);
let current_table_route_value = TableRouteValue::new(region_routes.clone());
let current_table_route_value =
DeserializedValueWithBytes::from_inner(TableRouteValue::new(region_routes.clone()));
// creates metadata.
table_metadata_manager
.create_table_metadata(table_info.clone(), region_routes.clone())
@@ -894,10 +1070,14 @@ mod tests {
table_metadata_manager
.update_table_route(
table_id,
engine,
&region_storage_path,
RegionInfo {
engine: engine.to_string(),
region_storage_path: region_storage_path.to_string(),
region_options: HashMap::new(),
},
current_table_route_value.clone(),
new_region_routes.clone(),
&HashMap::new(),
)
.await
.unwrap();
@@ -907,24 +1087,36 @@ mod tests {
table_metadata_manager
.update_table_route(
table_id,
engine,
&region_storage_path,
RegionInfo {
engine: engine.to_string(),
region_storage_path: region_storage_path.to_string(),
region_options: HashMap::new(),
},
current_table_route_value.clone(),
new_region_routes.clone(),
&HashMap::new(),
)
.await
.unwrap();
let current_table_route_value = current_table_route_value.update(new_region_routes.clone());
let current_table_route_value = DeserializedValueWithBytes::from_inner(
current_table_route_value
.inner
.update(new_region_routes.clone()),
);
let new_region_routes = vec![new_region_route(2, 4), new_region_route(5, 5)];
// it should be ok.
table_metadata_manager
.update_table_route(
table_id,
engine,
&region_storage_path,
RegionInfo {
engine: engine.to_string(),
region_storage_path: region_storage_path.to_string(),
region_options: HashMap::new(),
},
current_table_route_value.clone(),
new_region_routes.clone(),
&HashMap::new(),
)
.await
.unwrap();
@@ -932,19 +1124,24 @@ mod tests {
// if the current_table_route_value is wrong, it should return an error.
// The ABA problem.
let wrong_table_route_value = current_table_route_value.update(vec![
new_region_route(1, 1),
new_region_route(2, 2),
new_region_route(3, 3),
new_region_route(4, 4),
]);
let wrong_table_route_value =
DeserializedValueWithBytes::from_inner(current_table_route_value.update(vec![
new_region_route(1, 1),
new_region_route(2, 2),
new_region_route(3, 3),
new_region_route(4, 4),
]));
assert!(table_metadata_manager
.update_table_route(
table_id,
engine,
&region_storage_path,
RegionInfo {
engine: engine.to_string(),
region_storage_path: region_storage_path.to_string(),
region_options: HashMap::new(),
},
wrong_table_route_value,
new_region_routes
new_region_routes,
&HashMap::new(),
)
.await
.is_err());

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::Arc;
use futures::stream::BoxStream;
@@ -32,6 +33,21 @@ use crate::rpc::store::RangeRequest;
use crate::rpc::KeyValue;
use crate::DatanodeId;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
/// RegionInfo
/// For compatibility reasons, DON'T modify the field names.
pub struct RegionInfo {
#[serde(default)]
// The table engine; it SHOULD be immutable once created.
pub engine: String,
// The region storage path; it SHOULD be immutable once created.
#[serde(default)]
pub region_storage_path: String,
// The region options.
#[serde(default)]
pub region_options: HashMap<String, String>,
}
pub struct DatanodeTableKey {
datanode_id: DatanodeId,
table_id: TableId,
@@ -85,25 +101,17 @@ impl TableMetaKey for DatanodeTableKey {
pub struct DatanodeTableValue {
pub table_id: TableId,
pub regions: Vec<RegionNumber>,
#[serde(default)]
pub engine: String,
#[serde(default)]
pub region_storage_path: String,
#[serde(flatten)]
pub region_info: RegionInfo,
version: u64,
}
impl DatanodeTableValue {
pub fn new(
table_id: TableId,
regions: Vec<RegionNumber>,
engine: String,
region_storage_path: String,
) -> Self {
pub fn new(table_id: TableId, regions: Vec<RegionNumber>, region_info: RegionInfo) -> Self {
Self {
table_id,
regions,
engine,
region_storage_path,
region_info,
version: 0,
}
}
@@ -156,6 +164,7 @@ impl DatanodeTableManager {
table_id: TableId,
engine: &str,
region_storage_path: &str,
region_options: HashMap<String, String>,
distribution: RegionDistribution,
) -> Result<Txn> {
let txns = distribution
@@ -165,8 +174,11 @@ impl DatanodeTableManager {
let val = DatanodeTableValue::new(
table_id,
regions,
engine.to_string(),
region_storage_path.to_string(),
RegionInfo {
engine: engine.to_string(),
region_storage_path: region_storage_path.to_string(),
region_options: region_options.clone(),
},
);
Ok(TxnOp::Put(key.as_raw_key(), val.try_as_raw_value()?))
@@ -182,10 +194,10 @@ impl DatanodeTableManager {
pub(crate) fn build_update_txn(
&self,
table_id: TableId,
engine: &str,
region_storage_path: &str,
region_info: RegionInfo,
current_region_distribution: RegionDistribution,
new_region_distribution: RegionDistribution,
new_region_options: &HashMap<String, String>,
) -> Result<Txn> {
let mut opts = Vec::new();
@@ -197,33 +209,20 @@ impl DatanodeTableManager {
opts.push(TxnOp::Delete(raw_key))
}
}
let need_update_options = region_info.region_options != *new_region_options;
for (datanode, regions) in new_region_distribution.into_iter() {
if let Some(current_region) = current_region_distribution.get(&datanode) {
// Updates if needed.
if *current_region != regions {
let key = DatanodeTableKey::new(datanode, table_id);
let raw_key = key.as_raw_key();
let val = DatanodeTableValue::new(
table_id,
regions,
engine.to_string(),
region_storage_path.to_string(),
)
.try_as_raw_value()?;
opts.push(TxnOp::Put(raw_key, val));
}
} else {
// New datanodes
let need_update =
if let Some(current_region) = current_region_distribution.get(&datanode) {
// Updates if needed.
*current_region != regions || need_update_options
} else {
true
};
if need_update {
let key = DatanodeTableKey::new(datanode, table_id);
let raw_key = key.as_raw_key();
let val = DatanodeTableValue::new(
table_id,
regions,
engine.to_string(),
region_storage_path.to_string(),
)
.try_as_raw_value()?;
let val = DatanodeTableValue::new(table_id, regions, region_info.clone())
.try_as_raw_value()?;
opts.push(TxnOp::Put(raw_key, val));
}
}
@@ -270,11 +269,10 @@ mod tests {
let value = DatanodeTableValue {
table_id: 42,
regions: vec![1, 2, 3],
engine: Default::default(),
region_storage_path: Default::default(),
region_info: RegionInfo::default(),
version: 1,
};
let literal = br#"{"table_id":42,"regions":[1,2,3],"engine":"","region_storage_path":"","version":1}"#;
let literal = br#"{"table_id":42,"regions":[1,2,3],"engine":"","region_storage_path":"","region_options":{},"version":1}"#;
let raw_value = value.try_as_raw_value().unwrap();
assert_eq!(raw_value, literal);
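Moving `engine`/`region_storage_path` into `RegionInfo` relies on `#[serde(flatten)]` plus `#[serde(default)]` to stay compatible with values written before `region_options` existed, which is what the updated JSON literal above asserts. A self-contained sketch with trimmed-down, hypothetical stand-ins for the real structs:

use std::collections::HashMap;

use serde::{Deserialize, Serialize};

// Trimmed-down mirrors of `RegionInfo` / `DatanodeTableValue` for illustration.
#[derive(Debug, Default, PartialEq, Serialize, Deserialize)]
struct RegionInfo {
    #[serde(default)]
    engine: String,
    #[serde(default)]
    region_storage_path: String,
    #[serde(default)]
    region_options: HashMap<String, String>,
}

#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct DatanodeTableValue {
    table_id: u32,
    regions: Vec<u32>,
    #[serde(flatten)]
    region_info: RegionInfo,
    version: u64,
}

fn main() -> serde_json::Result<()> {
    // A value written by an older binary, before `region_options` existed.
    let old = r#"{"table_id":42,"regions":[1,2,3],"engine":"","region_storage_path":"","version":1}"#;
    let value: DatanodeTableValue = serde_json::from_str(old)?;
    // Missing field falls back to the default: an empty options map.
    assert!(value.region_info.region_options.is_empty());

    // New binaries serialize the extra field in the same flat layout.
    let new = serde_json::to_string(&value)?;
    assert!(new.contains(r#""region_options":{}"#));
    println!("{value:?}");
    Ok(())
}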

View File

@@ -16,7 +16,7 @@ use serde::{Deserialize, Serialize};
use table::engine::TableReference;
use table::metadata::{RawTableInfo, TableId};
use super::TABLE_INFO_KEY_PREFIX;
use super::{DeserializedValueWithBytes, TABLE_INFO_KEY_PREFIX};
use crate::error::Result;
use crate::key::{to_removed_key, TableMetaKey};
use crate::kv_backend::txn::{Compare, CompareOp, Txn, TxnOp, TxnOpResponse};
@@ -103,7 +103,7 @@ impl TableInfoManager {
table_id: TableId,
) -> (
Txn,
impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<TableInfoValue>>,
impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<DeserializedValueWithBytes<TableInfoValue>>>,
) {
let key = TableInfoKey::new(table_id);
let raw_key = key.as_raw_key();
@@ -119,7 +119,7 @@ impl TableInfoManager {
table_info_value: &TableInfoValue,
) -> Result<(
Txn,
impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<TableInfoValue>>,
impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<DeserializedValueWithBytes<TableInfoValue>>>,
)> {
let key = TableInfoKey::new(table_id);
let raw_key = key.as_raw_key();
@@ -143,15 +143,15 @@ impl TableInfoManager {
pub(crate) fn build_update_txn(
&self,
table_id: TableId,
current_table_info_value: &TableInfoValue,
current_table_info_value: &DeserializedValueWithBytes<TableInfoValue>,
new_table_info_value: &TableInfoValue,
) -> Result<(
Txn,
impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<TableInfoValue>>,
impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<DeserializedValueWithBytes<TableInfoValue>>>,
)> {
let key = TableInfoKey::new(table_id);
let raw_key = key.as_raw_key();
let raw_value = current_table_info_value.try_as_raw_value()?;
let raw_value = current_table_info_value.into_bytes();
let txn = Txn::new()
.when(vec![Compare::with_value(
@@ -172,11 +172,11 @@ impl TableInfoManager {
pub(crate) fn build_delete_txn(
&self,
table_id: TableId,
table_info_value: &TableInfoValue,
table_info_value: &DeserializedValueWithBytes<TableInfoValue>,
) -> Result<Txn> {
let key = TableInfoKey::new(table_id);
let raw_key = key.as_raw_key();
let raw_value = table_info_value.try_as_raw_value()?;
let raw_value = table_info_value.into_bytes();
let removed_key = to_removed_key(&String::from_utf8_lossy(&raw_key));
let txn = Txn::new().and_then(vec![
@@ -189,7 +189,8 @@ impl TableInfoManager {
fn build_decode_fn(
raw_key: Vec<u8>,
) -> impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<TableInfoValue>> {
) -> impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<DeserializedValueWithBytes<TableInfoValue>>>
{
move |kvs: &Vec<TxnOpResponse>| {
kvs.iter()
.filter_map(|resp| {
@@ -201,29 +202,35 @@ impl TableInfoManager {
})
.flat_map(|r| &r.kvs)
.find(|kv| kv.key == raw_key)
.map(|kv| TableInfoValue::try_from_raw_value(&kv.value))
.map(|kv| DeserializedValueWithBytes::from_inner_slice(&kv.value))
.transpose()
}
}
#[cfg(test)]
pub async fn get_removed(&self, table_id: TableId) -> Result<Option<TableInfoValue>> {
pub async fn get_removed(
&self,
table_id: TableId,
) -> Result<Option<DeserializedValueWithBytes<TableInfoValue>>> {
let key = TableInfoKey::new(table_id).to_string();
let removed_key = to_removed_key(&key).into_bytes();
self.kv_backend
.get(&removed_key)
.await?
.map(|x| TableInfoValue::try_from_raw_value(&x.value))
.map(|x| DeserializedValueWithBytes::from_inner_slice(&x.value))
.transpose()
}
pub async fn get(&self, table_id: TableId) -> Result<Option<TableInfoValue>> {
pub async fn get(
&self,
table_id: TableId,
) -> Result<Option<DeserializedValueWithBytes<TableInfoValue>>> {
let key = TableInfoKey::new(table_id);
let raw_key = key.as_raw_key();
self.kv_backend
.get(&raw_key)
.await?
.map(|x| TableInfoValue::try_from_raw_value(&x.value))
.map(|x| DeserializedValueWithBytes::from_inner_slice(&x.value))
.transpose()
}
}

View File

@@ -17,6 +17,7 @@ use std::fmt::Display;
use serde::{Deserialize, Serialize};
use table::metadata::TableId;
use super::DeserializedValueWithBytes;
use crate::error::Result;
use crate::key::{to_removed_key, RegionDistribution, TableMetaKey, TABLE_ROUTE_PREFIX};
use crate::kv_backend::txn::{Compare, CompareOp, Txn, TxnOp, TxnOpResponse};
@@ -81,7 +82,7 @@ impl TableRouteManager {
table_id: TableId,
) -> (
Txn,
impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<TableRouteValue>>,
impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<DeserializedValueWithBytes<TableRouteValue>>>,
) {
let key = TableRouteKey::new(table_id);
let raw_key = key.as_raw_key();
@@ -97,7 +98,7 @@ impl TableRouteManager {
table_route_value: &TableRouteValue,
) -> Result<(
Txn,
impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<TableRouteValue>>,
impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<DeserializedValueWithBytes<TableRouteValue>>>,
)> {
let key = TableRouteKey::new(table_id);
let raw_key = key.as_raw_key();
@@ -121,15 +122,15 @@ impl TableRouteManager {
pub(crate) fn build_update_txn(
&self,
table_id: TableId,
current_table_route_value: &TableRouteValue,
current_table_route_value: &DeserializedValueWithBytes<TableRouteValue>,
new_table_route_value: &TableRouteValue,
) -> Result<(
Txn,
impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<TableRouteValue>>,
impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<DeserializedValueWithBytes<TableRouteValue>>>,
)> {
let key = TableRouteKey::new(table_id);
let raw_key = key.as_raw_key();
let raw_value = current_table_route_value.try_as_raw_value()?;
let raw_value = current_table_route_value.into_bytes();
let new_raw_value: Vec<u8> = new_table_route_value.try_as_raw_value()?;
let txn = Txn::new()
@@ -148,11 +149,11 @@ impl TableRouteManager {
pub(crate) fn build_delete_txn(
&self,
table_id: TableId,
table_route_value: &TableRouteValue,
table_route_value: &DeserializedValueWithBytes<TableRouteValue>,
) -> Result<Txn> {
let key = TableRouteKey::new(table_id);
let raw_key = key.as_raw_key();
let raw_value = table_route_value.try_as_raw_value()?;
let raw_value = table_route_value.into_bytes();
let removed_key = to_removed_key(&String::from_utf8_lossy(&raw_key));
let txn = Txn::new().and_then(vec![
@@ -165,7 +166,8 @@ impl TableRouteManager {
fn build_decode_fn(
raw_key: Vec<u8>,
) -> impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<TableRouteValue>> {
) -> impl FnOnce(&Vec<TxnOpResponse>) -> Result<Option<DeserializedValueWithBytes<TableRouteValue>>>
{
move |response: &Vec<TxnOpResponse>| {
response
.iter()
@@ -178,28 +180,34 @@ impl TableRouteManager {
})
.flat_map(|r| &r.kvs)
.find(|kv| kv.key == raw_key)
.map(|kv| TableRouteValue::try_from_raw_value(&kv.value))
.map(|kv| DeserializedValueWithBytes::from_inner_slice(&kv.value))
.transpose()
}
}
pub async fn get(&self, table_id: TableId) -> Result<Option<TableRouteValue>> {
pub async fn get(
&self,
table_id: TableId,
) -> Result<Option<DeserializedValueWithBytes<TableRouteValue>>> {
let key = TableRouteKey::new(table_id);
self.kv_backend
.get(&key.as_raw_key())
.await?
.map(|kv| TableRouteValue::try_from_raw_value(&kv.value))
.map(|kv| DeserializedValueWithBytes::from_inner_slice(&kv.value))
.transpose()
}
#[cfg(test)]
pub async fn get_removed(&self, table_id: TableId) -> Result<Option<TableRouteValue>> {
pub async fn get_removed(
&self,
table_id: TableId,
) -> Result<Option<DeserializedValueWithBytes<TableRouteValue>>> {
let key = TableRouteKey::new(table_id).to_string();
let removed_key = to_removed_key(&key).into_bytes();
self.kv_backend
.get(&removed_key)
.await?
.map(|x| TableRouteValue::try_from_raw_value(&x.value))
.map(|x| DeserializedValueWithBytes::from_inner_slice(&x.value))
.transpose()
}
@@ -209,7 +217,7 @@ impl TableRouteManager {
) -> Result<Option<RegionDistribution>> {
self.get(table_id)
.await?
.map(|table_route| region_distribution(&table_route.region_routes))
.map(|table_route| region_distribution(&table_route.into_inner().region_routes))
.transpose()
}
}

View File

@@ -21,6 +21,8 @@ use api::v1::meta::{
SubmitDdlTaskResponse as PbSubmitDdlTaskResponse, TruncateTableTask as PbTruncateTableTask,
};
use api::v1::{AlterExpr, CreateTableExpr, DropTableExpr, TruncateTableExpr};
use base64::engine::general_purpose;
use base64::Engine as _;
use prost::Message;
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt};
@@ -287,7 +289,8 @@ impl Serialize for CreateTableTask {
table_info,
};
let buf = pb.encode_to_vec();
serializer.serialize_bytes(&buf)
let encoded = general_purpose::STANDARD_NO_PAD.encode(buf);
serializer.serialize_str(&encoded)
}
}
@@ -296,7 +299,10 @@ impl<'de> Deserialize<'de> for CreateTableTask {
where
D: serde::Deserializer<'de>,
{
let buf = Vec::<u8>::deserialize(deserializer)?;
let encoded = String::deserialize(deserializer)?;
let buf = general_purpose::STANDARD_NO_PAD
.decode(encoded)
.map_err(|err| serde::de::Error::custom(err.to_string()))?;
let expr: PbCreateTableTask = PbCreateTableTask::decode(&*buf)
.map_err(|err| serde::de::Error::custom(err.to_string()))?;
@@ -353,7 +359,8 @@ impl Serialize for AlterTableTask {
alter_table: Some(self.alter_table.clone()),
};
let buf = pb.encode_to_vec();
serializer.serialize_bytes(&buf)
let encoded = general_purpose::STANDARD_NO_PAD.encode(buf);
serializer.serialize_str(&encoded)
}
}
@@ -362,7 +369,10 @@ impl<'de> Deserialize<'de> for AlterTableTask {
where
D: serde::Deserializer<'de>,
{
let buf = Vec::<u8>::deserialize(deserializer)?;
let encoded = String::deserialize(deserializer)?;
let buf = general_purpose::STANDARD_NO_PAD
.decode(encoded)
.map_err(|err| serde::de::Error::custom(err.to_string()))?;
let expr: PbAlterTableTask = PbAlterTableTask::decode(&*buf)
.map_err(|err| serde::de::Error::custom(err.to_string()))?;
@@ -425,12 +435,12 @@ impl TryFrom<PbTruncateTableTask> for TruncateTableTask {
mod tests {
use std::sync::Arc;
use api::v1::CreateTableExpr;
use api::v1::{AlterExpr, CreateTableExpr};
use datatypes::schema::SchemaBuilder;
use table::metadata::RawTableInfo;
use table::test_util::table_info::test_table_info;
use super::CreateTableTask;
use super::{AlterTableTask, CreateTableTask};
#[test]
fn test_basic_ser_de_create_table_task() {
@@ -447,4 +457,16 @@ mod tests {
let de = serde_json::from_slice(&output).unwrap();
assert_eq!(task, de);
}
#[test]
fn test_basic_ser_de_alter_table_task() {
let task = AlterTableTask {
alter_table: AlterExpr::default(),
};
let output = serde_json::to_vec(&task).unwrap();
let de = serde_json::from_slice(&output).unwrap();
assert_eq!(task, de);
}
}
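Switching the task serialization from `serialize_bytes` to a base64 string keeps the protobuf payload as a single JSON string instead of an array of numbers, which is what the new `AlterTableTask` round-trip test exercises. A small sketch of the encoding itself, assuming the `base64` 0.21 and `serde_json` crates:

use base64::engine::general_purpose::STANDARD_NO_PAD;
use base64::Engine as _;

fn main() {
    // Pretend this is a protobuf-encoded task (arbitrary binary data).
    let pb_bytes: Vec<u8> = vec![0x0a, 0x03, b'f', b'o', b'o', 0xff];

    // `serde_json` has no native binary type: `serialize_bytes` ends up as a
    // JSON array of numbers.
    let as_array = serde_json::to_string(&pb_bytes).unwrap();
    assert_eq!(as_array, "[10,3,102,111,111,255]");

    // Encoding to base64 keeps the payload as a single JSON-safe string.
    let encoded = STANDARD_NO_PAD.encode(&pb_bytes);
    let decoded = STANDARD_NO_PAD.decode(&encoded).unwrap();
    assert_eq!(decoded, pb_bytes);
}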

View File

@@ -14,6 +14,7 @@
// metric stuffs, inspired by databend
use std::fmt;
use std::sync::{Arc, Once, RwLock};
use std::time::{Duration, Instant};
@@ -63,6 +64,7 @@ pub fn try_handle() -> Option<PrometheusHandle> {
pub struct Timer {
start: Instant,
histogram: Histogram,
observed: bool,
}
impl From<Histogram> for Timer {
@@ -71,12 +73,22 @@ impl From<Histogram> for Timer {
}
}
impl fmt::Debug for Timer {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Timer")
.field("start", &self.start)
.field("observed", &self.observed)
.finish()
}
}
impl Timer {
/// Creates a timer from given histogram.
pub fn from_histogram(histogram: Histogram) -> Self {
Self {
start: Instant::now(),
histogram,
observed: false,
}
}
@@ -85,6 +97,7 @@ impl Timer {
Self {
start: Instant::now(),
histogram: register_histogram!(name),
observed: false,
}
}
@@ -93,6 +106,7 @@ impl Timer {
Self {
start: Instant::now(),
histogram: register_histogram!(name, labels),
observed: false,
}
}
@@ -100,11 +114,18 @@ impl Timer {
pub fn elapsed(&self) -> Duration {
self.start.elapsed()
}
/// Discards the timer result.
pub fn discard(mut self) {
self.observed = true;
}
}
impl Drop for Timer {
fn drop(&mut self) {
self.histogram.record(self.elapsed())
if !self.observed {
self.histogram.record(self.elapsed())
}
}
}
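The new `observed` flag lets callers drop a timer without recording it, e.g. when a request fails and should not skew a latency histogram. A dependency-free sketch of the same observe-on-drop-unless-discarded pattern, with a closure standing in for the real `Histogram`:

use std::time::{Duration, Instant};

/// A simplified version of the RAII timer: it reports elapsed time when
/// dropped, unless the caller explicitly discards it.
struct Timer<F: Fn(Duration)> {
    start: Instant,
    observe: F,
    observed: bool,
}

impl<F: Fn(Duration)> Timer<F> {
    fn new(observe: F) -> Self {
        Self { start: Instant::now(), observe, observed: false }
    }

    /// Drops the timer without recording anything (e.g. for failed requests
    /// that should not pollute a latency histogram).
    fn discard(mut self) {
        self.observed = true;
    }
}

impl<F: Fn(Duration)> Drop for Timer<F> {
    fn drop(&mut self) {
        if !self.observed {
            (self.observe)(self.start.elapsed());
        }
    }
}

fn main() {
    // Recorded: the closure fires when `_t` goes out of scope.
    {
        let _t = Timer::new(|elapsed| println!("handled in {elapsed:?}"));
    }
    // Not recorded: `discard` marks the timer as observed before the drop.
    let t = Timer::new(|elapsed| println!("should not print {elapsed:?}"));
    t.discard();
}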

View File

@@ -93,18 +93,22 @@ impl From<Date> for DateTime {
}
impl DateTime {
pub fn new(val: i64) -> Self {
Self(val)
/// Create a new [DateTime] from milliseconds elapsed since "1970-01-01 00:00:00 UTC" (UNIX Epoch).
pub fn new(millis: i64) -> Self {
Self(millis)
}
/// Get the milliseconds elapsed since "1970-01-01 00:00:00 UTC" (UNIX Epoch).
pub fn val(&self) -> i64 {
self.0
}
/// Convert to [NaiveDateTime].
pub fn to_chrono_datetime(&self) -> Option<NaiveDateTime> {
NaiveDateTime::from_timestamp_millis(self.0)
}
/// Convert to [common_time::date].
pub fn to_date(&self) -> Option<Date> {
self.to_chrono_datetime().map(|d| Date::from(d.date()))
}

View File

@@ -37,7 +37,7 @@ use storage::config::{
};
use storage::scheduler::SchedulerConfig;
pub const DEFAULT_OBJECT_STORE_CACHE_SIZE: ReadableSize = ReadableSize(1024);
pub const DEFAULT_OBJECT_STORE_CACHE_SIZE: ReadableSize = ReadableSize::mb(256);
/// Default data home in file storage
const DEFAULT_DATA_HOME: &str = "/tmp/greptimedb";
@@ -90,6 +90,15 @@ impl Default for StorageConfig {
#[serde(default)]
pub struct FileConfig {}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(default)]
pub struct ObjectStorageCacheConfig {
/// The local file cache directory
pub cache_path: Option<String>,
/// The cache capacity in bytes
pub cache_capacity: Option<ReadableSize>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct S3Config {
@@ -101,8 +110,8 @@ pub struct S3Config {
pub secret_access_key: SecretString,
pub endpoint: Option<String>,
pub region: Option<String>,
pub cache_path: Option<String>,
pub cache_capacity: Option<ReadableSize>,
#[serde(flatten)]
pub cache: ObjectStorageCacheConfig,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -115,8 +124,8 @@ pub struct OssConfig {
#[serde(skip_serializing)]
pub access_key_secret: SecretString,
pub endpoint: String,
pub cache_path: Option<String>,
pub cache_capacity: Option<ReadableSize>,
#[serde(flatten)]
pub cache: ObjectStorageCacheConfig,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -130,8 +139,8 @@ pub struct AzblobConfig {
pub account_key: SecretString,
pub endpoint: String,
pub sas_token: Option<String>,
pub cache_path: Option<String>,
pub cache_capacity: Option<ReadableSize>,
#[serde(flatten)]
pub cache: ObjectStorageCacheConfig,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -143,8 +152,8 @@ pub struct GcsConfig {
#[serde(skip_serializing)]
pub credential_path: SecretString,
pub endpoint: String,
pub cache_path: Option<String>,
pub cache_capacity: Option<ReadableSize>,
#[serde(flatten)]
pub cache: ObjectStorageCacheConfig,
}
impl Default for S3Config {
@@ -156,8 +165,7 @@ impl Default for S3Config {
secret_access_key: SecretString::from(String::default()),
endpoint: Option::default(),
region: Option::default(),
cache_path: Option::default(),
cache_capacity: Option::default(),
cache: ObjectStorageCacheConfig::default(),
}
}
}
@@ -170,8 +178,7 @@ impl Default for OssConfig {
access_key_id: SecretString::from(String::default()),
access_key_secret: SecretString::from(String::default()),
endpoint: String::default(),
cache_path: Option::default(),
cache_capacity: Option::default(),
cache: ObjectStorageCacheConfig::default(),
}
}
}
@@ -184,9 +191,8 @@ impl Default for AzblobConfig {
account_name: SecretString::from(String::default()),
account_key: SecretString::from(String::default()),
endpoint: String::default(),
cache_path: Option::default(),
cache_capacity: Option::default(),
sas_token: Option::default(),
cache: ObjectStorageCacheConfig::default(),
}
}
}
@@ -199,8 +205,7 @@ impl Default for GcsConfig {
scope: String::default(),
credential_path: SecretString::from(String::default()),
endpoint: String::default(),
cache_path: Option::default(),
cache_capacity: Option::default(),
cache: ObjectStorageCacheConfig::default(),
}
}
}
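Grouping the cache settings into `ObjectStorageCacheConfig` and flattening it into each backend config keeps the existing `cache_path`/`cache_capacity` keys unchanged in user config files. A short sketch with hypothetical, trimmed-down structs (JSON is used here for brevity; the real config files are TOML, but `#[serde(flatten)]` keeps the keys at the same level in either format):

use serde::Deserialize;

// Trimmed-down mirrors of `ObjectStorageCacheConfig` and `S3Config`; the real
// structs carry more fields (credentials, endpoint, `ReadableSize`, ...).
#[derive(Debug, Deserialize)]
struct ObjectStorageCacheConfig {
    cache_path: Option<String>,
    cache_capacity: Option<String>,
}

#[derive(Debug, Deserialize)]
struct S3Config {
    bucket: String,
    #[serde(flatten)]
    cache: ObjectStorageCacheConfig,
}

fn main() {
    // The cache keys stay flat even though the Rust struct now nests them.
    let src = r#"{"bucket":"my-bucket","cache_path":"/tmp/s3_cache","cache_capacity":"256MB"}"#;
    let config: S3Config = serde_json::from_str(src).unwrap();
    assert_eq!(config.cache.cache_path.as_deref(), Some("/tmp/s3_cache"));
    println!("{config:?}");
}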

View File

@@ -14,13 +14,11 @@
//! Datanode implementation.
use std::collections::HashMap;
use std::path::Path;
use std::sync::Arc;
use catalog::kvbackend::MetaKvBackend;
use catalog::memory::MemoryCatalogManager;
use common_base::readable_size::ReadableSize;
use common_base::Plugins;
use common_error::ext::BoxedError;
use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
@@ -63,8 +61,6 @@ use crate::region_server::RegionServer;
use crate::server::Services;
use crate::store;
pub const DEFAULT_OBJECT_STORE_CACHE_SIZE: ReadableSize = ReadableSize(1024);
const OPEN_REGION_PARALLELISM: usize = 16;
/// Datanode service.
@@ -76,6 +72,7 @@ pub struct Datanode {
region_server: RegionServer,
greptimedb_telemetry_task: Arc<GreptimeDBTelemetryTask>,
leases_notifier: Option<Arc<Notify>>,
plugins: Plugins,
}
impl Datanode {
@@ -141,11 +138,15 @@ impl Datanode {
pub fn region_server(&self) -> RegionServer {
self.region_server.clone()
}
pub fn plugins(&self) -> Plugins {
self.plugins.clone()
}
}
pub struct DatanodeBuilder {
opts: DatanodeOptions,
plugins: Arc<Plugins>,
plugins: Plugins,
meta_client: Option<MetaClient>,
kv_backend: Option<KvBackendRef>,
}
@@ -153,11 +154,7 @@ pub struct DatanodeBuilder {
impl DatanodeBuilder {
/// `kv_backend` is optional. If absent, the builder will try to build one
/// by using the given `opts`
pub fn new(
opts: DatanodeOptions,
kv_backend: Option<KvBackendRef>,
plugins: Arc<Plugins>,
) -> Self {
pub fn new(opts: DatanodeOptions, kv_backend: Option<KvBackendRef>, plugins: Plugins) -> Self {
Self {
opts,
plugins,
@@ -266,6 +263,7 @@ impl DatanodeBuilder {
greptimedb_telemetry_task,
region_event_receiver,
leases_notifier,
plugins: self.plugins.clone(),
})
}
@@ -286,8 +284,9 @@ impl DatanodeBuilder {
for region_number in table_value.regions {
regions.push((
RegionId::new(table_value.table_id, region_number),
table_value.engine.clone(),
table_value.region_storage_path.clone(),
table_value.region_info.engine.clone(),
table_value.region_info.region_storage_path.clone(),
table_value.region_info.region_options.clone(),
));
}
}
@@ -296,7 +295,7 @@ impl DatanodeBuilder {
let semaphore = Arc::new(tokio::sync::Semaphore::new(OPEN_REGION_PARALLELISM));
let mut tasks = vec![];
for (region_id, engine, store_path) in regions {
for (region_id, engine, store_path, options) in regions {
let region_dir = region_dir(&store_path, region_id);
let semaphore_moved = semaphore.clone();
tasks.push(async move {
@@ -307,7 +306,7 @@ impl DatanodeBuilder {
RegionRequest::Open(RegionOpenRequest {
engine: engine.clone(),
region_dir,
options: HashMap::new(),
options,
}),
)
.await?;
@@ -330,7 +329,7 @@ impl DatanodeBuilder {
async fn new_region_server(
opts: &DatanodeOptions,
plugins: Arc<Plugins>,
plugins: Plugins,
log_store: Arc<RaftEngineLogStore>,
event_listener: RegionServerEventListenerRef,
) -> Result<RegionServer> {
@@ -363,6 +362,8 @@ impl DatanodeBuilder {
Ok(region_server)
}
// internal utils
/// Build [RaftEngineLogStore]
async fn build_log_store(opts: &DatanodeOptions) -> Result<Arc<RaftEngineLogStore>> {
let data_home = normalize_dir(&opts.storage.data_home);
@@ -410,3 +411,80 @@ impl DatanodeBuilder {
Ok(engines)
}
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use std::collections::{BTreeMap, HashMap};
use std::sync::Arc;
use common_base::Plugins;
use common_meta::key::datanode_table::DatanodeTableManager;
use common_meta::kv_backend::memory::MemoryKvBackend;
use common_meta::kv_backend::KvBackendRef;
use store_api::region_request::RegionRequest;
use store_api::storage::RegionId;
use crate::config::DatanodeOptions;
use crate::datanode::DatanodeBuilder;
use crate::tests::{mock_region_server, MockRegionEngine};
async fn setup_table_datanode(kv: &KvBackendRef) {
let mgr = DatanodeTableManager::new(kv.clone());
let txn = mgr
.build_create_txn(
1028,
"mock",
"foo/bar/weny",
HashMap::from([("foo".to_string(), "bar".to_string())]),
BTreeMap::from([(0, vec![0, 1, 2])]),
)
.unwrap();
let r = kv.txn(txn).await.unwrap();
assert!(r.succeeded);
}
#[tokio::test]
async fn test_initialize_region_server() {
let mut mock_region_server = mock_region_server();
let (mock_region, mut mock_region_handler) = MockRegionEngine::new();
mock_region_server.register_engine(mock_region.clone());
let builder = DatanodeBuilder::new(
DatanodeOptions {
node_id: Some(0),
..Default::default()
},
None,
Plugins::default(),
);
let kv = Arc::new(MemoryKvBackend::default()) as _;
setup_table_datanode(&kv).await;
builder
.initialize_region_server(&mock_region_server, kv.clone(), false)
.await
.unwrap();
for i in 0..3 {
let (region_id, req) = mock_region_handler.recv().await.unwrap();
assert_eq!(region_id, RegionId::new(1028, i));
if let RegionRequest::Open(req) = req {
assert_eq!(
req.options,
HashMap::from([("foo".to_string(), "bar".to_string())])
)
} else {
unreachable!()
}
}
assert_matches!(
mock_region_handler.try_recv(),
Err(tokio::sync::mpsc::error::TryRecvError::Empty)
);
}
}

View File

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use async_trait::async_trait;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
@@ -49,12 +47,13 @@ impl RegionHeartbeatResponseHandler {
Instruction::OpenRegion(OpenRegion {
region_ident,
region_storage_path,
options,
}) => {
let region_id = Self::region_ident_to_region_id(&region_ident);
let open_region_req = RegionRequest::Open(RegionOpenRequest {
engine: region_ident.engine,
region_dir: region_dir(&region_storage_path, region_id),
options: HashMap::new(),
options,
});
Ok((region_id, open_region_req))
}

View File

@@ -14,5 +14,7 @@
//! datanode metrics
pub const HANDLE_SQL_ELAPSED: &str = "datanode.handle_sql_elapsed";
pub const HANDLE_PROMQL_ELAPSED: &str = "datanode.handle_promql_elapsed";
/// The elapsed time of handling a request in the region_server.
pub const HANDLE_REGION_REQUEST_ELAPSED: &str = "datanode.handle_region_request_elapsed";
/// Region request type label.
pub const REGION_REQUEST_TYPE: &str = "datanode.region_request_type";

View File

@@ -28,7 +28,7 @@ use common_query::physical_plan::DfPhysicalPlanAdapter;
use common_query::{DfPhysicalPlan, Output};
use common_recordbatch::SendableRecordBatchStream;
use common_runtime::Runtime;
use common_telemetry::{info, warn};
use common_telemetry::{info, timer, warn};
use dashmap::DashMap;
use datafusion::catalog::schema::SchemaProvider;
use datafusion::catalog::{CatalogList, CatalogProvider};
@@ -44,7 +44,7 @@ use query::QueryEngineRef;
use servers::error::{self as servers_error, ExecuteGrpcRequestSnafu, Result as ServerResult};
use servers::grpc::flight::{FlightCraft, FlightRecordBatchStream, TonicStream};
use servers::grpc::region_server::RegionServerHandler;
use session::context::QueryContext;
use session::context::{QueryContextBuilder, QueryContextRef};
use snafu::{OptionExt, ResultExt};
use store_api::metadata::RegionMetadataRef;
use store_api::region_engine::RegionEngineRef;
@@ -227,7 +227,11 @@ impl RegionServerInner {
region_id: RegionId,
request: RegionRequest,
) -> Result<Output> {
// TODO(ruihang): add some metrics
let request_type = request.request_type();
let _timer = timer!(
crate::metrics::HANDLE_REGION_REQUEST_ELAPSED,
&[(crate::metrics::REGION_REQUEST_TYPE, request_type),]
);
let region_change = match &request {
RegionRequest::Create(create) => RegionChange::Register(create.engine.clone()),
@@ -285,12 +289,17 @@ impl RegionServerInner {
// TODO(ruihang): add metrics and set trace id
let QueryRequest {
header: _,
header,
region_id,
plan,
} = request;
let region_id = RegionId::from_u64(region_id);
let ctx: QueryContextRef = header
.as_ref()
.map(|h| Arc::new(h.into()))
.unwrap_or_else(|| QueryContextBuilder::default().build());
// build dummy catalog list
let engine = self
.region_map
@@ -306,7 +315,7 @@ impl RegionServerInner {
.context(DecodeLogicalPlanSnafu)?;
let result = self
.query_engine
.execute(logical_plan.into(), QueryContext::arc())
.execute(logical_plan.into(), ctx)
.await
.context(ExecuteLogicalPlanSnafu)?;

View File

@@ -76,29 +76,33 @@ async fn create_object_store_with_cache(
) -> Result<ObjectStore> {
let (cache_path, cache_capacity) = match store_config {
ObjectStoreConfig::S3(s3_config) => {
let path = s3_config.cache_path.as_ref();
let path = s3_config.cache.cache_path.as_ref();
let capacity = s3_config
.cache
.cache_capacity
.unwrap_or(DEFAULT_OBJECT_STORE_CACHE_SIZE);
(path, capacity)
}
ObjectStoreConfig::Oss(oss_config) => {
let path = oss_config.cache_path.as_ref();
let path = oss_config.cache.cache_path.as_ref();
let capacity = oss_config
.cache
.cache_capacity
.unwrap_or(DEFAULT_OBJECT_STORE_CACHE_SIZE);
(path, capacity)
}
ObjectStoreConfig::Azblob(azblob_config) => {
let path = azblob_config.cache_path.as_ref();
let path = azblob_config.cache.cache_path.as_ref();
let capacity = azblob_config
.cache
.cache_capacity
.unwrap_or(DEFAULT_OBJECT_STORE_CACHE_SIZE);
(path, capacity)
}
ObjectStoreConfig::Gcs(gcs_config) => {
let path = gcs_config.cache_path.as_ref();
let path = gcs_config.cache.cache_path.as_ref();
let capacity = gcs_config
.cache
.cache_capacity
.unwrap_or(DEFAULT_OBJECT_STORE_CACHE_SIZE);
(path, capacity)
@@ -119,6 +123,12 @@ async fn create_object_store_with_cache(
let cache_layer = LruCacheLayer::new(Arc::new(cache_store), cache_capacity.0 as usize)
.await
.context(error::InitBackendSnafu)?;
info!(
"Enabled local object storage cache, path: {}, capacity: {}.",
path, cache_capacity
);
Ok(object_store.layer(cache_layer))
} else {
Ok(object_store)

View File

@@ -13,10 +13,12 @@
// limitations under the License.
use std::any::Any;
use std::collections::HashMap;
use std::sync::Arc;
use api::v1::meta::HeartbeatResponse;
use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_function::scalars::aggregate::AggregateFunctionMetaRef;
use common_function::scalars::FunctionRef;
use common_meta::heartbeat::handler::{
@@ -26,6 +28,7 @@ use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MessageMeta};
use common_meta::instruction::{Instruction, OpenRegion, RegionIdent};
use common_query::prelude::ScalarUdf;
use common_query::Output;
use common_recordbatch::SendableRecordBatchStream;
use common_runtime::Runtime;
use query::dataframe::DataFrame;
use query::plan::LogicalPlan;
@@ -33,7 +36,12 @@ use query::planner::LogicalPlanner;
use query::query_engine::DescribeResult;
use query::QueryEngine;
use session::context::QueryContextRef;
use store_api::metadata::RegionMetadataRef;
use store_api::region_engine::RegionEngine;
use store_api::region_request::RegionRequest;
use store_api::storage::{RegionId, ScanRequest};
use table::TableRef;
use tokio::sync::mpsc::{Receiver, Sender};
use crate::event_listener::NoopRegionServerEventListener;
use crate::region_server::RegionServer;
@@ -79,6 +87,7 @@ fn open_region_instruction() -> Instruction {
engine: "mito2".to_string(),
},
"path/dir",
HashMap::new(),
))
}
@@ -129,3 +138,52 @@ pub fn mock_region_server() -> RegionServer {
Box::new(NoopRegionServerEventListener),
)
}
pub struct MockRegionEngine {
sender: Sender<(RegionId, RegionRequest)>,
}
impl MockRegionEngine {
pub fn new() -> (Arc<Self>, Receiver<(RegionId, RegionRequest)>) {
let (tx, rx) = tokio::sync::mpsc::channel(8);
(Arc::new(Self { sender: tx }), rx)
}
}
#[async_trait::async_trait]
impl RegionEngine for MockRegionEngine {
fn name(&self) -> &str {
"mock"
}
async fn handle_request(
&self,
region_id: RegionId,
request: RegionRequest,
) -> Result<Output, BoxedError> {
let _ = self.sender.send((region_id, request)).await;
Ok(Output::AffectedRows(0))
}
async fn handle_query(
&self,
_region_id: RegionId,
_request: ScanRequest,
) -> Result<SendableRecordBatchStream, BoxedError> {
unimplemented!()
}
async fn get_metadata(&self, _region_id: RegionId) -> Result<RegionMetadataRef, BoxedError> {
unimplemented!()
}
async fn stop(&self) -> Result<(), BoxedError> {
Ok(())
}
fn set_writable(&self, _region_id: RegionId, _writable: bool) -> Result<(), BoxedError> {
Ok(())
}
}

View File

@@ -17,6 +17,7 @@ mod constraint;
mod raw;
use std::collections::HashMap;
use std::fmt;
use std::sync::Arc;
use arrow::datatypes::{Field, Schema as ArrowSchema};
@@ -32,7 +33,7 @@ pub use crate::schema::raw::RawSchema;
pub const VERSION_KEY: &str = "greptime:version";
/// A common schema, should be immutable.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Clone, PartialEq, Eq)]
pub struct Schema {
column_schemas: Vec<ColumnSchema>,
name_to_index: HashMap<String, usize>,
@@ -48,6 +49,17 @@ pub struct Schema {
version: u32,
}
impl fmt::Debug for Schema {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("Schema")
.field("column_schemas", &self.column_schemas)
.field("name_to_index", &self.name_to_index)
.field("timestamp_index", &self.timestamp_index)
.field("version", &self.version)
.finish()
}
}
impl Schema {
/// Initial version of the schema.
pub const INITIAL_VERSION: u32 = 0;

View File

@@ -13,6 +13,7 @@
// limitations under the License.
use std::collections::HashMap;
use std::fmt;
use arrow::datatypes::Field;
use serde::{Deserialize, Serialize};
@@ -33,7 +34,7 @@ pub const COMMENT_KEY: &str = "greptime:storage:comment";
const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint";
/// Schema of a column, used as an immutable struct.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ColumnSchema {
pub name: String,
pub data_type: ConcreteDataType,
@@ -43,6 +44,30 @@ pub struct ColumnSchema {
metadata: Metadata,
}
impl fmt::Debug for ColumnSchema {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{} {} {}",
self.name,
self.data_type,
if self.is_nullable { "null" } else { "not null" },
)?;
// Add default constraint if present
if let Some(default_constraint) = &self.default_constraint {
write!(f, " default={:?}", default_constraint)?;
}
// Add metadata if present
if !self.metadata.is_empty() {
write!(f, " metadata={:?}", self.metadata)?;
}
Ok(())
}
}
impl ColumnSchema {
pub fn new<T: Into<String>>(
name: T,
@@ -394,4 +419,18 @@ mod tests {
let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
assert!(column_schema.create_default().unwrap().is_none());
}
#[test]
fn test_debug_for_column_schema() {
let column_schema_int8 =
ColumnSchema::new("test_column_1", ConcreteDataType::int8_datatype(), true);
let column_schema_int32 =
ColumnSchema::new("test_column_2", ConcreteDataType::int32_datatype(), false);
let formatted_int8 = format!("{:?}", column_schema_int8);
let formatted_int32 = format!("{:?}", column_schema_int32);
assert_eq!(formatted_int8, "test_column_1 Int8 null");
assert_eq!(formatted_int32, "test_column_2 Int32 not null");
}
}

View File

@@ -182,9 +182,6 @@ pub enum Error {
#[snafu(display("Failed to find leaders when altering table, table: {}", table))]
LeaderNotFound { table: String, location: Location },
#[snafu(display("Table already exists: `{}`", table))]
TableAlreadyExist { table: String, location: Location },
#[snafu(display("Failed to found context value: {}", key))]
ContextValueNotFound { key: String, location: Location },
@@ -272,6 +269,9 @@ pub enum Error {
source: operator::error::Error,
location: Location,
},
#[snafu(display("Invalid auth config"))]
IllegalAuthConfig { source: auth::error::Error },
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -288,6 +288,7 @@ impl ErrorExt for Error {
| Error::ColumnNotFound { .. }
| Error::MissingMetasrvOpts { .. }
| Error::UnsupportedFormat { .. }
| Error::IllegalAuthConfig { .. }
| Error::EmptyData { .. }
| Error::ColumnNoneDefaultValue { .. }
| Error::IncompleteGrpcRequest { .. } => StatusCode::InvalidArguments,
@@ -341,7 +342,6 @@ impl ErrorExt for Error {
| Error::ExecLogicalPlan { source, .. } => source.status_code(),
Error::LeaderNotFound { .. } => StatusCode::StorageUnavailable,
Error::TableAlreadyExist { .. } => StatusCode::TableAlreadyExists,
Error::InvokeRegionServer { source, .. } => source.status_code(),
Error::External { source, .. } => source.status_code(),

View File

@@ -41,6 +41,7 @@ pub struct FrontendOptions {
pub meta_client: Option<MetaClientOptions>,
pub logging: LoggingOptions,
pub datanode: DatanodeOptions,
pub user_provider: Option<String>,
}
impl Default for FrontendOptions {
@@ -60,6 +61,7 @@ impl Default for FrontendOptions {
meta_client: None,
logging: LoggingOptions::default(),
datanode: DatanodeOptions::default(),
user_provider: None,
}
}
}

View File

@@ -13,9 +13,7 @@
// limitations under the License.
use async_trait::async_trait;
use common_meta::cache_invalidator::{
CacheInvalidator, Context, KvCacheInvalidatorRef, TableMetadataCacheInvalidator,
};
use common_meta::cache_invalidator::{CacheInvalidatorRef, Context};
use common_meta::error::Result as MetaResult;
use common_meta::heartbeat::handler::{
HandleControl, HeartbeatResponseHandler, HeartbeatResponseHandlerContext,
@@ -26,7 +24,7 @@ use futures::future::Either;
#[derive(Clone)]
pub struct InvalidateTableCacheHandler {
table_metadata_cache_invalidator: TableMetadataCacheInvalidator,
cache_invalidator: CacheInvalidatorRef,
}
#[async_trait]
@@ -41,7 +39,7 @@ impl HeartbeatResponseHandler for InvalidateTableCacheHandler {
async fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> MetaResult<HandleControl> {
let mailbox = ctx.mailbox.clone();
let cache_invalidator = self.table_metadata_cache_invalidator.clone();
let cache_invalidator = self.cache_invalidator.clone();
let (meta, invalidator) = match ctx.incoming_message.take() {
Some((meta, Instruction::InvalidateTableIdCache(table_id))) => (
@@ -86,11 +84,7 @@ impl HeartbeatResponseHandler for InvalidateTableCacheHandler {
}
impl InvalidateTableCacheHandler {
pub fn new(backend_cache_invalidator: KvCacheInvalidatorRef) -> Self {
Self {
table_metadata_cache_invalidator: TableMetadataCacheInvalidator::new(
backend_cache_invalidator,
),
}
pub fn new(cache_invalidator: CacheInvalidatorRef) -> Self {
Self { cache_invalidator }
}
}

View File

@@ -122,9 +122,7 @@ pub struct Instance {
script_executor: Arc<ScriptExecutor>,
statement_executor: Arc<StatementExecutor>,
query_engine: QueryEngineRef,
/// plugins: this map holds extensions to customize query or auth
/// behaviours.
plugins: Arc<Plugins>,
plugins: Plugins,
servers: Arc<ServerHandlers>,
heartbeat_task: Option<HeartbeatTask>,
inserter: InserterRef,
@@ -132,10 +130,7 @@ pub struct Instance {
}
impl Instance {
pub async fn try_new_distributed(
opts: &FrontendOptions,
plugins: Arc<Plugins>,
) -> Result<Self> {
pub async fn try_new_distributed(opts: &FrontendOptions, plugins: Plugins) -> Result<Self> {
let meta_client = Self::create_meta_client(opts).await?;
let datanode_clients = Arc::new(DatanodeClients::default());
@@ -146,7 +141,7 @@ impl Instance {
pub async fn try_new_distributed_with(
meta_client: Arc<MetaClient>,
datanode_clients: Arc<DatanodeClients>,
plugins: Arc<Plugins>,
plugins: Plugins,
opts: &FrontendOptions,
) -> Result<Self> {
let meta_backend = Arc::new(CachedMetaKvBackend::new(meta_client.clone()));
@@ -297,7 +292,7 @@ impl Instance {
kv_backend: KvBackendRef,
procedure_manager: ProcedureManagerRef,
catalog_manager: CatalogManagerRef,
plugins: Arc<Plugins>,
plugins: Plugins,
region_server: RegionServer,
) -> Result<Self> {
let partition_manager = Arc::new(PartitionRuleManager::new(kv_backend.clone()));
@@ -377,7 +372,7 @@ impl Instance {
&self.catalog_manager
}
pub fn plugins(&self) -> Arc<Plugins> {
pub fn plugins(&self) -> Plugins {
self.plugins.clone()
}
@@ -593,7 +588,7 @@ impl PrometheusHandler for Instance {
}
pub fn check_permission(
plugins: Arc<Plugins>,
plugins: Plugins,
stmt: &Statement,
query_ctx: &QueryContextRef,
) -> Result<()> {
@@ -664,6 +659,7 @@ fn validate_param(name: &ObjectName, query_ctx: &QueryContextRef) -> Result<()>
mod tests {
use std::collections::HashMap;
use common_base::Plugins;
use query::query_engine::options::QueryOptions;
use session::context::QueryContext;
use sql::dialect::GreptimeDbDialect;
@@ -674,11 +670,10 @@ mod tests {
#[test]
fn test_exec_validation() {
let query_ctx = QueryContext::arc();
let plugins = Plugins::new();
let plugins: Plugins = Plugins::new();
plugins.insert(QueryOptions {
disallow_cross_schema_query: true,
});
let plugins = Arc::new(plugins);
let sql = r#"
SELECT * FROM demo;
@@ -704,7 +699,7 @@ mod tests {
re.unwrap();
}
fn replace_test(template_sql: &str, plugins: Arc<Plugins>, query_ctx: &QueryContextRef) {
fn replace_test(template_sql: &str, plugins: Plugins, query_ctx: &QueryContextRef) {
// test right
let right = vec![("", ""), ("", "public."), ("greptime.", "public.")];
for (catalog, schema) in right {
@@ -732,7 +727,7 @@ mod tests {
template.format(&vars).unwrap()
}
fn do_test(sql: &str, plugins: Arc<Plugins>, query_ctx: &QueryContextRef, is_ok: bool) {
fn do_test(sql: &str, plugins: Plugins, query_ctx: &QueryContextRef, is_ok: bool) {
let stmt = &parse_stmt(sql, &GreptimeDbDialect {}).unwrap()[0];
let re = check_permission(plugins, stmt, query_ctx);
if is_ok {

View File

@@ -20,7 +20,6 @@ use auth::UserProviderRef;
use common_base::Plugins;
use common_runtime::Builder as RuntimeBuilder;
use common_telemetry::info;
use servers::configurator::ConfiguratorRef;
use servers::error::InternalIoSnafu;
use servers::grpc::{GrpcServer, GrpcServerConfig};
use servers::http::HttpServerBuilder;
@@ -47,7 +46,7 @@ impl Services {
pub(crate) async fn build<T>(
opts: &FrontendOptions,
instance: Arc<T>,
plugins: Arc<Plugins>,
plugins: Plugins,
) -> Result<ServerHandlers>
where
T: FrontendInstance,
@@ -120,7 +119,7 @@ impl Services {
let http_server = http_server_builder
.with_metrics_handler(MetricsHandler)
.with_script_handler(instance.clone())
.with_configurator(plugins.get::<ConfiguratorRef>())
.with_plugins(plugins)
.with_greptime_config_options(opts.to_toml_string())
.build();
result.push((Box::new(http_server), http_addr));

View File

@@ -20,7 +20,9 @@ use api::v1::meta::heartbeat_server::HeartbeatServer;
use api::v1::meta::lock_server::LockServer;
use api::v1::meta::router_server::RouterServer;
use api::v1::meta::store_server::StoreServer;
use common_base::Plugins;
use etcd_client::Client;
use servers::configurator::ConfiguratorRef;
use servers::http::{HttpServer, HttpServerBuilder};
use servers::metrics_handler::MetricsHandler;
use servers::server::Server;
@@ -28,8 +30,7 @@ use snafu::ResultExt;
use tokio::net::TcpListener;
use tokio::select;
use tokio::sync::mpsc::{self, Receiver, Sender};
use tokio_stream::wrappers::TcpListenerStream;
use tonic::transport::server::Router;
use tonic::transport::server::{Router, TcpIncoming};
use crate::election::etcd::EtcdElection;
use crate::lock::etcd::EtcdLock;
@@ -54,22 +55,27 @@ pub struct MetaSrvInstance {
opts: MetaSrvOptions,
signal_sender: Option<Sender<()>>,
plugins: Plugins,
}
impl MetaSrvInstance {
pub async fn new(opts: MetaSrvOptions) -> Result<MetaSrvInstance> {
let meta_srv = build_meta_srv(&opts).await?;
pub async fn new(opts: MetaSrvOptions, plugins: Plugins) -> Result<MetaSrvInstance> {
let meta_srv = build_meta_srv(&opts, plugins.clone()).await?;
let http_srv = Arc::new(
HttpServerBuilder::new(opts.http.clone())
.with_metrics_handler(MetricsHandler)
.with_greptime_config_options(opts.to_toml_string())
.build(),
);
// put meta_srv into plugins for later use
plugins.insert::<Arc<MetaSrv>>(Arc::new(meta_srv.clone()));
Ok(MetaSrvInstance {
meta_srv,
http_srv,
opts,
signal_sender: None,
plugins,
})
}
@@ -80,8 +86,12 @@ impl MetaSrvInstance {
self.signal_sender = Some(tx);
let meta_srv =
bootstrap_meta_srv_with_router(&self.opts.bind_addr, router(self.meta_srv.clone()), rx);
let mut router = router(self.meta_srv.clone());
if let Some(configurator) = self.meta_srv.plugins().get::<ConfiguratorRef>() {
router = configurator.config_grpc(router);
}
let meta_srv = bootstrap_meta_srv_with_router(&self.opts.bind_addr, router, rx);
let addr = self.opts.http.addr.parse().context(error::ParseAddrSnafu {
addr: &self.opts.http.addr,
})?;
@@ -111,6 +121,10 @@ impl MetaSrvInstance {
})?;
Ok(())
}
pub fn plugins(&self) -> Plugins {
self.plugins.clone()
}
}
pub async fn bootstrap_meta_srv_with_router(
@@ -121,10 +135,12 @@ pub async fn bootstrap_meta_srv_with_router(
let listener = TcpListener::bind(bind_addr)
.await
.context(error::TcpBindSnafu { addr: bind_addr })?;
let listener = TcpListenerStream::new(listener);
let incoming =
TcpIncoming::from_listener(listener, true, None).context(error::TcpIncomingSnafu)?;
router
.serve_with_incoming_shutdown(listener, async {
.serve_with_incoming_shutdown(incoming, async {
let _ = signal.recv().await;
})
.await
@@ -145,7 +161,7 @@ pub fn router(meta_srv: MetaSrv) -> Router {
.add_service(admin::make_admin_service(meta_srv))
}
pub async fn build_meta_srv(opts: &MetaSrvOptions) -> Result<MetaSrv> {
pub async fn build_meta_srv(opts: &MetaSrvOptions, plugins: Plugins) -> Result<MetaSrv> {
let (kv_store, election, lock) = if opts.use_memory_store {
(
Arc::new(MemStore::new()) as _,
@@ -153,8 +169,13 @@ pub async fn build_meta_srv(opts: &MetaSrvOptions) -> Result<MetaSrv> {
Some(Arc::new(MemLock::default()) as _),
)
} else {
let etcd_endpoints = [&opts.store_addr];
let etcd_client = Client::connect(etcd_endpoints, None)
let etcd_endpoints = opts
.store_addr
.split(',')
.map(|x| x.trim())
.filter(|x| !x.is_empty())
.collect::<Vec<_>>();
let etcd_client = Client::connect(&etcd_endpoints, None)
.await
.context(error::ConnectEtcdSnafu)?;
(
@@ -178,14 +199,7 @@ pub async fn build_meta_srv(opts: &MetaSrvOptions) -> Result<MetaSrv> {
.selector(selector)
.election(election)
.lock(lock)
.plugins(plugins)
.build()
.await
}
pub async fn make_meta_srv(opts: &MetaSrvOptions) -> Result<MetaSrv> {
let meta_srv = build_meta_srv(opts).await?;
meta_srv.try_start().await?;
Ok(meta_srv)
}
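Note on the hunk above: a minimal, hedged sketch of the comma-splitting behaviour that replaces the single-endpoint connect. The function name and test below are illustrative only, not part of the metasrv code.
// Hedged sketch: isolates the endpoint-splitting logic introduced above.
fn parse_etcd_endpoints(store_addr: &str) -> Vec<&str> {
    store_addr
        .split(',')
        .map(str::trim)
        .filter(|x| !x.is_empty())
        .collect()
}
#[test]
fn splits_comma_separated_store_addr() {
    // Trailing commas and surrounding whitespace are tolerated.
    assert_eq!(
        parse_etcd_endpoints("10.0.0.1:2379, 10.0.0.2:2379 ,"),
        vec!["10.0.0.1:2379", "10.0.0.2:2379"]
    );
}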

View File

@@ -150,6 +150,12 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to convert to TcpIncoming"))]
TcpIncoming {
#[snafu(source)]
error: Box<dyn std::error::Error + Send + Sync>,
},
#[snafu(display("Failed to start gRPC server"))]
StartGrpc {
#[snafu(source)]
@@ -546,6 +552,7 @@ impl ErrorExt for Error {
Error::EtcdFailed { .. }
| Error::ConnectEtcd { .. }
| Error::TcpBind { .. }
| Error::TcpIncoming { .. }
| Error::SerializeToJson { .. }
| Error::DeserializeFromJson { .. }
| Error::DecodeTableRoute { .. }

View File

@@ -26,6 +26,7 @@ use common_meta::instruction::{Instruction, InstructionReply};
use common_meta::sequence::Sequence;
use common_telemetry::{debug, info, timer, warn};
use dashmap::DashMap;
use futures::future::join_all;
use metrics::{decrement_gauge, increment_gauge};
use snafu::{OptionExt, ResultExt};
use tokio::sync::mpsc::Sender;
@@ -149,18 +150,25 @@ impl Pushers {
.range(range)
.map(|(_, value)| value)
.collect::<Vec<_>>();
let mut results = Vec::with_capacity(pushers.len());
for pusher in pushers {
let mut mailbox_message = mailbox_message.clone();
mailbox_message.id = 0; // one-way message
pusher
.push(HeartbeatResponse {
header: Some(pusher.header()),
mailbox_message: Some(mailbox_message),
..Default::default()
})
.await?;
results.push(pusher.push(HeartbeatResponse {
header: Some(pusher.header()),
mailbox_message: Some(mailbox_message),
..Default::default()
}))
}
// Check the push results for errors after the loop completes.
let _ = join_all(results)
.await
.into_iter()
.collect::<Result<Vec<_>>>()?;
Ok(())
}
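The rewrite above collects the push futures and awaits them together with join_all, checking errors only after every pusher has been driven to completion. A hedged, self-contained sketch of that pattern (the function name and error type are placeholders):
use futures::future::join_all;
async fn push_all<F>(pushes: Vec<F>) -> Result<(), String>
where
    F: std::future::Future<Output = Result<(), String>>,
{
    // Run all pushes concurrently; a failing pusher no longer short-circuits
    // the remaining ones the way the old sequential await-per-push loop did.
    let _ = join_all(pushes)
        .await
        .into_iter()
        .collect::<Result<Vec<_>, _>>()?;
    Ok(())
}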

View File

@@ -19,6 +19,7 @@ use std::sync::Arc;
use std::time::Duration;
use api::v1::meta::Peer;
use common_base::Plugins;
use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
use common_grpc::channel_manager;
use common_meta::ddl::DdlTaskExecutorRef;
@@ -71,7 +72,7 @@ impl Default for MetaSrvOptions {
store_addr: "127.0.0.1:2379".to_string(),
selector: SelectorType::default(),
use_memory_store: false,
enable_region_failover: true,
enable_region_failover: false,
http: HttpOptions::default(),
logging: LoggingOptions {
dir: format!("{METASRV_HOME}/logs"),
@@ -188,7 +189,8 @@ pub struct MetaSrv {
ddl_executor: DdlTaskExecutorRef,
table_metadata_manager: TableMetadataManagerRef,
greptimedb_telemetry_task: Arc<GreptimeDBTelemetryTask>,
pubsub: Option<(PublishRef, SubscribeManagerRef)>,
plugins: Plugins,
}
impl MetaSrv {
@@ -208,7 +210,7 @@ impl MetaSrv {
let procedure_manager = self.procedure_manager.clone();
let in_memory = self.in_memory.clone();
let leader_cached_kv_store = self.leader_cached_kv_store.clone();
let subscribe_manager = self.subscribe_manager().cloned();
let subscribe_manager = self.subscribe_manager();
let mut rx = election.subscribe_leader_change();
let task_handler = self.greptimedb_telemetry_task.clone();
let _handle = common_runtime::spawn_bg(async move {
@@ -350,12 +352,16 @@ impl MetaSrv {
&self.table_metadata_manager
}
pub fn publish(&self) -> Option<&PublishRef> {
self.pubsub.as_ref().map(|suite| &suite.0)
pub fn publish(&self) -> Option<PublishRef> {
self.plugins.get::<PublishRef>()
}
pub fn subscribe_manager(&self) -> Option<&SubscribeManagerRef> {
self.pubsub.as_ref().map(|suite| &suite.1)
pub fn subscribe_manager(&self) -> Option<SubscribeManagerRef> {
self.plugins.get::<SubscribeManagerRef>()
}
pub fn plugins(&self) -> &Plugins {
&self.plugins
}
#[inline]

View File

@@ -17,6 +17,7 @@ use std::sync::Arc;
use std::time::Duration;
use client::client_manager::DatanodeClients;
use common_base::Plugins;
use common_grpc::channel_manager::ChannelConfig;
use common_meta::ddl_manager::{DdlManager, DdlManagerRef};
use common_meta::distributed_time_constants;
@@ -48,7 +49,7 @@ use crate::metasrv::{
ElectionRef, MetaSrv, MetaSrvOptions, MetasrvInfo, SelectorContext, SelectorRef, TABLE_ID_SEQ,
};
use crate::procedure::region_failover::RegionFailoverManager;
use crate::pubsub::{PublishRef, SubscribeManagerRef};
use crate::pubsub::PublishRef;
use crate::selector::lease_based::LeaseBasedSelector;
use crate::service::mailbox::MailboxRef;
use crate::service::store::cached_kv::{CheckLeader, LeaderCachedKvStore};
@@ -67,7 +68,7 @@ pub struct MetaSrvBuilder {
meta_peer_client: Option<MetaPeerClientRef>,
lock: Option<DistLockRef>,
datanode_clients: Option<Arc<DatanodeClients>>,
pubsub: Option<(PublishRef, SubscribeManagerRef)>,
plugins: Option<Plugins>,
}
impl MetaSrvBuilder {
@@ -82,7 +83,7 @@ impl MetaSrvBuilder {
options: None,
lock: None,
datanode_clients: None,
pubsub: None,
plugins: None,
}
}
@@ -131,8 +132,8 @@ impl MetaSrvBuilder {
self
}
pub fn pubsub(mut self, publish: PublishRef, subscribe_manager: SubscribeManagerRef) -> Self {
self.pubsub = Some((publish, subscribe_manager));
pub fn plugins(mut self, plugins: Plugins) -> Self {
self.plugins = Some(plugins);
self
}
@@ -149,7 +150,7 @@ impl MetaSrvBuilder {
handler_group,
lock,
datanode_clients,
pubsub,
plugins,
} = self;
let options = options.unwrap_or_default();
@@ -206,11 +207,10 @@ impl MetaSrvBuilder {
None
};
let publish_heartbeat_handler = if let Some((publish, _)) = pubsub.as_ref() {
Some(PublishHeartbeatHandler::new(publish.clone()))
} else {
None
};
let publish_heartbeat_handler = plugins
.clone()
.and_then(|plugins| plugins.get::<PublishRef>())
.map(|publish| PublishHeartbeatHandler::new(publish.clone()));
let region_lease_handler =
RegionLeaseHandler::new(distributed_time_constants::REGION_LEASE_SECS);
@@ -263,7 +263,7 @@ impl MetaSrvBuilder {
enable_telemetry,
)
.await,
pubsub,
plugins: plugins.unwrap_or_else(Plugins::default),
})
}
}

View File

@@ -630,7 +630,8 @@ mod tests {
Some(Payload::Json(
serde_json::to_string(&Instruction::OpenRegion(OpenRegion::new(
opening_region,
&path
&path,
HashMap::new(),
)))
.unwrap(),
))

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::time::Duration;
use api::v1::meta::MailboxMessage;
@@ -42,6 +43,7 @@ pub(super) struct ActivateRegion {
// to prevent it from renewing the lease.
remark_inactive_region: bool,
region_storage_path: Option<String>,
region_options: Option<HashMap<String, String>>,
}
impl ActivateRegion {
@@ -50,6 +52,7 @@ impl ActivateRegion {
candidate,
remark_inactive_region: false,
region_storage_path: None,
region_options: None,
}
}
@@ -67,6 +70,7 @@ impl ActivateRegion {
.await
.context(error::TableMetadataManagerSnafu)?
.context(error::TableInfoNotFoundSnafu { table_id })?
.into_inner()
.table_info;
let region_storage_path =
@@ -77,14 +81,15 @@ impl ActivateRegion {
..failed_region.clone()
};
info!("Activating region: {candidate_ident:?}");
let region_options: HashMap<String, String> = (&table_info.meta.options).into();
let instruction = Instruction::OpenRegion(OpenRegion::new(
candidate_ident.clone(),
&region_storage_path,
region_options.clone(),
));
self.region_storage_path = Some(region_storage_path);
self.region_options = Some(region_options);
let msg = MailboxMessage::json_message(
"Activate Region",
&format!("Metasrv@{}", ctx.selector_ctx.server_addr),
@@ -139,6 +144,11 @@ impl ActivateRegion {
.context(error::UnexpectedSnafu {
violated: "expected region_storage_path",
})?,
self.region_options
.clone()
.context(error::UnexpectedSnafu {
violated: "expected region_options",
})?,
)))
} else {
// The region may simply be unable to be opened by the candidate; retry.
@@ -193,6 +203,8 @@ impl State for ActivateRegion {
#[cfg(test)]
mod tests {
use std::collections::HashMap;
use api::v1::meta::mailbox_message::Payload;
use common_meta::instruction::SimpleReply;
@@ -231,7 +243,8 @@ mod tests {
datanode_id: candidate,
..failed_region.clone()
},
&env.path
&env.path,
HashMap::new(),
)))
.unwrap(),
))
@@ -266,7 +279,7 @@ mod tests {
.unwrap();
assert_eq!(
format!("{next_state:?}"),
r#"UpdateRegionMetadata { candidate: Peer { id: 2, addr: "" }, region_storage_path: "greptime/public" }"#
r#"UpdateRegionMetadata { candidate: Peer { id: 2, addr: "" }, region_storage_path: "greptime/public", region_options: {} }"#
);
}
@@ -300,7 +313,8 @@ mod tests {
datanode_id: candidate,
..failed_region.clone()
},
&env.path
&env.path,
HashMap::new(),
)))
.unwrap(),
))

View File

@@ -226,7 +226,7 @@ mod tests {
.unwrap();
assert_eq!(
format!("{next_state:?}"),
r#"ActivateRegion { candidate: Peer { id: 2, addr: "" }, remark_inactive_region: false, region_storage_path: None }"#
r#"ActivateRegion { candidate: Peer { id: 2, addr: "" }, remark_inactive_region: false, region_storage_path: None, region_options: None }"#
);
}
@@ -268,7 +268,7 @@ mod tests {
// Timeout or not, proceed to `ActivateRegion`.
assert_eq!(
format!("{next_state:?}"),
r#"ActivateRegion { candidate: Peer { id: 2, addr: "" }, remark_inactive_region: false, region_storage_path: None }"#
r#"ActivateRegion { candidate: Peer { id: 2, addr: "" }, remark_inactive_region: false, region_storage_path: None, region_options: None }"#
);
}
}

View File

@@ -12,7 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use async_trait::async_trait;
use common_meta::key::datanode_table::RegionInfo;
use common_meta::key::table_route::TableRouteKey;
use common_meta::peer::Peer;
use common_meta::rpc::router::RegionRoute;
@@ -31,13 +34,19 @@ use crate::lock::Opts;
pub(super) struct UpdateRegionMetadata {
candidate: Peer,
region_storage_path: String,
region_options: HashMap<String, String>,
}
impl UpdateRegionMetadata {
pub(super) fn new(candidate: Peer, region_storage_path: String) -> Self {
pub(super) fn new(
candidate: Peer,
region_storage_path: String,
region_options: HashMap<String, String>,
) -> Self {
Self {
candidate,
region_storage_path,
region_options,
}
}
@@ -90,10 +99,14 @@ impl UpdateRegionMetadata {
ctx.table_metadata_manager
.update_table_route(
table_id,
engine,
&self.region_storage_path,
RegionInfo {
engine: engine.to_string(),
region_storage_path: self.region_storage_path.to_string(),
region_options: self.region_options.clone(),
},
table_route_value,
new_region_routes,
&self.region_options,
)
.await
.context(error::UpdateTableRouteSnafu)?;
@@ -174,7 +187,8 @@ mod tests {
let env = TestingEnvBuilder::new().build().await;
let failed_region = env.failed_region(1).await;
let mut state = UpdateRegionMetadata::new(Peer::new(2, ""), env.path.clone());
let mut state =
UpdateRegionMetadata::new(Peer::new(2, ""), env.path.clone(), HashMap::new());
let next_state = state.next(&env.context, &failed_region).await.unwrap();
assert_eq!(format!("{next_state:?}"), "InvalidateCache");
@@ -187,7 +201,11 @@ mod tests {
async fn test(env: TestingEnv, failed_region: u32, candidate: u64) -> Vec<RegionRoute> {
let failed_region = env.failed_region(failed_region).await;
let state = UpdateRegionMetadata::new(Peer::new(candidate, ""), env.path.clone());
let state = UpdateRegionMetadata::new(
Peer::new(candidate, ""),
env.path.clone(),
HashMap::new(),
);
state
.update_table_route(&env.context, &failed_region)
.await
@@ -202,6 +220,7 @@ mod tests {
.await
.unwrap()
.unwrap()
.into_inner()
.region_routes
}
@@ -328,14 +347,18 @@ mod tests {
let path = env.path.clone();
let _ = futures::future::join_all(vec![
tokio::spawn(async move {
let state = UpdateRegionMetadata::new(Peer::new(2, ""), path);
let state = UpdateRegionMetadata::new(Peer::new(2, ""), path, HashMap::new());
state
.update_metadata(&ctx_1, &failed_region_1)
.await
.unwrap();
}),
tokio::spawn(async move {
let state = UpdateRegionMetadata::new(Peer::new(3, ""), env.path.clone());
let state = UpdateRegionMetadata::new(
Peer::new(3, ""),
env.path.clone(),
HashMap::new(),
);
state
.update_metadata(&ctx_2, &failed_region_2)
.await
@@ -351,7 +374,8 @@ mod tests {
.get(table_id)
.await
.unwrap()
.unwrap();
.unwrap()
.into_inner();
let peers = &extract_all_peers(&table_route_value.region_routes);
let actual = &table_route_value.region_routes;
@@ -370,7 +394,8 @@ mod tests {
.get(table_id)
.await
.unwrap()
.unwrap();
.unwrap()
.into_inner();
let map = region_distribution(&table_route_value.region_routes).unwrap();
assert_eq!(map.len(), 2);

View File

@@ -31,6 +31,7 @@ use common_meta::ddl::create_table::*;
use common_meta::ddl::drop_table::DropTableProcedure;
use common_meta::key::table_info::TableInfoValue;
use common_meta::key::table_route::TableRouteValue;
use common_meta::key::DeserializedValueWithBytes;
use common_meta::rpc::ddl::{AlterTableTask, CreateTableTask, DropTableTask};
use common_meta::rpc::router::{find_leaders, RegionRoute};
use common_procedure::Status;
@@ -235,8 +236,8 @@ async fn test_on_datanode_drop_regions() {
let procedure = DropTableProcedure::new(
1,
drop_table_task,
TableRouteValue::new(region_routes),
TableInfoValue::new(test_data::new_table_info()),
DeserializedValueWithBytes::from_inner(TableRouteValue::new(region_routes)),
DeserializedValueWithBytes::from_inner(TableInfoValue::new(test_data::new_table_info())),
test_data::new_ddl_context(datanode_manager),
);
@@ -299,7 +300,7 @@ fn test_create_alter_region_request() {
let procedure = AlterTableProcedure::new(
1,
alter_table_task,
TableInfoValue::new(test_data::new_table_info()),
DeserializedValueWithBytes::from_inner(TableInfoValue::new(test_data::new_table_info())),
test_data::new_ddl_context(Arc::new(DatanodeClients::default())),
)
.unwrap();
@@ -364,7 +365,7 @@ async fn test_submit_alter_region_requests() {
let mut procedure = AlterTableProcedure::new(
1,
alter_table_task,
TableInfoValue::new(table_info),
DeserializedValueWithBytes::from_inner(TableInfoValue::new(table_info)),
context,
)
.unwrap();

View File

@@ -43,7 +43,7 @@ impl HttpHandler for NodeLeaseHandler {
.into_iter()
.map(|(k, v)| HumanLease {
name: k,
human_time: common_time::DateTime::new(v.timestamp_millis / 1000).to_string(),
human_time: common_time::DateTime::new(v.timestamp_millis).to_string(),
lease: v,
})
.collect::<Vec<_>>();

View File

@@ -32,7 +32,9 @@ pub(crate) async fn fetch_table(
.context(TableMetadataManagerSnafu)?;
if let Some(table_info) = table_info {
let table_route = table_route.context(TableRouteNotFoundSnafu { table_id })?;
let table_route = table_route
.context(TableRouteNotFoundSnafu { table_id })?
.into_inner();
let table = Table {
id: table_id as u64,
@@ -44,7 +46,7 @@ pub(crate) async fn fetch_table(
.try_into()
.context(error::TableRouteConversionSnafu)?;
Ok(Some((table_info, table_route_value)))
Ok(Some((table_info.into_inner(), table_route_value)))
} else {
Ok(None)
}

View File

@@ -12,6 +12,7 @@ test = ["common-test-util"]
anymap = "1.0.0-beta.2"
api.workspace = true
aquamarine.workspace = true
arc-swap = "1.6"
async-channel = "1.9"
async-compat = "0.2"
async-stream.workspace = true
@@ -40,7 +41,7 @@ humantime-serde = { workspace = true }
lazy_static = "1.4"
memcomparable = "0.2"
metrics.workspace = true
moka.workspace = true
moka = { workspace = true, features = ["sync"] }
object-store = { workspace = true }
parquet = { workspace = true, features = ["async"] }
paste.workspace = true

View File

@@ -21,6 +21,8 @@ pub(crate) mod test_util;
use std::mem;
use std::sync::Arc;
use datatypes::value::Value;
use datatypes::vectors::VectorRef;
use moka::sync::Cache;
use parquet::file::metadata::ParquetMetaData;
use store_api::storage::RegionId;
@@ -32,13 +34,15 @@ use crate::sst::file::FileId;
pub struct CacheManager {
/// Cache for SST metadata.
sst_meta_cache: Option<SstMetaCache>,
/// Cache for vectors.
vector_cache: Option<VectorCache>,
}
pub type CacheManagerRef = Arc<CacheManager>;
impl CacheManager {
/// Creates a new manager with specific cache size in bytes.
pub fn new(sst_meta_cache_size: u64) -> CacheManager {
pub fn new(sst_meta_cache_size: u64, vector_cache_size: u64) -> CacheManager {
let sst_meta_cache = if sst_meta_cache_size == 0 {
None
} else {
@@ -51,8 +55,23 @@ impl CacheManager {
.build();
Some(cache)
};
let vector_cache = if vector_cache_size == 0 {
None
} else {
let cache = Cache::builder()
.max_capacity(vector_cache_size)
.weigher(|_k, v: &VectorRef| {
// We ignore the heap size of `Value`.
(mem::size_of::<Value>() + v.memory_size()) as u32
})
.build();
Some(cache)
};
CacheManager { sst_meta_cache }
CacheManager {
sst_meta_cache,
vector_cache,
}
}
/// Gets cached [ParquetMetaData].
@@ -84,9 +103,23 @@ impl CacheManager {
cache.remove(&SstMetaKey(region_id, file_id));
}
}
/// Gets a vector with repeated value for specific `key`.
pub fn get_repeated_vector(&self, key: &Value) -> Option<VectorRef> {
self.vector_cache
.as_ref()
.and_then(|vector_cache| vector_cache.get(key))
}
/// Puts a vector with repeated value into the cache.
pub fn put_repeated_vector(&self, key: Value, vector: VectorRef) {
if let Some(cache) = &self.vector_cache {
cache.insert(key, vector);
}
}
}
/// Cache key for SST meta.
/// Cache key (region id, file id) for SST meta.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct SstMetaKey(RegionId, FileId);
@@ -97,16 +130,23 @@ impl SstMetaKey {
}
}
/// Maps (region id, file id) to [ParquetMetaData].
type SstMetaCache = Cache<SstMetaKey, Arc<ParquetMetaData>>;
/// Maps [Value] to a vector that holds this value repeatedly.
///
/// e.g. `"hello" => ["hello", "hello", "hello"]`
type VectorCache = Cache<Value, VectorRef>;
#[cfg(test)]
mod tests {
use datatypes::vectors::Int64Vector;
use super::*;
use crate::cache::test_util::parquet_meta;
#[test]
fn test_disable_meta_cache() {
let cache = CacheManager::new(0);
fn test_disable_cache() {
let cache = CacheManager::new(0, 0);
assert!(cache.sst_meta_cache.is_none());
let region_id = RegionId::new(1, 1);
@@ -114,11 +154,16 @@ mod tests {
let metadata = parquet_meta();
cache.put_parquet_meta_data(region_id, file_id, metadata);
assert!(cache.get_parquet_meta_data(region_id, file_id).is_none());
let value = Value::Int64(10);
let vector: VectorRef = Arc::new(Int64Vector::from_slice([10, 10, 10, 10]));
cache.put_repeated_vector(value.clone(), vector.clone());
assert!(cache.get_repeated_vector(&value).is_none());
}
#[test]
fn test_parquet_meta_cache() {
let cache = CacheManager::new(2000);
let cache = CacheManager::new(2000, 0);
let region_id = RegionId::new(1, 1);
let file_id = FileId::random();
assert!(cache.get_parquet_meta_data(region_id, file_id).is_none());
@@ -128,4 +173,15 @@ mod tests {
cache.remove_parquet_meta_data(region_id, file_id);
assert!(cache.get_parquet_meta_data(region_id, file_id).is_none());
}
#[test]
fn test_repeated_vector_cache() {
let cache = CacheManager::new(0, 4096);
let value = Value::Int64(10);
assert!(cache.get_repeated_vector(&value).is_none());
let vector: VectorRef = Arc::new(Int64Vector::from_slice([10, 10, 10, 10]));
cache.put_repeated_vector(value.clone(), vector.clone());
let cached = cache.get_repeated_vector(&value).unwrap();
assert_eq!(vector, cached);
}
}
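The new vector cache reuses moka's weigher so that max_capacity acts as a byte budget instead of an entry count. A hedged, standalone sketch of that sizing idea (the key and value types here are placeholders, not the crate's Value/VectorRef):
use moka::sync::Cache;
fn build_sized_cache(max_bytes: u64) -> Cache<String, Vec<u8>> {
    Cache::builder()
        .max_capacity(max_bytes)
        // Weigh each entry by payload size so eviction tracks bytes, like the
        // VectorRef weigher in the hunk above.
        .weigher(|_key: &String, value: &Vec<u8>| value.len() as u32)
        .build()
}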

View File

@@ -20,8 +20,9 @@ mod twcs;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Instant;
use common_telemetry::{debug, error};
use common_telemetry::{debug, error, timer};
pub use picker::CompactionPickerRef;
use snafu::ResultExt;
use store_api::storage::RegionId;
@@ -32,6 +33,7 @@ use crate::compaction::twcs::TwcsPicker;
use crate::error::{
CompactRegionSnafu, Error, RegionClosedSnafu, RegionDroppedSnafu, RegionTruncatedSnafu, Result,
};
use crate::metrics::{COMPACTION_STAGE_ELAPSED, STAGE_LABEL};
use crate::region::options::CompactionOptions;
use crate::region::version::{VersionControlRef, VersionRef};
use crate::request::{OptionOutputTx, OutputTx, WorkerRequest};
@@ -47,6 +49,8 @@ pub struct CompactionRequest {
/// Waiters of the compaction request.
pub(crate) waiters: Vec<OutputTx>,
pub(crate) file_purger: FilePurgerRef,
/// Start time of compaction task.
pub(crate) start_time: Instant,
}
impl CompactionRequest {
@@ -175,11 +179,14 @@ impl CompactionScheduler {
"Pick compaction strategy {:?} for region: {}",
picker, region_id
);
let pick_timer = timer!(COMPACTION_STAGE_ELAPSED, &[(STAGE_LABEL, "pick")]);
let Some(mut task) = picker.pick(request) else {
// Nothing to compact, remove it from the region status map.
self.region_status.remove(&region_id);
return Ok(());
};
drop(pick_timer);
// Submit the compaction task.
self.scheduler
@@ -188,10 +195,8 @@ impl CompactionScheduler {
}))
.map_err(|e| {
error!(e; "Failed to submit compaction request for region {}", region_id);
// If failed to submit the job, we need to remove the region from the scheduler.
self.region_status.remove(&region_id);
e
})
}
@@ -295,12 +300,14 @@ impl CompactionStatus {
waiter: OptionOutputTx,
) -> CompactionRequest {
let current_version = self.version_control.current().version;
let start_time = Instant::now();
let mut req = CompactionRequest {
current_version,
access_layer: self.access_layer.clone(),
request_sender: request_sender.clone(),
waiters: Vec::new(),
file_purger: self.file_purger.clone(),
start_time,
};
if let Some(pending) = self.pending_compaction.take() {

View File

@@ -15,14 +15,15 @@
use std::collections::BTreeMap;
use std::fmt::{Debug, Formatter};
use std::sync::Arc;
use std::time::Duration;
use std::time::{Duration, Instant};
use common_base::readable_size::ReadableSize;
use common_query::Output;
use common_telemetry::{debug, error, info};
use common_telemetry::{debug, error, info, timer};
use common_time::timestamp::TimeUnit;
use common_time::timestamp_millis::BucketAligned;
use common_time::Timestamp;
use metrics::increment_counter;
use snafu::ResultExt;
use store_api::metadata::RegionMetadataRef;
use store_api::storage::RegionId;
@@ -34,6 +35,7 @@ use crate::compaction::picker::{CompactionTask, Picker};
use crate::compaction::CompactionRequest;
use crate::error;
use crate::error::CompactRegionSnafu;
use crate::metrics::{COMPACTION_FAILURE_COUNT, COMPACTION_STAGE_ELAPSED, STAGE_LABEL};
use crate::request::{
BackgroundNotify, CompactionFailed, CompactionFinished, OutputTx, WorkerRequest,
};
@@ -118,6 +120,7 @@ impl Picker for TwcsPicker {
request_sender,
waiters,
file_purger,
start_time,
} = req;
let region_metadata = current_version.metadata.clone();
@@ -170,6 +173,7 @@ impl Picker for TwcsPicker {
request_sender,
waiters,
file_purger,
start_time,
};
Some(Box::new(task))
}
@@ -228,6 +232,8 @@ pub(crate) struct TwcsCompactionTask {
pub(crate) request_sender: mpsc::Sender<WorkerRequest>,
/// Senders that are used to notify waiters waiting for pending compaction tasks.
pub waiters: Vec<OutputTx>,
/// Start time of compaction task
pub start_time: Instant,
}
impl Debug for TwcsCompactionTask {
@@ -310,8 +316,10 @@ impl TwcsCompactionTask {
async fn handle_compaction(&mut self) -> error::Result<(Vec<FileMeta>, Vec<FileMeta>)> {
self.mark_files_compacting(true);
let merge_timer = timer!(COMPACTION_STAGE_ELAPSED, &[(STAGE_LABEL, "merge")]);
let (output, mut compacted) = self.merge_ssts().await.map_err(|e| {
error!(e; "Failed to compact region: {}", self.region_id);
merge_timer.discard();
e
})?;
compacted.extend(self.expired_ssts.iter().map(FileHandle::meta));
@@ -320,6 +328,7 @@ impl TwcsCompactionTask {
/// Handles compaction failure, notifies all waiters.
fn on_failure(&mut self, err: Arc<error::Error>) {
increment_counter!(COMPACTION_FAILURE_COUNT);
for waiter in self.waiters.drain(..) {
waiter.send(Err(err.clone()).context(CompactRegionSnafu {
region_id: self.region_id,
@@ -357,6 +366,7 @@ impl CompactionTask for TwcsCompactionTask {
compaction_time_window: self
.compaction_time_window
.map(|seconds| Duration::from_secs(seconds as u64)),
start_time: self.start_time,
})
}
Err(e) => {

View File

@@ -58,8 +58,10 @@ pub struct MitoConfig {
pub global_write_buffer_reject_size: ReadableSize,
// Cache configs:
/// Cache size for SST metadata (default 128MB). Setting it to 0 to disable cache.
/// Cache size for SST metadata (default 128MB). Setting it to 0 to disable the cache.
pub sst_meta_cache_size: ReadableSize,
/// Cache size for vectors and arrow arrays (default 512MB). Setting it to 0 to disable the cache.
pub vector_cache_size: ReadableSize,
}
impl Default for MitoConfig {
@@ -75,6 +77,7 @@ impl Default for MitoConfig {
global_write_buffer_size: ReadableSize::gb(1),
global_write_buffer_reject_size: ReadableSize::gb(2),
sst_meta_cache_size: ReadableSize::mb(128),
vector_cache_size: ReadableSize::mb(512),
}
}
}
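With the new field, both cache budgets can be tuned, or disabled by setting a size to zero, when constructing the engine config. A hedged usage sketch; the import paths are assumed from the surrounding crate layout:
use common_base::readable_size::ReadableSize;
use crate::config::MitoConfig;
fn small_cache_config() -> MitoConfig {
    MitoConfig {
        // Shrink the SST metadata cache and the new vector cache.
        sst_meta_cache_size: ReadableSize::mb(64),
        vector_cache_size: ReadableSize::mb(256),
        ..MitoConfig::default()
    }
}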

View File

@@ -35,6 +35,8 @@ mod open_test;
#[cfg(test)]
mod projection_test;
#[cfg(test)]
mod prune_test;
#[cfg(test)]
mod truncate_test;
use std::sync::Arc;
@@ -43,6 +45,7 @@ use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_query::Output;
use common_recordbatch::SendableRecordBatchStream;
use common_telemetry::timer;
use object_store::ObjectStore;
use snafu::{OptionExt, ResultExt};
use store_api::logstore::LogStore;
@@ -53,6 +56,7 @@ use store_api::storage::{RegionId, ScanRequest};
use crate::config::MitoConfig;
use crate::error::{RecvSnafu, RegionNotFoundSnafu, Result};
use crate::metrics::{HANDLE_REQUEST_ELAPSED, TYPE_LABEL};
use crate::read::scan_region::{ScanRegion, Scanner};
use crate::request::WorkerRequest;
use crate::worker::WorkerGroup;
@@ -130,6 +134,8 @@ impl EngineInner {
/// Handles [RegionRequest] and return its executed result.
async fn handle_request(&self, region_id: RegionId, request: RegionRequest) -> Result<Output> {
let _timer = timer!(HANDLE_REQUEST_ELAPSED, &[(TYPE_LABEL, request.type_name())]);
let (request, receiver) = WorkerRequest::try_from_region_request(region_id, request)?;
self.workers.submit_to_worker(region_id, request).await?;

View File

@@ -16,11 +16,13 @@
use std::collections::HashMap;
use api::v1::value::ValueData;
use api::v1::Rows;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_recordbatch::RecordBatches;
use store_api::region_request::RegionOpenRequest;
use datatypes::prelude::ConcreteDataType;
use store_api::region_request::{RegionOpenRequest, RegionPutRequest};
use store_api::storage::RegionId;
use super::*;
@@ -176,6 +178,129 @@ async fn test_write_query_region() {
assert_eq!(expected, batches.pretty_print().unwrap());
}
#[tokio::test]
async fn test_different_order() {
let mut env = TestEnv::new();
let engine = env.create_engine(MitoConfig::default()).await;
let region_id = RegionId::new(1, 1);
let request = CreateRequestBuilder::new().tag_num(2).field_num(2).build();
// tag_0, tag_1, field_0, field_1, ts,
let mut column_schemas = rows_schema(&request);
engine
.handle_request(region_id, RegionRequest::Create(request))
.await
.unwrap();
// Swap position of columns.
column_schemas.swap(0, 3);
column_schemas.swap(2, 4);
// Now the schema is field_1, tag_1, ts, tag_0, field_0
let rows = (0..3)
.map(|i| api::v1::Row {
values: vec![
api::v1::Value {
value_data: Some(ValueData::F64Value((i + 10) as f64)),
},
api::v1::Value {
value_data: Some(ValueData::StringValue(format!("b{i}"))),
},
api::v1::Value {
value_data: Some(ValueData::TimestampMillisecondValue(i as i64 * 1000)),
},
api::v1::Value {
value_data: Some(ValueData::StringValue(format!("a{i}"))),
},
api::v1::Value {
value_data: Some(ValueData::F64Value(i as f64)),
},
],
})
.collect();
let rows = Rows {
schema: column_schemas,
rows,
};
put_rows(&engine, region_id, rows).await;
let request = ScanRequest::default();
let stream = engine.handle_query(region_id, request).await.unwrap();
let batches = RecordBatches::try_collect(stream).await.unwrap();
let expected = "\
+-------+-------+---------+---------+---------------------+
| tag_0 | tag_1 | field_0 | field_1 | ts |
+-------+-------+---------+---------+---------------------+
| a0 | b0 | 0.0 | 10.0 | 1970-01-01T00:00:00 |
| a1 | b1 | 1.0 | 11.0 | 1970-01-01T00:00:01 |
| a2 | b2 | 2.0 | 12.0 | 1970-01-01T00:00:02 |
+-------+-------+---------+---------+---------------------+";
assert_eq!(expected, batches.pretty_print().unwrap());
}
#[tokio::test]
async fn test_different_order_and_type() {
let mut env = TestEnv::new();
let engine = env.create_engine(MitoConfig::default()).await;
let region_id = RegionId::new(1, 1);
// tag_0, tag_1, field_0, field_1, ts,
let mut request = CreateRequestBuilder::new().tag_num(2).field_num(2).build();
// Change the field type of field_1.
request.column_metadatas[3].column_schema.data_type = ConcreteDataType::string_datatype();
let mut column_schemas = rows_schema(&request);
engine
.handle_request(region_id, RegionRequest::Create(request))
.await
.unwrap();
// Swap position of columns.
column_schemas.swap(2, 3);
// Now the schema is tag_0, tag_1, field_1, field_0, ts
let rows = (0..3)
.map(|i| api::v1::Row {
values: vec![
api::v1::Value {
value_data: Some(ValueData::StringValue(format!("a{i}"))),
},
api::v1::Value {
value_data: Some(ValueData::StringValue(format!("b{i}"))),
},
api::v1::Value {
value_data: Some(ValueData::StringValue((i + 10).to_string())),
},
api::v1::Value {
value_data: Some(ValueData::F64Value(i as f64)),
},
api::v1::Value {
value_data: Some(ValueData::TimestampMillisecondValue(i as i64 * 1000)),
},
],
})
.collect();
let rows = Rows {
schema: column_schemas,
rows,
};
put_rows(&engine, region_id, rows).await;
let request = ScanRequest::default();
let stream = engine.handle_query(region_id, request).await.unwrap();
let batches = RecordBatches::try_collect(stream).await.unwrap();
let expected = "\
+-------+-------+---------+---------+---------------------+
| tag_0 | tag_1 | field_0 | field_1 | ts |
+-------+-------+---------+---------+---------------------+
| a0 | b0 | 0.0 | 10 | 1970-01-01T00:00:00 |
| a1 | b1 | 1.0 | 11 | 1970-01-01T00:00:01 |
| a2 | b2 | 2.0 | 12 | 1970-01-01T00:00:02 |
+-------+-------+---------+---------+---------------------+";
assert_eq!(expected, batches.pretty_print().unwrap());
}
#[tokio::test]
async fn test_put_delete() {
let mut env = TestEnv::new();
@@ -287,3 +412,48 @@ async fn test_put_overwrite() {
+-------+---------+---------------------+";
assert_eq!(expected, batches.pretty_print().unwrap());
}
#[tokio::test]
async fn test_absent_and_invalid_columns() {
let mut env = TestEnv::new();
let engine = env.create_engine(MitoConfig::default()).await;
let region_id = RegionId::new(1, 1);
// tag_0, field_0, field_1, ts,
let request = CreateRequestBuilder::new().field_num(2).build();
let mut column_schemas = rows_schema(&request);
engine
.handle_request(region_id, RegionRequest::Create(request))
.await
.unwrap();
// Change the type of field_1 in input.
column_schemas[2].datatype = api::v1::ColumnDataType::String as i32;
// Input tag_0, field_1 (invalid type string), ts
column_schemas.remove(1);
let rows = (0..3)
.map(|i| api::v1::Row {
values: vec![
api::v1::Value {
value_data: Some(ValueData::StringValue(format!("a{i}"))),
},
api::v1::Value {
value_data: Some(ValueData::StringValue(i.to_string())),
},
api::v1::Value {
value_data: Some(ValueData::TimestampMillisecondValue(i as i64 * 1000)),
},
],
})
.collect();
let rows = Rows {
schema: column_schemas,
rows,
};
let err = engine
.handle_request(region_id, RegionRequest::Put(RegionPutRequest { rows }))
.await
.unwrap_err();
assert_eq!(StatusCode::InvalidArguments, err.status_code());
}

View File

@@ -44,7 +44,12 @@ async fn put_and_flush(
put_rows(engine, region_id, rows).await;
let Output::AffectedRows(rows) = engine
.handle_request(region_id, RegionRequest::Flush(RegionFlushRequest {}))
.handle_request(
region_id,
RegionRequest::Flush(RegionFlushRequest {
row_group_size: None,
}),
)
.await
.unwrap()
else {
@@ -79,7 +84,12 @@ async fn delete_and_flush(
assert_eq!(row_cnt, rows_affected);
let Output::AffectedRows(rows) = engine
.handle_request(region_id, RegionRequest::Flush(RegionFlushRequest {}))
.handle_request(
region_id,
RegionRequest::Flush(RegionFlushRequest {
row_group_size: None,
}),
)
.await
.unwrap()
else {

View File

@@ -67,7 +67,7 @@ async fn test_engine_drop_region() {
rows: build_rows_for_key("a", 0, 2, 0),
};
put_rows(&engine, region_id, rows).await;
flush_region(&engine, region_id).await;
flush_region(&engine, region_id, None).await;
// drop the created region.
engine

View File

@@ -49,7 +49,7 @@ async fn test_manual_flush() {
};
put_rows(&engine, region_id, rows).await;
flush_region(&engine, region_id).await;
flush_region(&engine, region_id, None).await;
let request = ScanRequest::default();
let scanner = engine.scanner(region_id, request).unwrap();
@@ -164,7 +164,7 @@ async fn test_write_stall() {
tokio::spawn(async move {
listener.wait().await;
flush_region(&engine_cloned, region_id).await;
flush_region(&engine_cloned, region_id, None).await;
});
// Triggers write stall.
@@ -212,7 +212,7 @@ async fn test_flush_empty() {
.await
.unwrap();
flush_region(&engine, region_id).await;
flush_region(&engine, region_id, None).await;
let request = ScanRequest::default();
let scanner = engine.scanner(region_id, request).unwrap();
@@ -247,7 +247,7 @@ async fn test_flush_reopen_region() {
};
put_rows(&engine, region_id, rows).await;
flush_region(&engine, region_id).await;
flush_region(&engine, region_id, None).await;
let check_region = || {
let region = engine.get_region(region_id).unwrap();
let version_data = region.version_control.current();

View File

@@ -0,0 +1,102 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use api::v1::Rows;
use common_query::logical_plan::DfExpr;
use common_query::prelude::Expr;
use common_recordbatch::RecordBatches;
use datafusion_common::ScalarValue;
use datafusion_expr::lit;
use store_api::region_engine::RegionEngine;
use store_api::region_request::RegionRequest;
use store_api::storage::{RegionId, ScanRequest};
use crate::config::MitoConfig;
use crate::test_util::{
build_rows, flush_region, put_rows, rows_schema, CreateRequestBuilder, TestEnv,
};
async fn check_prune_row_groups(expr: DfExpr, expected: &str) {
let mut env = TestEnv::new();
let engine = env.create_engine(MitoConfig::default()).await;
let region_id = RegionId::new(1, 1);
let request = CreateRequestBuilder::new().build();
let column_schemas = rows_schema(&request);
engine
.handle_request(region_id, RegionRequest::Create(request))
.await
.unwrap();
put_rows(
&engine,
region_id,
Rows {
schema: column_schemas.clone(),
rows: build_rows(0, 10),
},
)
.await;
flush_region(&engine, region_id, Some(5)).await;
let stream = engine
.handle_query(
region_id,
ScanRequest {
filters: vec![Expr::from(expr)],
..Default::default()
},
)
.await
.unwrap();
let batches = RecordBatches::try_collect(stream).await.unwrap();
assert_eq!(expected, batches.pretty_print().unwrap());
}
#[tokio::test]
async fn test_read_parquet_stats() {
common_telemetry::init_default_ut_logging();
check_prune_row_groups(
datafusion_expr::col("ts").gt(lit(ScalarValue::TimestampMillisecond(Some(4000), None))),
"\
+-------+---------+---------------------+
| tag_0 | field_0 | ts |
+-------+---------+---------------------+
| 5 | 5.0 | 1970-01-01T00:00:05 |
| 6 | 6.0 | 1970-01-01T00:00:06 |
| 7 | 7.0 | 1970-01-01T00:00:07 |
| 8 | 8.0 | 1970-01-01T00:00:08 |
| 9 | 9.0 | 1970-01-01T00:00:09 |
+-------+---------+---------------------+",
)
.await;
check_prune_row_groups(
datafusion_expr::col("tag_0").gt(lit(ScalarValue::Utf8(Some("4".to_string())))),
"\
+-------+---------+---------------------+
| tag_0 | field_0 | ts |
+-------+---------+---------------------+
| 5 | 5.0 | 1970-01-01T00:00:05 |
| 6 | 6.0 | 1970-01-01T00:00:06 |
| 7 | 7.0 | 1970-01-01T00:00:07 |
| 8 | 8.0 | 1970-01-01T00:00:08 |
| 9 | 9.0 | 1970-01-01T00:00:09 |
+-------+---------+---------------------+",
)
.await;
}

View File

@@ -167,7 +167,12 @@ async fn test_engine_truncate_after_flush() {
// Flush the region.
engine
.handle_request(region_id, RegionRequest::Flush(RegionFlushRequest {}))
.handle_request(
region_id,
RegionRequest::Flush(RegionFlushRequest {
row_group_size: None,
}),
)
.await
.unwrap();
@@ -304,7 +309,12 @@ async fn test_engine_truncate_during_flush() {
let flush_task = tokio::spawn(async move {
info!("do flush task!!!!");
engine_cloned
.handle_request(region_id, RegionRequest::Flush(RegionFlushRequest {}))
.handle_request(
region_id,
RegionRequest::Flush(RegionFlushRequest {
row_group_size: None,
}),
)
.await
});

View File

@@ -19,10 +19,11 @@ use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use common_query::Output;
use common_telemetry::{error, info};
use common_telemetry::{error, info, timer};
use metrics::{counter, increment_counter};
use snafu::ResultExt;
use store_api::storage::RegionId;
use strum::AsRefStr;
use strum::IntoStaticStr;
use tokio::sync::mpsc;
use crate::access_layer::AccessLayerRef;
@@ -30,6 +31,10 @@ use crate::error::{
Error, FlushRegionSnafu, RegionClosedSnafu, RegionDroppedSnafu, RegionTruncatedSnafu, Result,
};
use crate::memtable::MemtableBuilderRef;
use crate::metrics::{
FLUSH_BYTES_TOTAL, FLUSH_ELAPSED, FLUSH_ERRORS_TOTAL, FLUSH_REASON, FLUSH_REQUESTS_TOTAL,
TYPE_LABEL,
};
use crate::read::Source;
use crate::region::version::{VersionControlData, VersionControlRef, VersionRef};
use crate::request::{
@@ -114,8 +119,8 @@ impl WriteBufferManager for WriteBufferManagerImpl {
let mutable_memtable_memory_usage = self.memory_active.load(Ordering::Relaxed);
if mutable_memtable_memory_usage > self.mutable_limit {
info!(
"Engine should flush (over mutable limit), mutable_usage: {}, mutable_limit: {}.",
mutable_memtable_memory_usage, self.mutable_limit,
"Engine should flush (over mutable limit), mutable_usage: {}, memory_usage: {}, mutable_limit: {}, global_limit: {}",
mutable_memtable_memory_usage, self.memory_usage(), self.mutable_limit, self.global_write_buffer_size,
);
return true;
}
@@ -163,7 +168,7 @@ impl WriteBufferManager for WriteBufferManagerImpl {
}
/// Reason of a flush task.
#[derive(Debug, AsRefStr)]
#[derive(Debug, IntoStaticStr)]
pub enum FlushReason {
/// Other reasons.
Others,
@@ -175,6 +180,13 @@ pub enum FlushReason {
Alter,
}
impl FlushReason {
/// Get flush reason as static str.
fn as_str(&self) -> &'static str {
self.into()
}
}
/// Task to flush a region.
pub(crate) struct RegionFlushTask {
/// Region to flush.
@@ -190,6 +202,7 @@ pub(crate) struct RegionFlushTask {
pub(crate) memtable_builder: MemtableBuilderRef,
pub(crate) file_purger: FilePurgerRef,
pub(crate) listener: WorkerListener,
pub(crate) row_group_size: Option<usize>,
}
impl RegionFlushTask {
@@ -231,6 +244,7 @@ impl RegionFlushTask {
/// Runs the flush task.
async fn do_flush(&mut self, version_data: VersionControlData) {
let timer = timer!(FLUSH_ELAPSED, &[(TYPE_LABEL, "total")]);
self.listener.on_flush_begin(self.region_id).await;
let worker_request = match self.flush_memtables(&version_data.version).await {
Ok(file_metas) => {
@@ -250,6 +264,7 @@ impl RegionFlushTask {
memtables_to_remove,
senders: std::mem::take(&mut self.senders),
file_purger: self.file_purger.clone(),
timer,
};
WorkerRequest::Background {
region_id: self.region_id,
@@ -258,6 +273,9 @@ impl RegionFlushTask {
}
Err(e) => {
error!(e; "Failed to flush region {}", self.region_id);
// Discard the timer.
timer.discard();
let err = Arc::new(e);
self.on_failure(err.clone());
WorkerRequest::Background {
@@ -271,10 +289,16 @@ impl RegionFlushTask {
/// Flushes memtables to level 0 SSTs.
async fn flush_memtables(&self, version: &VersionRef) -> Result<Vec<FileMeta>> {
let timer = timer!(FLUSH_ELAPSED, &[(TYPE_LABEL, "flush_memtables")]);
// TODO(yingwen): Make it configurable.
let write_opts = WriteOptions::default();
let mut write_opts = WriteOptions::default();
if let Some(row_group_size) = self.row_group_size {
write_opts.row_group_size = row_group_size;
}
let memtables = version.memtables.immutables();
let mut file_metas = Vec::with_capacity(memtables.len());
let mut flushed_bytes = 0;
for mem in memtables {
if mem.is_empty() {
@@ -283,7 +307,7 @@ impl RegionFlushTask {
}
let file_id = FileId::random();
let iter = mem.iter(None, &[]);
let iter = mem.iter(None, None);
let source = Source::Iter(iter);
let mut writer = self
.access_layer
@@ -293,6 +317,7 @@ impl RegionFlushTask {
continue;
};
flushed_bytes += sst_info.file_size;
file_metas.push(FileMeta {
region_id: version.metadata.region_id,
file_id,
@@ -302,12 +327,17 @@ impl RegionFlushTask {
});
}
if !file_metas.is_empty() {
counter!(FLUSH_BYTES_TOTAL, flushed_bytes);
}
let file_ids: Vec<_> = file_metas.iter().map(|f| f.file_id).collect();
info!(
"Successfully flush memtables, region: {}, reason: {}, files: {:?}",
"Successfully flush memtables, region: {}, reason: {}, files: {:?}, cost: {:?}",
version.metadata.region_id,
self.reason.as_ref(),
file_ids
self.reason.as_str(),
file_ids,
timer.elapsed(),
);
Ok(file_metas)
@@ -362,6 +392,8 @@ impl FlushScheduler {
) -> Result<()> {
debug_assert_eq!(region_id, task.region_id);
increment_counter!(FLUSH_REQUESTS_TOTAL, FLUSH_REASON => task.reason.as_str());
let version = version_control.current().version;
if version.memtables.mutable.is_empty() && version.memtables.immutables().is_empty() {
debug_assert!(!self.region_status.contains_key(&region_id));
@@ -442,6 +474,8 @@ impl FlushScheduler {
pub(crate) fn on_flush_failed(&mut self, region_id: RegionId, err: Arc<Error>) {
error!(err; "Region {} failed to flush, cancel all pending tasks", region_id);
increment_counter!(FLUSH_ERRORS_TOTAL);
// Remove this region.
let Some(flush_status) = self.region_status.remove(&region_id) else {
return;
@@ -689,6 +723,7 @@ mod tests {
memtable_builder: builder.memtable_builder(),
file_purger: builder.file_purger(),
listener: WorkerListener::default(),
row_group_size: None,
};
task.push_sender(OptionOutputTx::from(output_tx));
scheduler

View File

@@ -23,11 +23,11 @@ use std::fmt;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc;
use common_query::logical_plan::Expr;
use common_time::Timestamp;
use metrics::{decrement_gauge, increment_gauge};
use store_api::metadata::RegionMetadataRef;
use store_api::storage::ColumnId;
use table::predicate::Predicate;
use crate::error::Result;
use crate::flush::WriteBufferManagerRef;
@@ -73,7 +73,11 @@ pub trait Memtable: Send + Sync + fmt::Debug {
/// Scans the memtable.
/// `projection` selects columns to read, `None` means reading all columns.
/// `filters` are the predicates to be pushed down to memtable.
fn iter(&self, projection: Option<&[ColumnId]>, filters: &[Expr]) -> BoxedBatchIterator;
fn iter(
&self,
projection: Option<&[ColumnId]>,
predicate: Option<Predicate>,
) -> BoxedBatchIterator;
/// Returns true if the memtable is empty.
fn is_empty(&self) -> bool;

View File

@@ -14,7 +14,7 @@
use std::collections::HashMap;
use api::v1::{Mutation, OpType, Row, Rows, SemanticType};
use api::v1::{Mutation, OpType, Row, Rows};
use datatypes::value::ValueRef;
use store_api::metadata::RegionMetadata;
use store_api::storage::SequenceNumber;
@@ -169,12 +169,10 @@ impl ReadRowHelper {
.unwrap();
indices.push(*ts_index);
// Iterate columns and find field columns.
for column in metadata.column_metadatas.iter() {
if column.semantic_type == SemanticType::Field {
// Get index in request for each field column.
let index = name_to_index.get(&column.column_schema.name).unwrap();
indices.push(*index);
}
for column in metadata.field_columns() {
// Get index in request for each field column.
let index = name_to_index.get(&column.column_schema.name).unwrap();
indices.push(*index);
}
ReadRowHelper {
@@ -186,61 +184,22 @@ impl ReadRowHelper {
#[cfg(test)]
mod tests {
use api::v1;
use api::v1::ColumnDataType;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::ColumnSchema;
use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
use store_api::storage::RegionId;
use api::v1::{self, ColumnDataType, SemanticType};
use super::*;
use crate::test_util::i64_value;
use crate::test_util::meta_util::TestRegionMetadataBuilder;
const TS_NAME: &str = "ts";
const START_SEQ: SequenceNumber = 100;
/// Creates a region: `ts, k0, k1, ..., v0, v1, ...`
fn new_region_metadata(num_tag: usize, num_field: usize) -> RegionMetadata {
let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1));
let mut column_id = 0;
builder.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new(
TS_NAME,
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
semantic_type: SemanticType::Timestamp,
column_id,
});
// For simplicity, we use the same data type for tag/field columns.
let mut primary_key = Vec::with_capacity(num_tag);
for i in 0..num_tag {
column_id += 1;
builder.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new(
format!("k{i}"),
ConcreteDataType::int64_datatype(),
true,
),
semantic_type: SemanticType::Tag,
column_id,
});
primary_key.push(i as u32 + 1);
}
for i in 0..num_field {
column_id += 1;
builder.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new(
format!("v{i}"),
ConcreteDataType::int64_datatype(),
true,
),
semantic_type: SemanticType::Field,
column_id,
});
}
builder.primary_key(primary_key);
builder.build().unwrap()
fn new_region_metadata(num_tags: usize, num_fields: usize) -> RegionMetadata {
TestRegionMetadataBuilder::default()
.ts_name(TS_NAME)
.num_tags(num_tags)
.num_fields(num_fields)
.build()
}
/// Creates rows `[ 0, 1, ..., n ] x num_rows`

View File

@@ -19,9 +19,10 @@ use std::sync::atomic::{AtomicI64, AtomicU32, Ordering};
use std::sync::{Arc, RwLock};
use api::v1::OpType;
use common_query::logical_plan::Expr;
use common_telemetry::debug;
use datatypes::arrow;
use datatypes::arrow::array::ArrayRef;
use datatypes::arrow::record_batch::RecordBatch;
use datatypes::data_type::DataType;
use datatypes::prelude::{MutableVector, ScalarVectorBuilder, Vector, VectorRef};
use datatypes::value::ValueRef;
@@ -31,8 +32,12 @@ use datatypes::vectors::{
use snafu::{ensure, ResultExt};
use store_api::metadata::RegionMetadataRef;
use store_api::storage::ColumnId;
use table::predicate::Predicate;
use crate::error::{ComputeArrowSnafu, ConvertVectorSnafu, PrimaryKeyLengthMismatchSnafu, Result};
use crate::error::{
ComputeArrowSnafu, ConvertVectorSnafu, NewRecordBatchSnafu, PrimaryKeyLengthMismatchSnafu,
Result,
};
use crate::flush::WriteBufferManagerRef;
use crate::memtable::{
AllocTracker, BoxedBatchIterator, KeyValues, Memtable, MemtableBuilder, MemtableId,
@@ -76,7 +81,7 @@ impl MemtableBuilder for TimeSeriesMemtableBuilder {
pub struct TimeSeriesMemtable {
id: MemtableId,
region_metadata: RegionMetadataRef,
row_codec: McmpRowCodec,
row_codec: Arc<McmpRowCodec>,
series_set: SeriesSet,
alloc_tracker: AllocTracker,
max_timestamp: AtomicI64,
@@ -89,13 +94,13 @@ impl TimeSeriesMemtable {
id: MemtableId,
write_buffer_manager: Option<WriteBufferManagerRef>,
) -> Self {
let row_codec = McmpRowCodec::new(
let row_codec = Arc::new(McmpRowCodec::new(
region_metadata
.primary_key_columns()
.map(|c| SortField::new(c.column_schema.data_type.clone()))
.collect(),
);
let series_set = SeriesSet::new(region_metadata.clone());
));
let series_set = SeriesSet::new(region_metadata.clone(), row_codec.clone());
Self {
id,
region_metadata,
@@ -200,7 +205,11 @@ impl Memtable for TimeSeriesMemtable {
Ok(())
}
fn iter(&self, projection: Option<&[ColumnId]>, _filters: &[Expr]) -> BoxedBatchIterator {
fn iter(
&self,
projection: Option<&[ColumnId]>,
filters: Option<Predicate>,
) -> BoxedBatchIterator {
let projection = if let Some(projection) = projection {
projection.iter().copied().collect()
} else {
@@ -210,7 +219,7 @@ impl Memtable for TimeSeriesMemtable {
.collect()
};
Box::new(self.series_set.iter_series(projection))
Box::new(self.series_set.iter_series(projection, filters))
}
fn is_empty(&self) -> bool {
@@ -253,13 +262,15 @@ type SeriesRwLockMap = RwLock<BTreeMap<Vec<u8>, Arc<RwLock<Series>>>>;
struct SeriesSet {
region_metadata: RegionMetadataRef,
series: Arc<SeriesRwLockMap>,
codec: Arc<McmpRowCodec>,
}
impl SeriesSet {
fn new(region_metadata: RegionMetadataRef) -> Self {
fn new(region_metadata: RegionMetadataRef, codec: Arc<McmpRowCodec>) -> Self {
Self {
region_metadata,
series: Default::default(),
codec,
}
}
}
@@ -285,21 +296,55 @@ impl SeriesSet {
}
/// Iterates all series in [SeriesSet].
fn iter_series(&self, projection: HashSet<ColumnId>) -> Iter {
fn iter_series(&self, projection: HashSet<ColumnId>, predicate: Option<Predicate>) -> Iter {
let (primary_key_builders, primary_key_schema) =
primary_key_builders(&self.region_metadata, 1);
Iter {
metadata: self.region_metadata.clone(),
series: self.series.clone(),
projection,
last_key: None,
predicate,
pk_schema: primary_key_schema,
primary_key_builders,
codec: self.codec.clone(),
}
}
}
/// Creates primary key array builders and arrow's schema for primary keys of given region schema.
fn primary_key_builders(
region_metadata: &RegionMetadataRef,
num_pk_rows: usize,
) -> (Vec<Box<dyn MutableVector>>, arrow::datatypes::SchemaRef) {
let (builders, fields): (_, Vec<_>) = region_metadata
.primary_key_columns()
.map(|pk| {
(
pk.column_schema
.data_type
.create_mutable_vector(num_pk_rows),
arrow::datatypes::Field::new(
pk.column_schema.name.clone(),
pk.column_schema.data_type.as_arrow_type(),
pk.column_schema.is_nullable(),
),
)
})
.unzip();
(builders, Arc::new(arrow::datatypes::Schema::new(fields)))
}
struct Iter {
metadata: RegionMetadataRef,
series: Arc<SeriesRwLockMap>,
projection: HashSet<ColumnId>,
last_key: Option<Vec<u8>>,
predicate: Option<Predicate>,
pk_schema: arrow::datatypes::SchemaRef,
primary_key_builders: Vec<Box<dyn MutableVector>>,
codec: Arc<McmpRowCodec>,
}
impl Iterator for Iter {
@@ -307,25 +352,92 @@ impl Iterator for Iter {
fn next(&mut self) -> Option<Self::Item> {
let map = self.series.read().unwrap();
let mut range = match &self.last_key {
let range = match &self.last_key {
None => map.range::<Vec<u8>, _>(..),
Some(last_key) => {
map.range::<Vec<u8>, _>((Bound::Excluded(last_key), Bound::Unbounded))
}
};
if let Some((primary_key, series)) = range.next() {
// TODO(hl): maybe yield more than one time series to amortize range overhead.
for (primary_key, series) in range {
let mut series = series.write().unwrap();
if let Some(predicate) = &self.predicate {
if !prune_primary_key(
&self.codec,
primary_key.as_slice(),
&mut series,
&mut self.primary_key_builders,
self.pk_schema.clone(),
predicate,
) {
// read next series
continue;
}
}
self.last_key = Some(primary_key.clone());
let values = series.write().unwrap().compact(&self.metadata);
Some(values.and_then(|v| v.to_batch(primary_key, &self.metadata, &self.projection)))
} else {
None
let values = series.compact(&self.metadata);
return Some(
values.and_then(|v| v.to_batch(primary_key, &self.metadata, &self.projection)),
);
}
None
}
}
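The iterator above resumes from `last_key` every time it is polled, so it never has to keep a cursor into the locked series map between calls. Below is a minimal standalone sketch of that resume-from-exclusive-bound pattern over a `BTreeMap`, with plain strings standing in for `Series` (illustrative only, not the engine's types):

```rust
use std::collections::BTreeMap;
use std::ops::Bound;

/// Returns the first entry after `last_key`, mimicking how `Iter` resumes
/// scanning the series map on each call to `next`.
fn next_after<'a>(
    map: &'a BTreeMap<Vec<u8>, String>,
    last_key: Option<&Vec<u8>>,
) -> Option<(&'a Vec<u8>, &'a String)> {
    let mut range = match last_key {
        None => map.range::<Vec<u8>, _>(..),
        Some(last) => map.range::<Vec<u8>, _>((Bound::Excluded(last), Bound::Unbounded)),
    };
    range.next()
}

fn main() {
    let mut map = BTreeMap::new();
    map.insert(b"k1".to_vec(), "series-1".to_string());
    map.insert(b"k2".to_vec(), "series-2".to_string());

    let mut last_key: Option<Vec<u8>> = None;
    while let Some((pk, series)) = next_after(&map, last_key.as_ref()) {
        println!("visit {:?} -> {}", pk, series);
        // Remember the key we just yielded so the next call skips past it.
        last_key = Some(pk.clone());
    }
}
```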
fn prune_primary_key(
codec: &Arc<McmpRowCodec>,
pk: &[u8],
series: &mut Series,
builders: &mut Vec<Box<dyn MutableVector>>,
pk_schema: arrow::datatypes::SchemaRef,
predicate: &Predicate,
) -> bool {
// No primary key, so we simply return true.
if pk_schema.fields().is_empty() {
return true;
}
if let Some(rb) = series.pk_cache.as_ref() {
let res = predicate.prune_primary_key(rb).unwrap_or(true);
debug!("Prune primary key: {:?}, res: {:?}", rb, res);
res
} else {
let Ok(rb) = pk_to_record_batch(codec, pk, builders, pk_schema) else {
return true;
};
let res = predicate.prune_primary_key(&rb).unwrap_or(true);
debug!("Prune primary key: {:?}, res: {:?}", rb, res);
series.update_pk_cache(rb);
res
}
}
fn pk_to_record_batch(
codec: &Arc<McmpRowCodec>,
bytes: &[u8],
builders: &mut Vec<Box<dyn MutableVector>>,
pk_schema: arrow::datatypes::SchemaRef,
) -> Result<RecordBatch> {
let pk_values = codec.decode(bytes).unwrap();
assert_eq!(builders.len(), pk_values.len());
let arrays = builders
.iter_mut()
.zip(pk_values.iter())
.map(|(builder, pk_value)| {
builder.push_value_ref(pk_value.as_value_ref());
builder.to_vector().to_arrow_array()
})
.collect();
RecordBatch::try_new(pk_schema, arrays).context(NewRecordBatchSnafu)
}
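`pk_to_record_batch` turns one decoded primary key into a single-row record batch that the predicate can evaluate. Here is a hedged, self-contained sketch of the same idea using the arrow crate directly, with an int64 tag and an illustrative column name; the real code builds the columns through `MutableVector` builders driven by the region's primary-key schema:

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, Int64Array};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::error::ArrowError;
use arrow::record_batch::RecordBatch;

/// Builds a single-row record batch for one int64 tag value, similar in
/// spirit to `pk_to_record_batch` after the codec has decoded the key.
fn tag_to_record_batch(tag_name: &str, tag_value: i64) -> Result<RecordBatch, ArrowError> {
    let schema = Arc::new(Schema::new(vec![Field::new(tag_name, DataType::Int64, true)]));
    let column: ArrayRef = Arc::new(Int64Array::from(vec![tag_value]));
    RecordBatch::try_new(schema, vec![column])
}

fn main() -> Result<(), ArrowError> {
    // One row per primary key; a pruning predicate can evaluate this batch.
    let rb = tag_to_record_batch("k0", 42)?;
    assert_eq!(1, rb.num_rows());
    println!("{:?}", rb);
    Ok(())
}
```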
/// A `Series` holds a list of field values of some given primary key.
struct Series {
pk_cache: Option<RecordBatch>,
active: ValueBuilder,
frozen: Vec<Values>,
}
@@ -333,6 +445,7 @@ struct Series {
impl Series {
fn new(region_metadata: &RegionMetadataRef) -> Self {
Self {
pk_cache: None,
active: ValueBuilder::new(region_metadata, INITIAL_BUILDER_CAPACITY),
frozen: vec![],
}
@@ -343,6 +456,10 @@ impl Series {
self.active.push(ts, sequence, op_type as u8, values);
}
fn update_pk_cache(&mut self, pk_batch: RecordBatch) {
self.pk_cache = Some(pk_batch);
}
/// Freezes the active part and push it to `frozen`.
fn freeze(&mut self, region_metadata: &RegionMetadataRef) {
if self.active.len() != 0 {
@@ -784,7 +901,13 @@ mod tests {
#[test]
fn test_series_set_concurrency() {
let schema = schema_for_test();
let set = Arc::new(SeriesSet::new(schema.clone()));
let row_codec = Arc::new(McmpRowCodec::new(
schema
.primary_key_columns()
.map(|c| SortField::new(c.column_schema.data_type.clone()))
.collect(),
));
let set = Arc::new(SeriesSet::new(schema.clone(), row_codec));
let concurrency = 32;
let pk_num = concurrency * 2;
@@ -866,7 +989,7 @@ mod tests {
.map(|kv| kv.timestamp().as_timestamp().unwrap().unwrap().value())
.collect::<HashSet<_>>();
let iter = memtable.iter(None, &[]);
let iter = memtable.iter(None, None);
let read = iter
.flat_map(|batch| {
batch
@@ -892,7 +1015,7 @@ mod tests {
let memtable = TimeSeriesMemtable::new(schema, 42, None);
memtable.write(&kvs).unwrap();
let iter = memtable.iter(Some(&[3]), &[]);
let iter = memtable.iter(Some(&[3]), None);
let mut v0_all = vec![];

View File

@@ -12,5 +12,49 @@
// See the License for the specific language governing permissions and
// limitations under the License.
/// Stage label.
pub const STAGE_LABEL: &str = "stage";
/// Global write buffer size in bytes.
pub const WRITE_BUFFER_BYTES: &str = "storage.write_buffer_bytes";
pub const WRITE_BUFFER_BYTES: &str = "mito.write_buffer_bytes";
/// Type label.
pub const TYPE_LABEL: &str = "type";
/// Gauge for open regions
pub const REGION_COUNT: &str = "mito.region_count";
/// Elapsed time to handle requests.
pub const HANDLE_REQUEST_ELAPSED: &str = "mito.handle_request.elapsed";
// ------ Flush related metrics
/// Counter of scheduled flush requests.
/// Note that the flush scheduler may merge some flush requests.
pub const FLUSH_REQUESTS_TOTAL: &str = "mito.flush.requests_total";
/// Reason to flush.
pub const FLUSH_REASON: &str = "reason";
/// Counter of failed flush jobs.
pub const FLUSH_ERRORS_TOTAL: &str = "mito.flush.errors_total";
/// Elapsed time of a flush job.
pub const FLUSH_ELAPSED: &str = "mito.flush.elapsed";
/// Histogram of flushed bytes.
pub const FLUSH_BYTES_TOTAL: &str = "mito.flush.bytes_total";
// ------ End of flush related metrics
// ------ Write related metrics
/// Counter of stalled write requests.
pub const WRITE_STALL_TOTAL: &str = "mito.write.stall_total";
/// Counter of rejected write requests.
pub const WRITE_REJECT_TOTAL: &str = "mito.write.reject_total";
/// Elapsed time of each write stage.
pub const WRITE_STAGE_ELAPSED: &str = "mito.write.stage_elapsed";
/// Counter of rows to write.
pub const WRITE_ROWS_TOTAL: &str = "mito.write.rows_total";
// ------ End of write related metrics
// Compaction metrics
/// Timer of different stages in compaction.
pub const COMPACTION_STAGE_ELAPSED: &str = "mito.compaction.stage_elapsed";
/// Timer of whole compaction task.
pub const COMPACTION_ELAPSED_TOTAL: &str = "mito.compaction.total_elapsed";
/// Counter of all requested compaction tasks.
pub const COMPACTION_REQUEST_COUNT: &str = "mito.compaction.requests_total";
/// Counter of failed compaction tasks.
pub const COMPACTION_FAILURE_COUNT: &str = "mito.compaction.failure_total";
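These names are recorded with the `metrics` crate macros at the call sites shown elsewhere in this diff. A small sketch of the typical usage, assuming the same macro style (`increment_counter!`, `histogram!`, gauge macros); the constants are repeated locally only to keep the snippet self-contained:

```rust
use std::time::Instant;

use metrics::{decrement_gauge, histogram, increment_counter, increment_gauge};

// Mirrors the constants defined above; in the real crate they live in `crate::metrics`.
const FLUSH_REQUESTS_TOTAL: &str = "mito.flush.requests_total";
const FLUSH_REASON: &str = "reason";
const COMPACTION_ELAPSED_TOTAL: &str = "mito.compaction.total_elapsed";
const REGION_COUNT: &str = "mito.region_count";

fn record_flush_request(reason: &'static str) {
    // Counter with a `reason` label, as done when scheduling a flush.
    increment_counter!(FLUSH_REQUESTS_TOTAL, FLUSH_REASON => reason);
}

fn record_compaction(start: Instant) {
    // Histogram observed with the task's elapsed time on success.
    histogram!(COMPACTION_ELAPSED_TOTAL, start.elapsed());
}

fn on_region_opened() {
    increment_gauge!(REGION_COUNT, 1.0);
}

fn on_region_closed() {
    decrement_gauge!(REGION_COUNT, 1.0);
}

fn main() {
    // Without an installed recorder these calls are no-ops, so the sketch runs as-is.
    on_region_opened();
    record_flush_request("manual");
    record_compaction(Instant::now());
    on_region_closed();
}
```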

View File

@@ -30,10 +30,13 @@ use datatypes::arrow;
use datatypes::arrow::array::{Array, ArrayRef};
use datatypes::arrow::compute::SortOptions;
use datatypes::arrow::row::{RowConverter, SortField};
use datatypes::prelude::{DataType, ScalarVector};
use datatypes::prelude::{ConcreteDataType, DataType, ScalarVector};
use datatypes::types::TimestampType;
use datatypes::value::ValueRef;
use datatypes::vectors::{
BooleanVector, Helper, UInt32Vector, UInt64Vector, UInt8Vector, Vector, VectorRef,
BooleanVector, Helper, TimestampMicrosecondVector, TimestampMillisecondVector,
TimestampNanosecondVector, TimestampSecondVector, UInt32Vector, UInt64Vector, UInt8Vector,
Vector, VectorRef,
};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::metadata::RegionMetadata;
@@ -355,6 +358,47 @@ impl Batch {
.collect()
}
/// Returns timestamps in a native slice or `None` if the batch is empty.
pub(crate) fn timestamps_native(&self) -> Option<&[i64]> {
if self.timestamps.is_empty() {
return None;
}
let values = match self.timestamps.data_type() {
ConcreteDataType::Timestamp(TimestampType::Second(_)) => self
.timestamps
.as_any()
.downcast_ref::<TimestampSecondVector>()
.unwrap()
.as_arrow()
.values(),
ConcreteDataType::Timestamp(TimestampType::Millisecond(_)) => self
.timestamps
.as_any()
.downcast_ref::<TimestampMillisecondVector>()
.unwrap()
.as_arrow()
.values(),
ConcreteDataType::Timestamp(TimestampType::Microsecond(_)) => self
.timestamps
.as_any()
.downcast_ref::<TimestampMicrosecondVector>()
.unwrap()
.as_arrow()
.values(),
ConcreteDataType::Timestamp(TimestampType::Nanosecond(_)) => self
.timestamps
.as_any()
.downcast_ref::<TimestampNanosecondVector>()
.unwrap()
.as_arrow()
.values(),
other => panic!("timestamps in a Batch has other type {:?}", other),
};
Some(values)
}
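The merge path shown later in this diff relies on `slice::binary_search` over this native slice: `Ok(pos)` signals an exact duplicate timestamp, while `Err(pos)` is the insertion point, and in both cases the rows before `pos` are safe to output. A tiny standalone illustration:

```rust
fn main() {
    // Sorted native timestamps of the top-most batch.
    let timestamps: &[i64] = &[1, 2, 4, 5, 7];

    // 4 exists: `Ok(2)` means rows before index 2 are strictly smaller.
    assert_eq!(Ok(2), timestamps.binary_search(&4));

    // 3 does not exist: `Err(2)` is where it would be inserted, so rows
    // before index 2 can be output without overlapping the next batch.
    assert_eq!(Err(2), timestamps.binary_search(&3));
}
```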
/// Takes the batch in place.
fn take_in_place(&mut self, indices: &UInt32Vector) -> Result<()> {
self.timestamps = self.timestamps.take(indices).context(ComputeVectorSnafu)?;
@@ -392,7 +436,7 @@ impl Batch {
///
/// # Panics
/// Panics if `index` is out-of-bound or the sequence vector returns null.
fn get_sequence(&self, index: usize) -> SequenceNumber {
pub(crate) fn get_sequence(&self, index: usize) -> SequenceNumber {
// Safety: sequences is not null so it actually returns Some.
self.sequences.get_data(index).unwrap()
}
@@ -646,12 +690,13 @@ mod tests {
}
#[test]
fn test_first_last_empty() {
fn test_empty_batch() {
let batch = new_batch(&[], &[], &[], &[]);
assert_eq!(None, batch.first_timestamp());
assert_eq!(None, batch.last_timestamp());
assert_eq!(None, batch.first_sequence());
assert_eq!(None, batch.last_sequence());
assert!(batch.timestamps_native().is_none());
}
#[test]
@@ -707,6 +752,17 @@ mod tests {
assert_eq!(expect, batch);
}
#[test]
fn test_timestamps_native() {
let batch = new_batch(
&[1, 2, 3, 4],
&[11, 12, 13, 14],
&[OpType::Put, OpType::Delete, OpType::Put, OpType::Put],
&[21, 22, 23, 24],
);
assert_eq!(&[1, 2, 3, 4], batch.timestamps_native().unwrap());
}
#[test]
fn test_concat_empty() {
let err = Batch::concat(vec![]).unwrap_err();

View File

@@ -15,7 +15,7 @@
//! Merge reader implementation.
use std::cmp::Ordering;
use std::collections::BinaryHeap;
use std::collections::{BinaryHeap, VecDeque};
use std::mem;
use async_trait::async_trait;
@@ -37,32 +37,27 @@ pub struct MergeReader {
nodes: BinaryHeap<Node>,
/// Batches for the next primary key.
batch_merger: BatchMerger,
/// Sorted batches to output.
output: VecDeque<Batch>,
}
#[async_trait]
impl BatchReader for MergeReader {
async fn next_batch(&mut self) -> Result<Option<Batch>> {
// Collect batches from sources for the same primary key and return
// the collected batch.
while !self.nodes.is_empty() {
// Peek current key.
let Some(current_key) = self.batch_merger.primary_key() else {
// The merger is empty, we could push it directly.
self.take_batch_from_heap().await?;
// Try next node.
continue;
};
// If the next node has a different key, we have finished collecting the current key.
// Safety: node is not empty.
if self.nodes.peek().unwrap().primary_key() != current_key {
break;
while !self.output.is_empty() || !self.nodes.is_empty() {
// Takes from sorted output if there are batches in it.
if let Some(batch) = self.output.pop_front() {
return Ok(Some(batch));
}
// They have the same primary key, so we take it and try the next node.
self.take_batch_from_heap().await?;
// Collects batches to the merger.
self.collect_batches_to_merge().await?;
// Merge collected batches to output.
self.output = self.batch_merger.merge_batches()?;
}
// Merge collected batches.
self.batch_merger.merge_batches()
Ok(None)
}
}
@@ -81,9 +76,32 @@ impl MergeReader {
Ok(MergeReader {
nodes,
batch_merger: BatchMerger::new(),
output: VecDeque::new(),
})
}
/// Collects batches from sources for the same primary key.
async fn collect_batches_to_merge(&mut self) -> Result<()> {
while !self.nodes.is_empty() {
// Peek current key.
let Some(current_key) = self.batch_merger.primary_key() else {
// The merger is empty, we could push it directly.
self.take_batch_from_heap().await?;
// Try next node.
continue;
};
// If the next node has a different key, we have finished collecting the current key.
// Safety: node is not empty.
if self.nodes.peek().unwrap().primary_key() != current_key {
break;
}
// They have the same primary key, so we take it and try the next node.
self.take_batch_from_heap().await?;
}
Ok(())
}
/// Takes batch from heap top and reheap.
async fn take_batch_from_heap(&mut self) -> Result<()> {
let mut next_node = self.nodes.pop().unwrap();
@@ -201,32 +219,143 @@ impl BatchMerger {
/// Merges all buffered batches and returns the merged batches, then
/// resets the buffer.
fn merge_batches(&mut self) -> Result<Option<Batch>> {
fn merge_batches(&mut self) -> Result<VecDeque<Batch>> {
if self.batches.is_empty() {
return Ok(None);
return Ok(VecDeque::new());
}
let batches = mem::take(&mut self.batches);
// Concat all batches.
let mut batch = Batch::concat(batches)?;
let mut output = VecDeque::with_capacity(self.batches.len());
if self.is_sorted {
// Fast path. We can output batches directly.
for batch in self.batches.drain(..) {
output_batch(&mut output, batch)?;
}
// TODO(yingwen): metrics for sorted and unsorted batches.
if !self.is_sorted {
// Slow path. We need to merge overlapping batches. For simplicity, we
// just sort all the batches and remove duplicates.
batch.sort_and_dedup()?;
// We don't need to remove duplicates if the timestamps of the batches
// are not overlapping.
return Ok(output);
}
// Filter rows by op type. Currently, the reader only removes deleted rows but doesn't filter
// rows by sequence for simplicity and performance reasons.
batch.filter_deleted()?;
// Reset merger.
// Slow path. We need to merge overlapping batches.
// Constructs a heap from batches. Batches in the heap are never empty, so we need to check
// this before pushing a batch into the heap.
let mut heap = BinaryHeap::from_iter(self.batches.drain(..).map(CompareTimeSeq));
// Reset merger as sorted as we have cleared batches.
self.is_sorted = true;
Ok(Some(batch))
// Sorts batches.
while let Some(top) = heap.pop() {
let top = top.0;
let Some(next) = heap.peek() else {
// If there is no remaining batch, we can output the top-most batch.
output_batch(&mut output, top)?;
break;
};
let next = &next.0;
if top.last_timestamp() < next.first_timestamp() {
// If the top-most batch doesn't overlap with the next batch, we can output it.
output_batch(&mut output, top)?;
continue;
}
// Safety: Batches (top, next) in the heap are not empty, so we can use unwrap here.
// Min timestamp in the next batch.
let next_min_ts = next.first_timestamp().unwrap();
let timestamps = top.timestamps_native().unwrap();
// Binary searches the timestamp in the top batch.
// Safety: Batches should have the same timestamp resolution so we can compare the native
// value directly.
match timestamps.binary_search(&next_min_ts.value()) {
Ok(pos) => {
// They have duplicate timestamps. Outputs non overlapping timestamps.
// Batch itself doesn't contain duplicate timestamps so timestamps before `pos`
// must be less than `next_min_ts`.
// It's possible to output a very small batch but concatenating small batches
// slows down the reader.
output_batch(&mut output, top.slice(0, pos))?;
// Removes the duplicate timestamp and fixes the heap. Keeps the timestamp with the largest
// sequence.
// Safety: pos is a valid index returned by `binary_search` and `sequences` are always
// not null.
if top.get_sequence(pos) > next.first_sequence().unwrap() {
// Safety: `next` is not None.
let next = heap.pop().unwrap().0;
// Keeps the timestamp in top and skips the first timestamp in the `next`
// batch.
push_remaining_to_heap(&mut heap, next, 1);
// Skips already outputted timestamps.
push_remaining_to_heap(&mut heap, top, pos);
} else {
// Keeps timestamp in next and skips the duplicated timestamp and already outputted
// timestamp in top.
push_remaining_to_heap(&mut heap, top, pos + 1);
}
}
Err(pos) => {
// No duplicate timestamp. Outputs timestamp before `pos`.
output_batch(&mut output, top.slice(0, pos))?;
push_remaining_to_heap(&mut heap, top, pos);
}
}
}
Ok(output)
}
}
/// Skips the first `num_to_skip` rows of the batch and pushes the remaining batch into the heap if it
/// is still not empty.
fn push_remaining_to_heap(heap: &mut BinaryHeap<CompareTimeSeq>, batch: Batch, num_to_skip: usize) {
debug_assert!(batch.num_rows() >= num_to_skip);
let remaining = batch.num_rows() - num_to_skip;
if remaining == 0 {
// Nothing remains.
return;
}
heap.push(CompareTimeSeq(batch.slice(num_to_skip, remaining)));
}
/// Removes deleted items from the `batch` and pushes it back to the `output` if
/// the `batch` is not empty.
fn output_batch(output: &mut VecDeque<Batch>, mut batch: Batch) -> Result<()> {
// Filter rows by op type. Currently, the reader only removes deleted rows but doesn't filter
// rows by sequence for simplicity and performance reasons.
batch.filter_deleted()?;
if batch.is_empty() {
return Ok(());
}
output.push_back(batch);
Ok(())
}
/// Compares [Batch]es by their first timestamp and first sequence.
struct CompareTimeSeq(Batch);
impl PartialEq for CompareTimeSeq {
fn eq(&self, other: &Self) -> bool {
self.0.first_timestamp() == other.0.first_timestamp()
&& self.0.first_sequence() == other.0.first_sequence()
}
}
impl Eq for CompareTimeSeq {}
impl PartialOrd for CompareTimeSeq {
fn partial_cmp(&self, other: &CompareTimeSeq) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for CompareTimeSeq {
/// Compares by first timestamp desc, first sequence. (The heap is a max heap).
fn cmp(&self, other: &CompareTimeSeq) -> Ordering {
self.0
.first_timestamp()
.cmp(&other.0.first_timestamp())
.then_with(|| other.0.first_sequence().cmp(&self.0.first_sequence()))
// We reverse the ordering as the heap is a max heap.
.reverse()
}
}
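Putting the pieces together, the slow path is a k-way merge over a heap that, for duplicate timestamps, keeps the row with the larger sequence. Below is a minimal standalone sketch of that idea using plain `(timestamp, sequence)` tuples instead of `Batch` slices; it is illustrative only, since the real reader avoids row-by-row work by slicing whole batches around the binary-search position:

```rust
use std::cmp::Reverse;
use std::collections::BinaryHeap;

/// One sorted run of (timestamp, sequence) rows, standing in for a `Batch`.
type Run = Vec<(i64, u64)>;

/// Merges sorted runs into one sorted, deduplicated output, keeping the row
/// with the largest sequence when timestamps collide.
fn merge_dedup(runs: Vec<Run>) -> Vec<(i64, u64)> {
    // Min-heap keyed by (timestamp, Reverse(sequence)) so that, for equal
    // timestamps, the largest sequence is popped first.
    let mut heap = BinaryHeap::new();
    for (run_idx, run) in runs.iter().enumerate() {
        if let Some(&(ts, seq)) = run.first() {
            heap.push(Reverse((ts, Reverse(seq), run_idx, 0usize)));
        }
    }
    let mut output: Vec<(i64, u64)> = Vec::new();
    while let Some(Reverse((ts, Reverse(seq), run_idx, pos))) = heap.pop() {
        // The first row popped for a timestamp has the largest sequence;
        // later ones with the same timestamp are older versions and are skipped.
        if output.last().map(|(last_ts, _)| *last_ts) != Some(ts) {
            output.push((ts, seq));
        }
        // Push the next row of the same run, if any.
        if let Some(&(next_ts, next_seq)) = runs[run_idx].get(pos + 1) {
            heap.push(Reverse((next_ts, Reverse(next_seq), run_idx, pos + 1)));
        }
    }
    output
}

fn main() {
    let runs = vec![
        vec![(1, 10), (3, 10), (5, 10)],
        vec![(2, 11), (3, 11), (4, 11)],
    ];
    // Timestamp 3 appears twice; the copy with sequence 11 wins.
    let expected: Vec<(i64, u64)> = vec![(1, 10), (2, 11), (3, 11), (4, 11), (5, 10)];
    assert_eq!(expected, merge_dedup(runs));
    println!("merge ok");
}
```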
@@ -396,17 +525,19 @@ mod tests {
&[
new_batch(
b"k1",
&[1, 2, 4, 5, 7],
&[11, 12, 14, 15, 17],
&[
OpType::Put,
OpType::Put,
OpType::Put,
OpType::Put,
OpType::Put,
],
&[21, 22, 24, 25, 27],
&[1, 2],
&[11, 12],
&[OpType::Put, OpType::Put],
&[21, 22],
),
new_batch(
b"k1",
&[4, 5],
&[14, 15],
&[OpType::Put, OpType::Put],
&[24, 25],
),
new_batch(b"k1", &[7], &[17], &[OpType::Put], &[27]),
new_batch(b"k2", &[3], &[13], &[OpType::Put], &[23]),
],
)
@@ -467,27 +598,63 @@ mod tests {
&[
new_batch(
b"k1",
&[1, 2, 3, 4],
&[11, 12, 10, 14],
&[OpType::Put, OpType::Put, OpType::Put, OpType::Put],
&[21, 22, 33, 24],
),
new_batch(
b"k2",
&[1, 3, 10],
&[11, 13, 20],
&[OpType::Put, OpType::Put, OpType::Put],
&[21, 23, 30],
&[1, 2],
&[11, 12],
&[OpType::Put, OpType::Put],
&[21, 22],
),
new_batch(b"k1", &[3], &[10], &[OpType::Put], &[33]),
new_batch(b"k1", &[4], &[14], &[OpType::Put], &[24]),
new_batch(b"k2", &[1], &[11], &[OpType::Put], &[21]),
new_batch(b"k2", &[3], &[13], &[OpType::Put], &[23]),
new_batch(b"k2", &[10], &[20], &[OpType::Put], &[30]),
],
)
.await;
}
#[tokio::test]
async fn test_merge_deleted() {
let reader1 = VecBatchReader::new(&[
new_batch(
b"k1",
&[1, 2],
&[11, 12],
&[OpType::Delete, OpType::Delete],
&[21, 22],
),
new_batch(
b"k2",
&[2, 3],
&[12, 13],
&[OpType::Delete, OpType::Put],
&[22, 23],
),
]);
let reader2 = VecBatchReader::new(&[new_batch(
b"k1",
&[4, 5],
&[14, 15],
&[OpType::Delete, OpType::Delete],
&[24, 25],
)]);
let mut reader = MergeReaderBuilder::new()
.push_batch_reader(Box::new(reader1))
.push_batch_iter(Box::new(reader2))
.build()
.await
.unwrap();
check_reader_result(
&mut reader,
&[new_batch(b"k2", &[3], &[13], &[OpType::Put], &[23])],
)
.await;
}
#[test]
fn test_batch_merger_empty() {
let mut merger = BatchMerger::new();
assert!(merger.merge_batches().unwrap().is_none());
assert!(merger.merge_batches().unwrap().is_empty());
}
#[test]
@@ -509,7 +676,48 @@ mod tests {
&[22, 24],
));
assert!(!merger.is_sorted);
let batch = merger.merge_batches().unwrap().unwrap();
let batches = merger.merge_batches().unwrap();
let batch = Batch::concat(batches.into_iter().collect()).unwrap();
assert_eq!(
batch,
new_batch(
b"k1",
&[1, 2, 3, 4, 5],
&[10, 11, 10, 11, 10],
&[
OpType::Put,
OpType::Put,
OpType::Put,
OpType::Put,
OpType::Put
],
&[21, 22, 23, 24, 25]
)
);
assert!(merger.is_sorted);
}
#[test]
fn test_batch_merger_unsorted_by_heap() {
let mut merger = BatchMerger::new();
merger.push(new_batch(
b"k1",
&[1, 3, 5],
&[10, 10, 10],
&[OpType::Put, OpType::Put, OpType::Put],
&[21, 23, 25],
));
assert!(merger.is_sorted);
merger.push(new_batch(
b"k1",
&[2, 4],
&[11, 11],
&[OpType::Put, OpType::Put],
&[22, 24],
));
assert!(!merger.is_sorted);
let batches = merger.merge_batches().unwrap();
let batch = Batch::concat(batches.into_iter().collect()).unwrap();
assert_eq!(
batch,
new_batch(

View File

@@ -14,6 +14,7 @@
//! Utilities for projection.
use std::cmp::Ordering;
use std::collections::HashMap;
use std::sync::Arc;
@@ -23,22 +24,28 @@ use common_recordbatch::error::ExternalSnafu;
use common_recordbatch::RecordBatch;
use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::schema::{Schema, SchemaRef};
use datatypes::value::ValueRef;
use datatypes::value::Value;
use datatypes::vectors::VectorRef;
use snafu::{OptionExt, ResultExt};
use store_api::metadata::RegionMetadataRef;
use store_api::storage::ColumnId;
use crate::cache::CacheManager;
use crate::error::{InvalidRequestSnafu, Result};
use crate::read::Batch;
use crate::row_converter::{McmpRowCodec, RowCodec, SortField};
/// Only cache a vector when its length is `<=` this value.
const MAX_VECTOR_LENGTH_TO_CACHE: usize = 16384;
/// Handles projection and converts a projected [Batch] to a projected [RecordBatch].
pub struct ProjectionMapper {
/// Metadata of the region.
metadata: RegionMetadataRef,
/// Maps column in [RecordBatch] to index in [Batch].
batch_indices: Vec<BatchIndex>,
/// Whether the output record batch contains tags.
has_tags: bool,
/// Decoder for primary key.
codec: McmpRowCodec,
/// Schema for converted [RecordBatch].
@@ -92,6 +99,7 @@ impl ProjectionMapper {
.collect();
// For each projected column, compute its index in batches.
let mut batch_indices = Vec::with_capacity(projection.len());
let mut has_tags = false;
for idx in &projection {
// Safety: idx is valid.
let column = &metadata.column_metadatas[*idx];
@@ -100,6 +108,8 @@ impl ProjectionMapper {
SemanticType::Tag => {
// Safety: It is a primary key column.
let index = metadata.primary_key_index(column.column_id).unwrap();
// We need to output a tag.
has_tags = true;
// We always read all primary key columns, so the column always exists and the tag
// index is always valid.
BatchIndex::Tag(index)
@@ -117,6 +127,7 @@ impl ProjectionMapper {
Ok(ProjectionMapper {
metadata: metadata.clone(),
batch_indices,
has_tags,
codec,
output_schema,
column_ids,
@@ -152,7 +163,11 @@ impl ProjectionMapper {
/// Converts a [Batch] to a [RecordBatch].
///
/// The batch must match the `projection` using to build the mapper.
pub(crate) fn convert(&self, batch: &Batch) -> common_recordbatch::error::Result<RecordBatch> {
pub(crate) fn convert(
&self,
batch: &Batch,
cache_manager: Option<&CacheManager>,
) -> common_recordbatch::error::Result<RecordBatch> {
debug_assert_eq!(self.batch_fields.len(), batch.fields().len());
debug_assert!(self
.batch_fields
@@ -160,11 +175,15 @@ impl ProjectionMapper {
.zip(batch.fields())
.all(|(id, batch_col)| *id == batch_col.column_id));
let pk_values = self
.codec
.decode(batch.primary_key())
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
// Skips decoding pk if we don't need to output it.
let pk_values = if self.has_tags {
self.codec
.decode(batch.primary_key())
.map_err(BoxedError::new)
.context(ExternalSnafu)?
} else {
Vec::new()
};
let mut columns = Vec::with_capacity(self.output_schema.num_columns());
let num_rows = batch.num_rows();
@@ -175,8 +194,16 @@ impl ProjectionMapper {
{
match index {
BatchIndex::Tag(idx) => {
let value = pk_values[*idx].as_value_ref();
let vector = new_repeated_vector(&column_schema.data_type, value, num_rows)?;
let value = &pk_values[*idx];
let vector = match cache_manager {
Some(cache) => repeated_vector_with_cache(
&column_schema.data_type,
value,
num_rows,
cache,
)?,
None => new_repeated_vector(&column_schema.data_type, value, num_rows)?,
};
columns.push(vector);
}
BatchIndex::Timestamp => {
@@ -203,21 +230,161 @@ enum BatchIndex {
Field(usize),
}
/// Gets a vector with repeated values from specific cache or creates a new one.
fn repeated_vector_with_cache(
data_type: &ConcreteDataType,
value: &Value,
num_rows: usize,
cache_manager: &CacheManager,
) -> common_recordbatch::error::Result<VectorRef> {
if let Some(vector) = cache_manager.get_repeated_vector(value) {
// Tries to get the vector from the cache manager. If the cached vector
// isn't long enough, creates a new one.
match vector.len().cmp(&num_rows) {
Ordering::Less => (),
Ordering::Equal => return Ok(vector),
Ordering::Greater => return Ok(vector.slice(0, num_rows)),
}
}
// Creates a new one.
let vector = new_repeated_vector(data_type, value, num_rows)?;
// Updates cache.
if vector.len() <= MAX_VECTOR_LENGTH_TO_CACHE {
cache_manager.put_repeated_vector(value.clone(), vector.clone());
}
Ok(vector)
}
/// Returns a vector with repeated values.
fn new_repeated_vector(
data_type: &ConcreteDataType,
value: ValueRef,
value: &Value,
num_rows: usize,
) -> common_recordbatch::error::Result<VectorRef> {
let mut mutable_vector = data_type.create_mutable_vector(1);
mutable_vector
.try_push_value_ref(value)
.try_push_value_ref(value.as_value_ref())
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
// This requires an additional allocation.
// TODO(yingwen): Add a way to create repeated vector to data type.
let base_vector = mutable_vector.to_vector();
Ok(base_vector.replicate(&[num_rows]))
}
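A standalone sketch of the caching decision in `repeated_vector_with_cache`: reuse a cached vector when it is long enough (slicing a longer one down), otherwise build a new one and cache it only below a size limit. `Vec<i64>` stands in for the Arc'd vectors that the real cache shares without copying:

```rust
use std::collections::HashMap;

/// Maximum length a repeated "vector" may have to be cached, mirroring
/// MAX_VECTOR_LENGTH_TO_CACHE above.
const MAX_CACHED_LEN: usize = 16384;

/// Stand-in cache: tag value -> repeated values.
type RepeatedVectorCache = HashMap<i64, Vec<i64>>;

/// Returns `num_rows` copies of `value`, reusing the cache when possible.
fn repeated_with_cache(cache: &mut RepeatedVectorCache, value: i64, num_rows: usize) -> Vec<i64> {
    if let Some(cached) = cache.get(&value) {
        if cached.len() == num_rows {
            return cached.clone();
        }
        if cached.len() > num_rows {
            // A longer cached vector can be sliced down.
            return cached[..num_rows].to_vec();
        }
        // Too short: fall through and rebuild.
    }
    let built = vec![value; num_rows];
    if built.len() <= MAX_CACHED_LEN {
        cache.insert(value, built.clone());
    }
    built
}

fn main() {
    let mut cache = RepeatedVectorCache::new();
    assert_eq!(vec![7, 7, 7], repeated_with_cache(&mut cache, 7, 3));
    // A second call with fewer rows reuses the cached vector via slicing.
    assert_eq!(vec![7, 7], repeated_with_cache(&mut cache, 7, 2));
}
```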
// TODO(yingwen): Add tests for mapper.
#[cfg(test)]
mod tests {
use api::v1::OpType;
use datatypes::arrow::array::{Int64Array, TimestampMillisecondArray, UInt64Array, UInt8Array};
use datatypes::arrow::util::pretty;
use datatypes::value::ValueRef;
use super::*;
use crate::read::BatchBuilder;
use crate::test_util::meta_util::TestRegionMetadataBuilder;
fn new_batch(
ts_start: i64,
tags: &[i64],
fields: &[(ColumnId, i64)],
num_rows: usize,
) -> Batch {
let converter = McmpRowCodec::new(
(0..tags.len())
.map(|_| SortField::new(ConcreteDataType::int64_datatype()))
.collect(),
);
let primary_key = converter
.encode(tags.iter().map(|v| ValueRef::Int64(*v)))
.unwrap();
let mut builder = BatchBuilder::new(primary_key);
builder
.timestamps_array(Arc::new(TimestampMillisecondArray::from_iter_values(
(0..num_rows).map(|i| ts_start + i as i64 * 1000),
)))
.unwrap()
.sequences_array(Arc::new(UInt64Array::from_iter_values(0..num_rows as u64)))
.unwrap()
.op_types_array(Arc::new(UInt8Array::from_iter_values(
(0..num_rows).map(|_| OpType::Put as u8),
)))
.unwrap();
for (column_id, field) in fields {
builder
.push_field_array(
*column_id,
Arc::new(Int64Array::from_iter_values(
std::iter::repeat(*field).take(num_rows),
)),
)
.unwrap();
}
builder.build().unwrap()
}
fn print_record_batch(record_batch: RecordBatch) -> String {
pretty::pretty_format_batches(&[record_batch.into_df_record_batch()])
.unwrap()
.to_string()
}
#[test]
fn test_projection_mapper_all() {
let metadata = Arc::new(
TestRegionMetadataBuilder::default()
.num_tags(2)
.num_fields(2)
.build(),
);
let mapper = ProjectionMapper::all(&metadata).unwrap();
assert_eq!([0, 1, 2, 3, 4], mapper.column_ids());
assert_eq!([3, 4], mapper.batch_fields());
let cache = CacheManager::new(0, 1024);
let batch = new_batch(0, &[1, 2], &[(3, 3), (4, 4)], 3);
let record_batch = mapper.convert(&batch, Some(&cache)).unwrap();
let expect = "\
+---------------------+----+----+----+----+
| ts | k0 | k1 | v0 | v1 |
+---------------------+----+----+----+----+
| 1970-01-01T00:00:00 | 1 | 2 | 3 | 4 |
| 1970-01-01T00:00:01 | 1 | 2 | 3 | 4 |
| 1970-01-01T00:00:02 | 1 | 2 | 3 | 4 |
+---------------------+----+----+----+----+";
assert_eq!(expect, print_record_batch(record_batch));
assert!(cache.get_repeated_vector(&Value::Int64(1)).is_some());
assert!(cache.get_repeated_vector(&Value::Int64(2)).is_some());
assert!(cache.get_repeated_vector(&Value::Int64(3)).is_none());
let record_batch = mapper.convert(&batch, Some(&cache)).unwrap();
assert_eq!(expect, print_record_batch(record_batch));
}
#[test]
fn test_projection_mapper_with_projection() {
let metadata = Arc::new(
TestRegionMetadataBuilder::default()
.num_tags(2)
.num_fields(2)
.build(),
);
// Columns v1, k0
let mapper = ProjectionMapper::new(&metadata, [4, 1].into_iter()).unwrap();
assert_eq!([4, 1], mapper.column_ids());
assert_eq!([4], mapper.batch_fields());
let batch = new_batch(0, &[1, 2], &[(4, 4)], 3);
let record_batch = mapper.convert(&batch, None).unwrap();
let expect = "\
+----+----+
| v1 | k0 |
+----+----+
| 4 | 1 |
| 4 | 1 |
| 4 | 1 |
+----+----+";
assert_eq!(expect, print_record_batch(record_batch));
}
}

View File

@@ -165,8 +165,9 @@ impl ScanRegion {
.collect();
debug!(
"Seq scan region {}, memtables: {}, ssts_to_read: {}, total_ssts: {}",
"Seq scan region {}, request: {:?}, memtables: {}, ssts_to_read: {}, total_ssts: {}",
self.version.metadata.region_id,
self.request,
memtables.len(),
files.len(),
total_ssts

View File

@@ -110,9 +110,16 @@ impl SeqScan {
// Creates a stream to poll the batch reader and convert batch into record batch.
let mapper = self.mapper.clone();
let cache_manager = self.cache_manager.clone();
let stream = try_stream! {
while let Some(batch) = reader.next_batch().await.map_err(BoxedError::new).context(ExternalSnafu)? {
yield mapper.convert(&batch)?;
let cache = cache_manager.as_ref().map(|cache| cache.as_ref());
while let Some(batch) = reader
.next_batch()
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?
{
yield mapper.convert(&batch, cache)?;
}
};
let stream = Box::pin(RecordBatchStreamAdaptor::new(
@@ -128,8 +135,7 @@ impl SeqScan {
// Scans all memtables and SSTs. Builds a merge reader to merge results.
let mut builder = MergeReaderBuilder::new();
for mem in &self.memtables {
// TODO(hl): pass filters once memtable supports filter pushdown.
let iter = mem.iter(Some(self.mapper.column_ids()), &[]);
let iter = mem.iter(Some(self.mapper.column_ids()), self.predicate.clone());
builder.push_batch_iter(iter);
}
for file in &self.files {

View File

@@ -15,7 +15,7 @@
use std::mem;
use std::sync::Arc;
use api::v1::{Mutation, Rows, WalEntry};
use api::v1::{Mutation, OpType, Rows, WalEntry};
use common_query::Output;
use snafu::ResultExt;
use store_api::logstore::LogStore;
@@ -92,6 +92,14 @@ pub(crate) struct RegionWriteCtx {
///
/// The i-th notify is for i-th mutation.
notifiers: Vec<WriteNotify>,
/// Whether the write operation has failed; if so, we should not write to the mutable memtable.
failed: bool,
// Metrics:
/// Rows to put.
pub(crate) put_num: usize,
/// Rows to delete.
pub(crate) delete_num: usize,
}
impl RegionWriteCtx {
@@ -112,6 +120,9 @@ impl RegionWriteCtx {
next_entry_id: last_entry_id + 1,
wal_entry: WalEntry::default(),
notifiers: Vec::new(),
failed: false,
put_num: 0,
delete_num: 0,
}
}
@@ -130,6 +141,13 @@ impl RegionWriteCtx {
// Increase sequence number.
self.next_sequence += num_rows as u64;
// Update metrics.
match OpType::from_i32(op_type) {
Some(OpType::Delete) => self.delete_num += num_rows,
Some(OpType::Put) => self.put_num += num_rows,
None => (),
}
}
/// Encode and add WAL entry to the writer.
@@ -153,6 +171,9 @@ impl RegionWriteCtx {
for notify in &mut self.notifiers {
notify.err = Some(err.clone());
}
// Fail the whole write operation.
self.failed = true;
}
/// Updates next entry id.
@@ -164,6 +185,10 @@ impl RegionWriteCtx {
pub(crate) fn write_memtable(&mut self) {
debug_assert_eq!(self.notifiers.len(), self.wal_entry.mutations.len());
if self.failed {
return;
}
let mutable = &self.version.memtables.mutable;
// Takes mutations from the wal entry.
let mutations = mem::take(&mut self.wal_entry.mutations);
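A minimal sketch of the guard introduced here: when the WAL write fails, the context is marked `failed` and `write_memtable` becomes a no-op, so memtables never contain rows that are missing from the WAL. The types below are illustrative stand-ins, not the engine's `RegionWriteCtx`:

```rust
/// Minimal stand-in for the write context: rows staged for the memtable plus
/// a `failed` flag set when the WAL write fails.
struct WriteCtx {
    staged_rows: Vec<i64>,
    memtable: Vec<i64>,
    failed: bool,
}

impl WriteCtx {
    fn new(rows: Vec<i64>) -> Self {
        Self { staged_rows: rows, memtable: Vec::new(), failed: false }
    }

    /// Called when the WAL write returns an error.
    fn set_error(&mut self) {
        self.failed = true;
    }

    /// Applies staged rows to the memtable unless the WAL write failed.
    fn write_memtable(&mut self) {
        if self.failed {
            return;
        }
        self.memtable.append(&mut self.staged_rows);
    }
}

fn main() {
    let mut ok_ctx = WriteCtx::new(vec![1, 2, 3]);
    ok_ctx.write_memtable();
    assert_eq!(3, ok_ctx.memtable.len());

    let mut failed_ctx = WriteCtx::new(vec![1, 2, 3]);
    failed_ctx.set_error(); // WAL write failed.
    failed_ctx.write_memtable();
    assert!(failed_ctx.memtable.is_empty());
}
```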

View File

@@ -16,7 +16,7 @@
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use std::time::{Duration, Instant};
use api::helper::{
is_column_type_value_eq, is_semantic_type_eq, proto_value_type, to_column_data_type,
@@ -25,9 +25,11 @@ use api::helper::{
use api::v1::{ColumnDataType, ColumnSchema, OpType, Rows, SemanticType, Value};
use common_query::Output;
use common_query::Output::AffectedRows;
use common_telemetry::metric::Timer;
use common_telemetry::tracing::log::info;
use common_telemetry::warn;
use datatypes::prelude::DataType;
use metrics::histogram;
use prost::Message;
use smallvec::SmallVec;
use snafu::{ensure, OptionExt, ResultExt};
@@ -44,6 +46,7 @@ use crate::error::{
InvalidRequestSnafu, Result,
};
use crate::memtable::MemtableId;
use crate::metrics::COMPACTION_ELAPSED_TOTAL;
use crate::sst::file::FileMeta;
use crate::sst::file_purger::{FilePurgerRef, PurgeRequest};
use crate::wal::EntryId;
@@ -145,6 +148,7 @@ impl WriteRequest {
.map(|column| (&column.column_name, column))
.collect();
let mut need_fill_default = false;
// Checks all columns in this region.
for column in &metadata.column_metadatas {
if let Some(input_col) = rows_columns.remove(&column.column_schema.name) {
@@ -199,7 +203,7 @@ impl WriteRequest {
// Rows don't have this column.
self.check_missing_column(column)?;
return FillDefaultSnafu { region_id }.fail();
need_fill_default = true;
}
}
@@ -213,6 +217,9 @@ impl WriteRequest {
.fail();
}
// If we need to fill default values, return a special error.
ensure!(!need_fill_default, FillDefaultSnafu { region_id });
Ok(())
}
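The change above defers the fill-default signal: a missing column only sets `need_fill_default`, every remaining column is still validated (so type mismatches are reported first), and the special error is returned at the end. A minimal sketch of that control flow, using string type names instead of the real `WriteRequest` API:

```rust
use std::collections::HashMap;

#[derive(Debug, PartialEq)]
enum CheckError {
    /// Some writable column is missing and defaults must be filled first.
    FillDefault,
    /// A column has the wrong type; reported even if defaults are also missing.
    InvalidType(String),
}

/// Validates provided columns against the expected schema.
/// `expected` maps column name -> type name; `provided` maps name -> type name.
fn check_schema(
    expected: &[(&str, &str)],
    provided: &HashMap<&str, &str>,
) -> Result<(), CheckError> {
    let mut need_fill_default = false;
    for (name, expected_type) in expected {
        match provided.get(name) {
            Some(given) if given != expected_type => {
                // Type errors surface immediately, as before.
                return Err(CheckError::InvalidType((*name).to_string()));
            }
            Some(_) => {}
            // Missing column: remember it, but keep checking the rest.
            None => need_fill_default = true,
        }
    }
    if need_fill_default {
        return Err(CheckError::FillDefault);
    }
    Ok(())
}

fn main() {
    let expected = [("ts", "timestamp"), ("f0", "i64"), ("f1", "i64")];
    // f0 is missing and f1 has a wrong type: the type error wins,
    // matching the `test_missing_and_invalid` case above.
    let provided = HashMap::from([("ts", "timestamp"), ("f1", "string")]);
    assert_eq!(
        Err(CheckError::InvalidType("f1".into())),
        check_schema(&expected, &provided)
    );
}
```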
@@ -588,9 +595,12 @@ pub(crate) struct FlushFinished {
pub(crate) senders: Vec<OutputTx>,
/// File purger for cleaning files on failure.
pub(crate) file_purger: FilePurgerRef,
/// Flush timer.
pub(crate) timer: Timer,
}
impl FlushFinished {
/// Marks the flush job as successful and observes the timer.
pub(crate) fn on_success(self) {
for sender in self.senders {
sender.send(Ok(Output::AffectedRows(0)));
@@ -638,10 +648,15 @@ pub(crate) struct CompactionFinished {
pub(crate) file_purger: FilePurgerRef,
/// Inferred Compaction time window.
pub(crate) compaction_time_window: Option<Duration>,
/// Start time of compaction task.
pub(crate) start_time: Instant,
}
impl CompactionFinished {
pub fn on_success(self) {
// only update compaction time on success
histogram!(COMPACTION_ELAPSED_TOTAL, self.start_time.elapsed());
for sender in self.senders {
sender.send(Ok(AffectedRows(0)));
}
@@ -683,6 +698,7 @@ pub(crate) struct CompactionFailed {
#[cfg(test)]
mod tests {
use api::v1::value::ValueData;
use api::v1::{Row, SemanticType};
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::ColumnDefaultConstraint;
@@ -950,7 +966,7 @@ mod tests {
assert_eq!(expect_rows, request.rows);
}
fn region_metadata_for_delete() -> RegionMetadata {
fn region_metadata_two_fields() -> RegionMetadata {
let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1));
builder
.push_column_metadata(ColumnMetadata {
@@ -1010,7 +1026,7 @@ mod tests {
values: vec![ts_ms_value(1)],
}],
};
let metadata = region_metadata_for_delete();
let metadata = region_metadata_two_fields();
let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Delete, rows).unwrap();
let err = request.check_schema(&metadata).unwrap_err();
@@ -1078,4 +1094,37 @@ mod tests {
let err = request.fill_missing_columns(&metadata).unwrap_err();
check_invalid_request(&err, "column ts does not have default value");
}
#[test]
fn test_missing_and_invalid() {
// Missing f0 and f1 has invalid type (string).
let rows = Rows {
schema: vec![
new_column_schema("k0", ColumnDataType::Int64, SemanticType::Tag),
new_column_schema(
"ts",
ColumnDataType::TimestampMillisecond,
SemanticType::Timestamp,
),
new_column_schema("f1", ColumnDataType::String, SemanticType::Field),
],
rows: vec![Row {
values: vec![
i64_value(100),
ts_ms_value(1),
Value {
value_data: Some(ValueData::StringValue("xxxxx".to_string())),
},
],
}],
};
let metadata = region_metadata_two_fields();
let request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
let err = request.check_schema(&metadata).unwrap_err();
check_invalid_request(
&err,
"column f1 expect type Int64(Int64Type), given: STRING(12)",
);
}
}

View File

@@ -16,6 +16,7 @@
mod format;
pub mod reader;
mod stats;
pub mod writer;
use common_base::readable_size::ReadableSize;
@@ -25,7 +26,10 @@ use crate::sst::file::FileTimeRange;
/// Key of metadata in parquet SST.
pub const PARQUET_METADATA_KEY: &str = "greptime:metadata";
const DEFAULT_WRITE_BUFFER_SIZE: ReadableSize = ReadableSize::mb(8);
const DEFAULT_ROW_GROUP_SIZE: usize = 100000;
/// Default batch size to read parquet files.
pub(crate) const DEFAULT_READ_BATCH_SIZE: usize = 1024;
/// Default row group size for parquet files.
const DEFAULT_ROW_GROUP_SIZE: usize = 100 * DEFAULT_READ_BATCH_SIZE;
/// Parquet write options.
#[derive(Debug)]

View File

@@ -30,14 +30,18 @@ use std::collections::HashMap;
use std::sync::Arc;
use api::v1::SemanticType;
use datatypes::arrow::array::{ArrayRef, BinaryArray, DictionaryArray, UInt16Array};
use datafusion_common::ScalarValue;
use datatypes::arrow::array::{ArrayRef, BinaryArray, DictionaryArray, UInt16Array, UInt64Array};
use datatypes::arrow::datatypes::{
DataType, Field, FieldRef, Fields, Schema, SchemaRef, UInt16Type,
DataType as ArrowDataType, Field, FieldRef, Fields, Schema, SchemaRef, UInt16Type,
};
use datatypes::arrow::record_batch::RecordBatch;
use datatypes::prelude::DataType;
use datatypes::vectors::{Helper, Vector};
use parquet::file::metadata::RowGroupMetaData;
use parquet::file::statistics::Statistics;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::metadata::{RegionMetadata, RegionMetadataRef};
use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataRef};
use store_api::storage::consts::{
OP_TYPE_COLUMN_NAME, PRIMARY_KEY_COLUMN_NAME, SEQUENCE_COLUMN_NAME,
};
@@ -47,6 +51,7 @@ use crate::error::{
ConvertVectorSnafu, InvalidBatchSnafu, InvalidRecordBatchSnafu, NewRecordBatchSnafu, Result,
};
use crate::read::{Batch, BatchBuilder, BatchColumn};
use crate::row_converter::{McmpRowCodec, RowCodec, SortField};
/// Number of columns that have fixed positions.
///
@@ -250,6 +255,66 @@ impl ReadFormat {
Ok(())
}
/// Returns min values of specific column in row groups.
pub(crate) fn min_values(
&self,
row_groups: &[RowGroupMetaData],
column_id: ColumnId,
) -> Option<ArrayRef> {
let column = self.metadata.column_by_id(column_id)?;
match column.semantic_type {
SemanticType::Tag => self.tag_values(row_groups, column, true),
SemanticType::Field => {
let index = self.field_id_to_index.get(&column_id)?;
Self::column_values(row_groups, column, *index, true)
}
SemanticType::Timestamp => {
let index = self.time_index_position();
Self::column_values(row_groups, column, index, true)
}
}
}
/// Returns max values of specific column in row groups.
pub(crate) fn max_values(
&self,
row_groups: &[RowGroupMetaData],
column_id: ColumnId,
) -> Option<ArrayRef> {
let column = self.metadata.column_by_id(column_id)?;
match column.semantic_type {
SemanticType::Tag => self.tag_values(row_groups, column, false),
SemanticType::Field => {
let index = self.field_id_to_index.get(&column_id)?;
Self::column_values(row_groups, column, *index, false)
}
SemanticType::Timestamp => {
let index = self.time_index_position();
Self::column_values(row_groups, column, index, false)
}
}
}
/// Returns null counts of specific column in row groups.
pub(crate) fn null_counts(
&self,
row_groups: &[RowGroupMetaData],
column_id: ColumnId,
) -> Option<ArrayRef> {
let column = self.metadata.column_by_id(column_id)?;
match column.semantic_type {
SemanticType::Tag => None,
SemanticType::Field => {
let index = self.field_id_to_index.get(&column_id)?;
Self::column_null_counts(row_groups, *index)
}
SemanticType::Timestamp => {
let index = self.time_index_position();
Self::column_null_counts(row_groups, index)
}
}
}
/// Get fields from `record_batch`.
fn get_field_batch_columns(&self, record_batch: &RecordBatch) -> Result<Vec<BatchColumn>> {
record_batch
@@ -273,6 +338,148 @@ impl ReadFormat {
})
.collect()
}
/// Returns min/max values of specific tag.
fn tag_values(
&self,
row_groups: &[RowGroupMetaData],
column: &ColumnMetadata,
is_min: bool,
) -> Option<ArrayRef> {
let is_first_tag = self
.metadata
.primary_key
.first()
.map(|id| *id == column.column_id)
.unwrap_or(false);
if !is_first_tag {
// Only the min-max of the first tag is available in the primary key.
return None;
}
let converter =
McmpRowCodec::new(vec![SortField::new(column.column_schema.data_type.clone())]);
let values = row_groups.iter().map(|meta| {
let stats = meta.column(self.primary_key_position()).statistics()?;
if !stats.has_min_max_set() {
return None;
}
match stats {
Statistics::Boolean(_) => None,
Statistics::Int32(_) => None,
Statistics::Int64(_) => None,
Statistics::Int96(_) => None,
Statistics::Float(_) => None,
Statistics::Double(_) => None,
Statistics::ByteArray(s) => {
let bytes = if is_min { s.min_bytes() } else { s.max_bytes() };
let mut values = converter.decode(bytes).ok()?;
values.pop()
}
Statistics::FixedLenByteArray(_) => None,
}
});
let mut builder = column
.column_schema
.data_type
.create_mutable_vector(row_groups.len());
for value_opt in values {
match value_opt {
// Safety: We use the same data type to create the converter.
Some(v) => builder.push_value_ref(v.as_value_ref()),
None => builder.push_null(),
}
}
let vector = builder.to_vector();
Some(vector.to_arrow_array())
}
/// Returns min/max values of specific non-tag columns.
fn column_values(
row_groups: &[RowGroupMetaData],
column: &ColumnMetadata,
column_index: usize,
is_min: bool,
) -> Option<ArrayRef> {
let null_scalar: ScalarValue = column
.column_schema
.data_type
.as_arrow_type()
.try_into()
.ok()?;
let scalar_values = row_groups
.iter()
.map(|meta| {
let stats = meta.column(column_index).statistics()?;
if !stats.has_min_max_set() {
return None;
}
match stats {
Statistics::Boolean(s) => Some(ScalarValue::Boolean(Some(if is_min {
*s.min()
} else {
*s.max()
}))),
Statistics::Int32(s) => Some(ScalarValue::Int32(Some(if is_min {
*s.min()
} else {
*s.max()
}))),
Statistics::Int64(s) => Some(ScalarValue::Int64(Some(if is_min {
*s.min()
} else {
*s.max()
}))),
Statistics::Int96(_) => None,
Statistics::Float(s) => Some(ScalarValue::Float32(Some(if is_min {
*s.min()
} else {
*s.max()
}))),
Statistics::Double(s) => Some(ScalarValue::Float64(Some(if is_min {
*s.min()
} else {
*s.max()
}))),
Statistics::ByteArray(s) => {
let bytes = if is_min { s.min_bytes() } else { s.max_bytes() };
let s = String::from_utf8(bytes.to_vec()).ok();
Some(ScalarValue::Utf8(s))
}
Statistics::FixedLenByteArray(_) => None,
}
})
.map(|maybe_scalar| maybe_scalar.unwrap_or_else(|| null_scalar.clone()))
.collect::<Vec<ScalarValue>>();
debug_assert_eq!(scalar_values.len(), row_groups.len());
ScalarValue::iter_to_array(scalar_values).ok()
}
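The key step above is mapping missing row-group statistics to a typed null and collecting everything with `ScalarValue::iter_to_array`, so the output array always has one entry per row group. A hedged sketch of that pattern for an int64 column (the helper name is illustrative, and the arrow re-export is assumed to match the one this crate uses):

```rust
use datafusion_common::ScalarValue;
use datatypes::arrow::array::{Array, ArrayRef};

/// Collects per-row-group minimums (None when a row group has no statistics)
/// into one array, substituting typed nulls for the missing entries.
fn min_values_to_array(mins: Vec<Option<i64>>) -> Option<ArrayRef> {
    let scalars = mins
        .into_iter()
        .map(ScalarValue::Int64)
        .collect::<Vec<_>>();
    ScalarValue::iter_to_array(scalars).ok()
}

fn main() {
    // Three row groups, the second one without statistics.
    let array = min_values_to_array(vec![Some(1), None, Some(5)]).unwrap();
    assert_eq!(3, array.len());
}
```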
/// Returns null counts of specific non-tag columns.
fn column_null_counts(
row_groups: &[RowGroupMetaData],
column_index: usize,
) -> Option<ArrayRef> {
let values = row_groups.iter().map(|meta| {
let col = meta.column(column_index);
let stat = col.statistics()?;
Some(stat.null_count())
});
Some(Arc::new(UInt64Array::from_iter(values)))
}
/// Field index of the primary key.
fn primary_key_position(&self) -> usize {
self.arrow_schema.fields.len() - 3
}
/// Field index of the time index.
fn time_index_position(&self) -> usize {
self.arrow_schema.fields.len() - FIXED_POS_COLUMN_NUM
}
}
/// Gets the arrow schema to store in parquet.
@@ -328,12 +535,16 @@ fn internal_fields() -> [FieldRef; 3] {
[
Arc::new(Field::new_dictionary(
PRIMARY_KEY_COLUMN_NAME,
DataType::UInt16,
DataType::Binary,
ArrowDataType::UInt16,
ArrowDataType::Binary,
false,
)),
Arc::new(Field::new(SEQUENCE_COLUMN_NAME, DataType::UInt64, false)),
Arc::new(Field::new(OP_TYPE_COLUMN_NAME, DataType::UInt8, false)),
Arc::new(Field::new(
SEQUENCE_COLUMN_NAME,
ArrowDataType::UInt64,
false,
)),
Arc::new(Field::new(OP_TYPE_COLUMN_NAME, ArrowDataType::UInt8, false)),
]
}
@@ -408,20 +619,23 @@ mod tests {
fn build_test_arrow_schema() -> SchemaRef {
let fields = vec![
Field::new("field1", DataType::Int64, true),
Field::new("field0", DataType::Int64, true),
Field::new("field1", ArrowDataType::Int64, true),
Field::new("field0", ArrowDataType::Int64, true),
Field::new(
"ts",
DataType::Timestamp(TimeUnit::Millisecond, None),
ArrowDataType::Timestamp(TimeUnit::Millisecond, None),
false,
),
Field::new(
"__primary_key",
DataType::Dictionary(Box::new(DataType::UInt16), Box::new(DataType::Binary)),
ArrowDataType::Dictionary(
Box::new(ArrowDataType::UInt16),
Box::new(ArrowDataType::Binary),
),
false,
),
Field::new("__sequence", DataType::UInt64, false),
Field::new("__op_type", DataType::UInt8, false),
Field::new("__sequence", ArrowDataType::UInt64, false),
Field::new("__op_type", ArrowDataType::UInt8, false),
];
Arc::new(Schema::new(fields))
}

View File

@@ -14,6 +14,7 @@
//! Parquet reader.
use std::collections::HashSet;
use std::ops::Range;
use std::sync::Arc;
@@ -44,7 +45,8 @@ use crate::error::{
use crate::read::{Batch, BatchReader};
use crate::sst::file::{FileHandle, FileId};
use crate::sst::parquet::format::ReadFormat;
use crate::sst::parquet::PARQUET_METADATA_KEY;
use crate::sst::parquet::stats::RowGroupPruningStats;
use crate::sst::parquet::{DEFAULT_READ_BATCH_SIZE, PARQUET_METADATA_KEY};
/// Parquet SST reader builder.
pub struct ParquetReaderBuilder {
@@ -145,24 +147,24 @@ impl ParquetReaderBuilder {
};
let mut builder = ParquetRecordBatchStreamBuilder::new(reader)
.await
.context(ReadParquetSnafu { path: file_path })?;
.context(ReadParquetSnafu { path: file_path })?
.with_batch_size(DEFAULT_READ_BATCH_SIZE);
// Decode region metadata.
let key_value_meta = builder.metadata().file_metadata().key_value_metadata();
let region_meta = self.get_region_metadata(file_path, key_value_meta)?;
// Prune row groups by metadata.
if let Some(predicate) = &self.predicate {
// TODO(yingwen): Now we encode tags into the full primary key so we need some approach
// to implement pruning.
let pruned_row_groups = predicate
.prune_row_groups(builder.metadata().row_groups())
.into_iter()
.enumerate()
.filter_map(|(idx, valid)| if valid { Some(idx) } else { None })
.collect::<Vec<_>>();
builder = builder.with_row_groups(pruned_row_groups);
}
let column_ids: HashSet<_> = self
.projection
.as_ref()
.map(|p| p.iter().cloned().collect())
.unwrap_or_else(|| {
region_meta
.column_metadatas
.iter()
.map(|c| c.column_id)
.collect()
});
let read_format = ReadFormat::new(Arc::new(region_meta));
// The arrow schema converted from the region meta should be the same as parquet's.
@@ -179,6 +181,22 @@ impl ParquetReaderBuilder {
}
);
// Prune row groups by metadata.
if let Some(predicate) = &self.predicate {
let stats = RowGroupPruningStats::new(
builder.metadata().row_groups(),
&read_format,
column_ids,
);
let pruned_row_groups = predicate
.prune_with_stats(&stats)
.into_iter()
.enumerate()
.filter_map(|(idx, valid)| if valid { Some(idx) } else { None })
.collect::<Vec<_>>();
builder = builder.with_row_groups(pruned_row_groups);
}
let parquet_schema_desc = builder.metadata().file_metadata().schema_descr();
if let Some(column_ids) = self.projection.as_ref() {
let indices = read_format.projection_indices(column_ids.iter().copied());

View File

@@ -0,0 +1,83 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Statistics of parquet SSTs.
use std::collections::HashSet;
use datafusion::physical_optimizer::pruning::PruningStatistics;
use datafusion_common::Column;
use datatypes::arrow::array::ArrayRef;
use parquet::file::metadata::RowGroupMetaData;
use store_api::storage::ColumnId;
use crate::sst::parquet::format::ReadFormat;
/// Statistics for pruning row groups.
pub(crate) struct RowGroupPruningStats<'a> {
/// Metadata of SST row groups.
row_groups: &'a [RowGroupMetaData],
/// Helper to read the SST.
read_format: &'a ReadFormat,
/// Projected column ids to read.
///
/// We need column ids to distinguish different columns with the same name.
/// e.g., a column that is dropped and then added again.
column_ids: HashSet<ColumnId>,
}
impl<'a> RowGroupPruningStats<'a> {
/// Creates a new statistics to prune specific `row_groups`.
pub(crate) fn new(
row_groups: &'a [RowGroupMetaData],
read_format: &'a ReadFormat,
column_ids: HashSet<ColumnId>,
) -> Self {
Self {
row_groups,
read_format,
column_ids,
}
}
/// Returns the column id of the given column name if we need to read it.
fn column_id_to_prune(&self, name: &str) -> Option<ColumnId> {
// Only use stats when the column to read has the same id as the column in the SST.
self.read_format
.metadata()
.column_by_name(name)
.and_then(|col| self.column_ids.get(&col.column_id).copied())
}
}
impl<'a> PruningStatistics for RowGroupPruningStats<'a> {
fn min_values(&self, column: &Column) -> Option<ArrayRef> {
let column_id = self.column_id_to_prune(&column.name)?;
self.read_format.min_values(self.row_groups, column_id)
}
fn max_values(&self, column: &Column) -> Option<ArrayRef> {
let column_id = self.column_id_to_prune(&column.name)?;
self.read_format.max_values(self.row_groups, column_id)
}
fn num_containers(&self) -> usize {
self.row_groups.len()
}
fn null_counts(&self, column: &Column) -> Option<ArrayRef> {
let column_id = self.column_id_to_prune(&column.name)?;
self.read_format.null_counts(self.row_groups, column_id)
}
}

View File

@@ -15,6 +15,7 @@
//! Utilities for testing.
pub mod memtable_util;
pub mod meta_util;
pub mod scheduler_util;
pub mod version_util;
@@ -574,9 +575,12 @@ pub async fn delete_rows(engine: &MitoEngine, region_id: RegionId, rows: Rows) {
}
/// Flush a region manually.
pub async fn flush_region(engine: &MitoEngine, region_id: RegionId) {
pub async fn flush_region(engine: &MitoEngine, region_id: RegionId, row_group_size: Option<usize>) {
let Output::AffectedRows(rows) = engine
.handle_request(region_id, RegionRequest::Flush(RegionFlushRequest {}))
.handle_request(
region_id,
RegionRequest::Flush(RegionFlushRequest { row_group_size }),
)
.await
.unwrap()
else {

View File

@@ -17,9 +17,9 @@
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;
use common_query::logical_plan::Expr;
use store_api::metadata::RegionMetadataRef;
use store_api::storage::ColumnId;
use table::predicate::Predicate;
use crate::error::Result;
use crate::memtable::{
@@ -50,7 +50,11 @@ impl Memtable for EmptyMemtable {
Ok(())
}
fn iter(&self, _projection: Option<&[ColumnId]>, _filters: &[Expr]) -> BoxedBatchIterator {
fn iter(
&self,
_projection: Option<&[ColumnId]>,
_filters: Option<Predicate>,
) -> BoxedBatchIterator {
Box::new(std::iter::empty())
}

View File

@@ -0,0 +1,107 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Utilities to create a [RegionMetadata](store_api::metadata::RegionMetadata).
use api::v1::SemanticType;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::ColumnSchema;
use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataBuilder};
use store_api::storage::RegionId;
/// Builder that builds a region metadata with schema `ts, k0, k1, ..., v0, v1, ...`.
///
/// All tags and fields have int64 type.
#[derive(Debug)]
pub struct TestRegionMetadataBuilder {
region_id: RegionId,
ts_name: String,
num_tags: usize,
num_fields: usize,
}
impl Default for TestRegionMetadataBuilder {
fn default() -> Self {
Self {
region_id: RegionId::new(1, 1),
ts_name: "ts".to_string(),
num_tags: 1,
num_fields: 1,
}
}
}
impl TestRegionMetadataBuilder {
/// Sets ts name.
pub fn ts_name(&mut self, value: &str) -> &mut Self {
self.ts_name = value.to_string();
self
}
/// Sets the number of tag columns.
pub fn num_tags(&mut self, value: usize) -> &mut Self {
self.num_tags = value;
self
}
/// Sets the number of field columns.
pub fn num_fields(&mut self, value: usize) -> &mut Self {
self.num_fields = value;
self
}
/// Builds the region metadata.
pub fn build(&self) -> RegionMetadata {
let mut builder = RegionMetadataBuilder::new(self.region_id);
let mut column_id = 0;
builder.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new(
&self.ts_name,
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
semantic_type: SemanticType::Timestamp,
column_id,
});
// For simplicity, we use the same data type for tag/field columns.
let mut primary_key = Vec::with_capacity(self.num_tags);
for i in 0..self.num_tags {
column_id += 1;
builder.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new(
format!("k{i}"),
ConcreteDataType::int64_datatype(),
true,
),
semantic_type: SemanticType::Tag,
column_id,
});
primary_key.push(i as u32 + 1);
}
for i in 0..self.num_fields {
column_id += 1;
builder.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new(
format!("v{i}"),
ConcreteDataType::int64_datatype(),
true,
),
semantic_type: SemanticType::Field,
column_id,
});
}
builder.primary_key(primary_key);
builder.build().unwrap()
}
}
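A hedged usage sketch of the builder above, as it would appear inside the crate's test code. The setter methods return `&mut Self`, so the chain below borrows a temporary that lives until the end of the statement:

// Builds `ts, k0, k1, v0, v1, v2` with `k0, k1` as the primary key.
let metadata = TestRegionMetadataBuilder::default()
    .ts_name("ts")
    .num_tags(2)
    .num_fields(3)
    .build();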

View File

@@ -120,7 +120,10 @@ impl WorkerGroup {
config.global_write_buffer_size.as_bytes() as usize,
));
let scheduler = Arc::new(LocalScheduler::new(config.max_background_jobs));
let cache_manager = Arc::new(CacheManager::new(config.sst_meta_cache_size.as_bytes()));
let cache_manager = Arc::new(CacheManager::new(
config.sst_meta_cache_size.as_bytes(),
config.vector_cache_size.as_bytes(),
));
let workers = (0..config.num_workers)
.map(|id| {
@@ -215,7 +218,10 @@ impl WorkerGroup {
))
});
let scheduler = Arc::new(LocalScheduler::new(config.max_background_jobs));
let cache_manager = Arc::new(CacheManager::new(config.sst_meta_cache_size.as_bytes()));
let cache_manager = Arc::new(CacheManager::new(
config.sst_meta_cache_size.as_bytes(),
config.vector_cache_size.as_bytes(),
));
let workers = (0..config.num_workers)
.map(|id| {
@@ -528,8 +534,9 @@ impl<S: LogStore> RegionWorkerLoop<S> {
.await;
continue;
}
DdlRequest::Flush(_) => {
self.handle_flush_request(ddl.region_id, ddl.sender).await;
DdlRequest::Flush(req) => {
self.handle_flush_request(ddl.region_id, req, ddl.sender)
.await;
continue;
}
DdlRequest::Compact(_) => {
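The flush DDL arm now forwards the request itself instead of ignoring it, so per-request options such as `row_group_size` reach the handler. A std-only sketch of the routing change (the types are illustrative stand-ins for the `store_api`/`mito2` ones):

struct RegionFlushRequest {
    row_group_size: Option<usize>,
}

enum DdlRequest {
    Flush(RegionFlushRequest),
    Compact(()),
}

fn handle_flush_request(req: RegionFlushRequest) {
    println!("flush with row_group_size = {:?}", req.row_group_size);
}

fn route(ddl: DdlRequest) {
    match ddl {
        // Previously `Flush(_)`: the request payload was discarded.
        DdlRequest::Flush(req) => handle_flush_request(req),
        DdlRequest::Compact(_) => { /* unchanged */ }
    }
}

fn main() {
    route(DdlRequest::Flush(RegionFlushRequest { row_group_size: Some(4096) }));
    route(DdlRequest::Compact(()));
}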

View File

@@ -80,7 +80,7 @@ impl<S> RegionWorkerLoop<S> {
info!("Flush region: {} before alteration", region_id);
// Try to submit a flush task.
let task = self.new_flush_task(&region, FlushReason::Alter);
let task = self.new_flush_task(&region, FlushReason::Alter, None);
if let Err(e) =
self.flush_scheduler
.schedule_flush(region.region_id, &region.version_control, task)

View File

@@ -16,9 +16,11 @@
use common_query::Output;
use common_telemetry::info;
use metrics::decrement_gauge;
use store_api::storage::RegionId;
use crate::error::Result;
use crate::metrics::REGION_COUNT;
use crate::worker::RegionWorkerLoop;
impl<S> RegionWorkerLoop<S> {
@@ -38,6 +40,8 @@ impl<S> RegionWorkerLoop<S> {
info!("Region {} closed", region_id);
decrement_gauge!(REGION_COUNT, 1.0);
Ok(Output::AffectedRows(0))
}
}
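Create and open increment the `REGION_COUNT` gauge while close and drop decrement it, so the gauge tracks the number of live regions on the node. A std-only stand-in for that lifecycle (the engine records it through the `metrics` crate's `increment_gauge!`/`decrement_gauge!` macros rather than a bare atomic):

use std::sync::atomic::{AtomicI64, Ordering};

// Stand-in for the `REGION_COUNT` gauge.
static REGION_COUNT: AtomicI64 = AtomicI64::new(0);

fn on_region_created_or_opened() {
    REGION_COUNT.fetch_add(1, Ordering::Relaxed);
}

fn on_region_closed_or_dropped() {
    REGION_COUNT.fetch_sub(1, Ordering::Relaxed);
}

fn main() {
    on_region_created_or_opened(); // create
    on_region_created_or_opened(); // open
    on_region_closed_or_dropped(); // close
    assert_eq!(REGION_COUNT.load(Ordering::Relaxed), 1);
}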

View File

@@ -12,11 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_telemetry::{error, info};
use common_telemetry::{error, info, timer};
use metrics::increment_counter;
use store_api::logstore::LogStore;
use store_api::storage::RegionId;
use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
use crate::metrics::{COMPACTION_REQUEST_COUNT, COMPACTION_STAGE_ELAPSED, STAGE_LABEL};
use crate::request::{CompactionFailed, CompactionFinished, OnFailure, OptionOutputTx};
use crate::worker::RegionWorkerLoop;
@@ -30,7 +32,7 @@ impl<S: LogStore> RegionWorkerLoop<S> {
let Some(region) = self.regions.writable_region_or(region_id, &mut sender) else {
return;
};
increment_counter!(COMPACTION_REQUEST_COUNT);
if let Err(e) = self.compaction_scheduler.schedule_compaction(
region.region_id,
&region.version_control,
@@ -57,27 +59,33 @@ impl<S: LogStore> RegionWorkerLoop<S> {
return;
};
// Write region edit to manifest.
let edit = RegionEdit {
files_to_add: std::mem::take(&mut request.compaction_outputs),
files_to_remove: std::mem::take(&mut request.compacted_files),
compaction_time_window: request.compaction_time_window,
flushed_entry_id: None,
flushed_sequence: None,
};
let action_list = RegionMetaActionList::with_action(RegionMetaAction::Edit(edit.clone()));
if let Err(e) = region.manifest_manager.update(action_list).await {
error!(e; "Failed to update manifest, region: {}", region_id);
request.on_failure(e);
return;
{
let manifest_timer =
timer!(COMPACTION_STAGE_ELAPSED, &[(STAGE_LABEL, "write_manifest")]);
// Write region edit to manifest.
let edit = RegionEdit {
files_to_add: std::mem::take(&mut request.compaction_outputs),
files_to_remove: std::mem::take(&mut request.compacted_files),
compaction_time_window: request.compaction_time_window,
flushed_entry_id: None,
flushed_sequence: None,
};
let action_list =
RegionMetaActionList::with_action(RegionMetaAction::Edit(edit.clone()));
if let Err(e) = region.manifest_manager.update(action_list).await {
error!(e; "Failed to update manifest, region: {}", region_id);
manifest_timer.discard();
request.on_failure(e);
return;
}
// Apply edit to region's version.
region
.version_control
.apply_edit(edit, &[], region.file_purger.clone());
}
// Apply edit to region's version.
region
.version_control
.apply_edit(edit, &[], region.file_purger.clone());
// compaction finished.
request.on_success();
// Schedule next compaction if necessary.
self.compaction_scheduler.on_compaction_finished(region_id);
}
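The manifest-update stage is wrapped in a timer that is discarded on the failure path, so failed updates do not skew the `COMPACTION_STAGE_ELAPSED` histogram. A std-only sketch of a discardable stage timer under that assumption (`StageTimer` is illustrative; the real guard comes from `common_telemetry::timer!`):

use std::time::Instant;

// Records elapsed time on drop unless it is explicitly discarded.
struct StageTimer {
    start: Instant,
    stage: &'static str,
    discarded: bool,
}

impl StageTimer {
    fn new(stage: &'static str) -> Self {
        Self { start: Instant::now(), stage, discarded: false }
    }

    // Skip recording, e.g. when the manifest update fails and the
    // measurement would pollute the histogram.
    fn discard(mut self) {
        self.discarded = true;
    }
}

impl Drop for StageTimer {
    fn drop(&mut self) {
        if !self.discarded {
            // A real implementation would observe this into a histogram
            // labeled with `stage`; here we just print it.
            println!("stage={} elapsed={:?}", self.stage, self.start.elapsed());
        }
    }
}

fn main() {
    {
        let _t = StageTimer::new("write_manifest"); // recorded on scope exit
    }
    let t = StageTimer::new("write_manifest");
    t.discard(); // failure path: nothing recorded
}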

View File

@@ -18,6 +18,7 @@ use std::sync::Arc;
use common_query::Output;
use common_telemetry::info;
use metrics::increment_gauge;
use snafu::ResultExt;
use store_api::logstore::LogStore;
use store_api::metadata::RegionMetadataBuilder;
@@ -25,6 +26,7 @@ use store_api::region_request::RegionCreateRequest;
use store_api::storage::RegionId;
use crate::error::{InvalidMetadataSnafu, Result};
use crate::metrics::REGION_COUNT;
use crate::region::opener::{check_recovered_region, RegionOpener};
use crate::worker::RegionWorkerLoop;
@@ -69,10 +71,9 @@ impl<S: LogStore> RegionWorkerLoop<S> {
.create_or_open(&self.config, &self.wal)
.await?;
// TODO(yingwen): Customize the Debug format for the metadata and also print it.
info!("A new region created, region_id: {}", region.region_id);
info!("A new region created, region: {:?}", region.metadata());
// TODO(yingwen): Metrics.
increment_gauge!(REGION_COUNT, 1.0);
// Insert the MitoRegion into the RegionMap.
self.regions.insert_region(Arc::new(region));

View File

@@ -20,6 +20,7 @@ use common_query::Output;
use common_telemetry::info;
use common_telemetry::tracing::warn;
use futures::TryStreamExt;
use metrics::decrement_gauge;
use object_store::util::join_path;
use object_store::{EntryMode, ObjectStore};
use snafu::ResultExt;
@@ -27,6 +28,7 @@ use store_api::storage::RegionId;
use tokio::time::sleep;
use crate::error::{OpenDalSnafu, Result};
use crate::metrics::REGION_COUNT;
use crate::region::RegionMapRef;
use crate::worker::{RegionWorkerLoop, DROPPING_MARKER_FILE};
@@ -62,6 +64,8 @@ impl<S> RegionWorkerLoop<S> {
region_id
);
decrement_gauge!(REGION_COUNT, 1.0);
// detach a background task to delete the region dir
let region_dir = region.access_layer.region_dir().to_owned();
let object_store = self.object_store.clone();

View File

@@ -17,6 +17,7 @@
use common_telemetry::{error, info, warn};
use common_time::util::current_time_millis;
use store_api::logstore::LogStore;
use store_api::region_request::RegionFlushRequest;
use store_api::storage::RegionId;
use crate::error::{RegionTruncatedSnafu, Result};
@@ -31,13 +32,14 @@ impl<S> RegionWorkerLoop<S> {
pub(crate) async fn handle_flush_request(
&mut self,
region_id: RegionId,
request: RegionFlushRequest,
mut sender: OptionOutputTx,
) {
let Some(region) = self.regions.writable_region_or(region_id, &mut sender) else {
return;
};
let mut task = self.new_flush_task(&region, FlushReason::Manual);
let mut task = self.new_flush_task(&region, FlushReason::Manual, request.row_group_size);
task.push_sender(sender);
if let Err(e) =
self.flush_scheduler
@@ -92,7 +94,7 @@ impl<S> RegionWorkerLoop<S> {
if region.last_flush_millis() < min_last_flush_time {
// If flush time of this region is earlier than `min_last_flush_time`, we can flush this region.
let task = self.new_flush_task(region, FlushReason::EngineFull);
let task = self.new_flush_task(region, FlushReason::EngineFull, None);
self.flush_scheduler.schedule_flush(
region.region_id,
&region.version_control,
@@ -105,7 +107,7 @@ impl<S> RegionWorkerLoop<S> {
// TODO(yingwen): Maybe flush more tables to reduce write buffer size.
if let Some(region) = max_mem_region {
if !self.flush_scheduler.is_flush_requested(region.region_id) {
let task = self.new_flush_task(region, FlushReason::EngineFull);
let task = self.new_flush_task(region, FlushReason::EngineFull, None);
self.flush_scheduler.schedule_flush(
region.region_id,
&region.version_control,
@@ -122,6 +124,7 @@ impl<S> RegionWorkerLoop<S> {
&self,
region: &MitoRegionRef,
reason: FlushReason,
row_group_size: Option<usize>,
) -> RegionFlushTask {
// TODO(yingwen): metrics for flush requested.
RegionFlushTask {
@@ -133,6 +136,7 @@ impl<S> RegionWorkerLoop<S> {
memtable_builder: self.memtable_builder.clone(),
file_purger: region.file_purger.clone(),
listener: self.listener.clone(),
row_group_size,
}
}
}
@@ -182,8 +186,10 @@ impl<S: LogStore> RegionWorkerLoop<S> {
// Delete wal.
info!(
"Region {} flush finished, tries to bump wal to {}",
region_id, request.flushed_entry_id
"Region {} flush finished, elapsed: {:?}, tries to bump wal to {}",
region_id,
request.timer.elapsed(),
request.flushed_entry_id
);
if let Err(e) = self.wal.obsolete(region_id, request.flushed_entry_id).await {
error!(e; "Failed to write wal, region: {}", region_id);
@@ -191,7 +197,7 @@ impl<S: LogStore> RegionWorkerLoop<S> {
return;
}
// Notifies waiters.
// Notifies waiters and observes the flush timer.
request.on_success();
// Handle pending requests for the region.
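The flush-finished notification carries a timer, and the handler now logs the elapsed time before bumping the WAL. A std-only sketch of that bookkeeping (the struct is a stand-in; the real request carries file metas, senders, and more, and when its timer starts is an assumption here):

use std::time::Instant;

// Stand-in for the flush-finished request handled above.
struct FlushFinished {
    region_id: u64,
    flushed_entry_id: u64,
    timer: Instant, // assumed to be started when the flush task was created
}

fn on_flush_finished(request: &FlushFinished) {
    println!(
        "Region {} flush finished, elapsed: {:?}, tries to bump wal to {}",
        request.region_id,
        request.timer.elapsed(),
        request.flushed_entry_id
    );
}

fn main() {
    let request = FlushFinished {
        region_id: 1,
        flushed_entry_id: 42,
        timer: Instant::now(),
    };
    on_flush_finished(&request);
}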

View File

@@ -18,6 +18,7 @@ use std::sync::Arc;
use common_query::Output;
use common_telemetry::info;
use metrics::increment_gauge;
use object_store::util::join_path;
use snafu::ResultExt;
use store_api::logstore::LogStore;
@@ -25,6 +26,7 @@ use store_api::region_request::RegionOpenRequest;
use store_api::storage::RegionId;
use crate::error::{OpenDalSnafu, RegionNotFoundSnafu, Result};
use crate::metrics::REGION_COUNT;
use crate::region::opener::RegionOpener;
use crate::worker::handle_drop::remove_region_dir_once;
use crate::worker::{RegionWorkerLoop, DROPPING_MARKER_FILE};
@@ -69,6 +71,8 @@ impl<S: LogStore> RegionWorkerLoop<S> {
info!("Region {} is opened", region_id);
increment_gauge!(REGION_COUNT, 1.0);
// Insert the MitoRegion into the RegionMap.
self.regions.insert_region(Arc::new(region));

View File

@@ -17,11 +17,17 @@
use std::collections::{hash_map, HashMap};
use std::sync::Arc;
use common_telemetry::timer;
use metrics::counter;
use store_api::logstore::LogStore;
use store_api::metadata::RegionMetadata;
use store_api::storage::RegionId;
use crate::error::{RejectWriteSnafu, Result};
use crate::metrics::{
STAGE_LABEL, TYPE_LABEL, WRITE_REJECT_TOTAL, WRITE_ROWS_TOTAL, WRITE_STAGE_ELAPSED,
WRITE_STALL_TOTAL,
};
use crate::region_write_ctx::RegionWriteCtx;
use crate::request::{SenderWriteRequest, WriteRequest};
use crate::worker::RegionWorkerLoop;
@@ -50,7 +56,8 @@ impl<S: LogStore> RegionWorkerLoop<S> {
}
if self.write_buffer_manager.should_stall() && allow_stall {
// TODO(yingwen): stalled metrics.
counter!(WRITE_STALL_TOTAL, write_requests.len() as u64);
self.stalled_requests.append(&mut write_requests);
self.listener.on_write_stall();
return;
@@ -59,24 +66,36 @@ impl<S: LogStore> RegionWorkerLoop<S> {
let mut region_ctxs = self.prepare_region_write_ctx(write_requests);
// Write to WAL.
let mut wal_writer = self.wal.writer();
for region_ctx in region_ctxs.values_mut() {
if let Err(e) = region_ctx.add_wal_entry(&mut wal_writer).map_err(Arc::new) {
region_ctx.set_error(e);
{
let _timer = timer!(WRITE_STAGE_ELAPSED, &[(STAGE_LABEL, "write_wal")]);
let mut wal_writer = self.wal.writer();
for region_ctx in region_ctxs.values_mut() {
if let Err(e) = region_ctx.add_wal_entry(&mut wal_writer).map_err(Arc::new) {
region_ctx.set_error(e);
}
}
}
if let Err(e) = wal_writer.write_to_wal().await.map_err(Arc::new) {
// Failed to write wal.
for mut region_ctx in region_ctxs.into_values() {
region_ctx.set_error(e.clone());
if let Err(e) = wal_writer.write_to_wal().await.map_err(Arc::new) {
// Failed to write wal.
for mut region_ctx in region_ctxs.into_values() {
region_ctx.set_error(e.clone());
}
return;
}
return;
}
let (mut put_rows, mut delete_rows) = (0, 0);
// Write to memtables.
for mut region_ctx in region_ctxs.into_values() {
region_ctx.write_memtable();
{
let _timer = timer!(WRITE_STAGE_ELAPSED, &[(STAGE_LABEL, "write_memtable")]);
for mut region_ctx in region_ctxs.into_values() {
region_ctx.write_memtable();
put_rows += region_ctx.put_num;
delete_rows += region_ctx.delete_num;
}
}
counter!(WRITE_ROWS_TOTAL, put_rows as u64, TYPE_LABEL => "put");
counter!(WRITE_ROWS_TOTAL, delete_rows as u64, TYPE_LABEL => "delete");
}
}
@@ -148,6 +167,8 @@ impl<S> RegionWorkerLoop<S> {
/// Send rejected error to all `write_requests`.
fn reject_write_requests(write_requests: Vec<SenderWriteRequest>) {
counter!(WRITE_REJECT_TOTAL, write_requests.len() as u64);
for req in write_requests {
req.sender.send(
RejectWriteSnafu {
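The write path is now split into two timed stages, WAL append and memtable apply, with per-type row counters emitted after the memtable stage plus counters for stalled and rejected batches. A std-only sketch of that accounting (stage boundaries follow the diff; the metrics struct and field names are illustrative, since the engine records these through the `metrics` macros shown above):

use std::time::Instant;

#[derive(Default)]
struct WriteMetrics {
    wal_secs: f64,
    memtable_secs: f64,
    put_rows: u64,
    delete_rows: u64,
}

// One region's pending mutations, reduced to row counts for the sketch.
struct RegionCtx {
    puts: u64,
    deletes: u64,
}

fn handle_writes(regions: Vec<RegionCtx>, metrics: &mut WriteMetrics) {
    // Stage 1: append every region's entry to the WAL.
    let wal_start = Instant::now();
    // ... build and flush WAL entries here ...
    metrics.wal_secs += wal_start.elapsed().as_secs_f64();

    // Stage 2: apply to memtables, counting rows by operation type.
    let mem_start = Instant::now();
    let (mut put_rows, mut delete_rows) = (0, 0);
    for region in regions {
        put_rows += region.puts;
        delete_rows += region.deletes;
    }
    metrics.memtable_secs += mem_start.elapsed().as_secs_f64();
    metrics.put_rows += put_rows;
    metrics.delete_rows += delete_rows;
}

fn main() {
    let mut metrics = WriteMetrics::default();
    handle_writes(vec![RegionCtx { puts: 8, deletes: 2 }], &mut metrics);
    println!("wal: {:.6}s, memtable: {:.6}s", metrics.wal_secs, metrics.memtable_secs);
    assert_eq!(metrics.put_rows, 8);
    assert_eq!(metrics.delete_rows, 2);
}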

Some files were not shown because too many files have changed in this diff.