Compare commits

..

55 Commits

Author SHA1 Message Date
niebayes
122b47210e chore: bump version to 0.5.1 (#3116) 2024-01-08 11:32:56 +00:00
tison
316d843482 feat: support CSV format in sql HTTP API (#3062)
* chore: fix typo

Signed-off-by: tison <wander4096@gmail.com>

* add csv format

Signed-off-by: tison <wander4096@gmail.com>

* flatten response

Signed-off-by: tison <wander4096@gmail.com>

* more flatten response

Signed-off-by: tison <wander4096@gmail.com>

* add CSV format

Signed-off-by: tison <wander4096@gmail.com>

* format InfluxdbV1Response

Signed-off-by: tison <wander4096@gmail.com>

* format ErrorResponse

Signed-off-by: tison <wander4096@gmail.com>

* propagate ErrorResponse to InfluxdbV1Response

Signed-off-by: tison <wander4096@gmail.com>

* format GreptimedbV1Response

Signed-off-by: tison <wander4096@gmail.com>

* format CsvResponse

Signed-off-by: tison <wander4096@gmail.com>

* impl IntoResponse for QueryResponse

Signed-off-by: tison <wander4096@gmail.com>

* promql

Signed-off-by: tison <wander4096@gmail.com>

* sql

Signed-off-by: tison <wander4096@gmail.com>

* compile

Signed-off-by: tison <wander4096@gmail.com>

* fixup aide

Signed-off-by: tison <wander4096@gmail.com>

* clear debt

Signed-off-by: tison <wander4096@gmail.com>

* fixup UT test_recordbatches_conversion

Signed-off-by: tison <wander4096@gmail.com>

* fixup IT cases

Signed-off-by: tison <wander4096@gmail.com>

* fixup more IT cases

Signed-off-by: tison <wander4096@gmail.com>

* fixup test-integration cases

Signed-off-by: tison <wander4096@gmail.com>

* update comment

Signed-off-by: tison <wander4096@gmail.com>

* fixup deserialize and most query < 1ms

Signed-off-by: tison <wander4096@gmail.com>

* fixup auth tests

Signed-off-by: tison <wander4096@gmail.com>

* fixup tests

Signed-off-by: tison <wander4096@gmail.com>

* fixup and align X-GreptimeDB headers

Signed-off-by: tison <wander4096@gmail.com>

* fixup compile

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-01-08 10:54:27 +00:00
niebayes
8c58d3f85b test(remote_wal): add unit tests for kafka remote wal (#2993)
* test: add unit tests

* feat: introduce kafka runtime backed by testcontainers

* test: add test for kafka runtime

* fix: format

* chore: make kafka image ready to be used

* feat: add entry builder

* tmp

* test: add unit tests for client manager

* test: add some unit tests for kafka log store

* chore: resolve some todos

* chore: resolve some todos

* test: add unit tests for kafka log store

* chore: add deprecate develop branch warning

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* tmp: ready to move unit tests to an indie dir

* test: update unit tests for client manager

* test: add unit tests for meta srv remote wal

* fix: license

* fix: test

* refactor: kafka image

* doc: add doc example for kafka image

* chore: migrate kafka image to an indie PR

* fix: CR

* fix: CR

* fix: test

* fix: CR

* fix: update Cargo.toml

* fix: CR

* feat: skip test if no endpoints env

* fix: format

* test: rewrite parallel test with barrier

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-08 10:48:11 +00:00
LFC
fcacb100a2 chore: expose some codes to let other projects use them (#3115) 2024-01-08 06:32:01 +00:00
Weny Xu
58ada1dfef fix: check env before running kafka test (#3110)
* fix: check env before running kafka test

* Apply suggestions from code review

Co-authored-by: niebayes <niebayes@gmail.com>

---------

Co-authored-by: niebayes <niebayes@gmail.com>
2024-01-08 06:30:43 +00:00
Weny Xu
f78c467a86 chore: bump opendal to 0.44.1 (#3111) 2024-01-08 03:55:58 +00:00
niebayes
78303639db feat(remote_wal): split an entry if it's too large (#3092)
* feat: split an entry if it's too large

* chore: rewrite check records

* test: add some unit tests for record

* chore: rewrite entry splitting

* chore: add unit tests for build records

* chore: add more unit tests for record

* chore: rewrite encdec of record

* revert: ignored test

* fix: set limit for max_batch_size

* fix: clippy

* chore: remove heavy logging

* fix: CR

* fix: properly terminate

* fix: CR

* fix: compiling

* fix: sqlness

* fix: CR

* fix: license

* fix: license
2024-01-05 12:41:43 +00:00
JeremyHi
bd1a5dc265 feat: metric engine support alter (#3098)
* feat: metric engine support alter

* chore: by comment

* feat: get physical table route for frontend
2024-01-05 09:46:39 +00:00
Weny Xu
e0a43f37d7 chore: bump opendal to 0.44 (#3058)
* chore: bump opendal to 0.44

* fix: fix test_object_store_cache_policy

* Revert "fix: fix test_object_store_cache_policy"

This reverts commit 46c37c343f66114e0f6ee7a0a3b9ee2b79c810af.

* fix: fix test_object_store_cache_policy

* fix: fix test_file_backend_with_lru_cache

* chore: apply suggestions from CR

* fix(mito): fix mito2 cache

* chore: apply suggestions from CR

* chore: apply suggestions from CR
2024-01-05 09:05:41 +00:00
zyy17
a89840f5f9 refactor(metrics): add 'greptime_' prefix for every metrics (#3093)
* refactor(metrics): add 'greptimedb_' prefix for every metrics

* chore: use 'greptime_' as prefix

* chore: add some prefix for new metrics

* chore: fix format error
2024-01-05 08:12:23 +00:00
dennis zhuang
c2db970687 feat: pushdown filters for some information_schema tables (#3091)
* feat: pushdown scan request to information_schema tables stream

* feat: supports filter pushdown for columns

* feat: supports filter pushdown for some information_schema tables

* fix: typo

* fix: predicate evaluate

* fix: typo

* test: predicates

* fix: comment

* fix: pub mod

* docs: improve comments

* fix: cr comments and supports like predicate

* chore: typo

* fix: cargo toml format

* chore: apply suggestion
2024-01-05 07:18:22 +00:00
LFC
e0525dbfeb chore: expose some codes to let other projects use them (#3102) 2024-01-05 06:54:01 +00:00
Weny Xu
cdc9021160 feat(metric): implement role and region_disk_usage (#3095)
* feat(metric): implement `role` and `region_disk_usage`

* Update src/datanode/src/region_server.rs

* Update src/datanode/src/heartbeat.rs

---------

Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com>
2024-01-05 06:53:52 +00:00
dennis zhuang
702ea32538 docs: update the description of greptimedb project (#3099)
* docs: update the info of greptimedb project

* chore: move up SQL/PromQL
2024-01-05 03:06:02 +00:00
Weny Xu
342faa4e07 test: add tests for lease keeper with logical table (#3096) 2024-01-05 02:29:48 +00:00
tison
44ba131987 fix: improve redact sql regexp (#3080)
Signed-off-by: tison <wander4096@gmail.com>
2024-01-04 14:53:20 +00:00
Yingwen
96b6235f25 feat(mito): Add WriteCache struct and write SSTs to write cache (#2999)
* docs: remove todo

* feat: add upload cache

* feat: add cache to sst write path

* feat: add storage to part

* feat: add dir to part

* feat: revert storage name

* feat: flush use upload part writer

* feat: use upload part writer in compaction task

* refactor: upload part writer builds parquet writer

* chore: suppress warnings

* refactor: rename UploadCache to WriteCache

* refactor: move source to write_all()

* chore: typos

* chore: remove output mod

* feat: changes upload to async method

* docs: update cache

* chore: fix compiler errors

* docs: remove comment

* chore: simplify upload part

* refactor: remove option from cache manager param to access layer

* feat: remove cache home from file cache

* feat: write cache holds file cache

* feat: add recover and pub some methods

* feat: remove usages of UploadPartWriter

* refactor: move sst_file_path to sst mod

* refactor: use write cache in access layer

* refactor: remove upload

* style: fix clippy

* refactor: pub write cache method/structs
2024-01-04 10:53:43 +00:00
Weny Xu
f1a4750576 feat(tests-integration): add more region migration integration tests (#3094) 2024-01-04 08:18:46 +00:00
Zhenchi
d973cf81f0 feat(inverted_index): implement apply for SstIndexApplier (#3088)
* feat(inverted_index): implement apply for SstIndexApplier

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: rename metrics

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-04 07:33:03 +00:00
Weny Xu
284a496f54 feat: add logs for upgrading candidate region and updating metadata (#3077)
* feat: add logs for upgrading candidate region

* feat: add logs for update metadata

* chore: apply suggestions from CR
2024-01-04 06:57:07 +00:00
WU Jingdi
4d250ed054 fix: Optimize export metric behavior (#3047)
* fix: optimize export metric behavior

* chore: fix ci

* chore: update config format

* chore: fix format
2024-01-04 06:40:50 +00:00
LFC
ec43b9183d feat: table route for metric engine (#3053)
* feat: table route for metric engine

* feat: register logical regions

* fix: open logical region (#96)

---------

Co-authored-by: JeremyHi <jiachun_feng@proton.me>
2024-01-04 06:30:17 +00:00
ZonaHe
b025bed45c feat: update dashboard to v0.4.6 (#3089)
Co-authored-by: ZonaHex <ZonaHex@users.noreply.github.com>
2024-01-04 02:56:41 +00:00
Weny Xu
21694c2a1d feat: abort region migration if leader region peer is unexpected (#3086) 2024-01-03 11:46:51 +00:00
ClSlaid
5c66ce6e88 chore: remove unnecessary result wrappings (#3084)
patch: remove unnecessary result wrappings

Signed-off-by: 蔡略 <cailue@bupt.edu.cn>
2024-01-03 10:20:33 +00:00
Weny Xu
b2b752337b fix: fix non-physical error msg (#3087) 2024-01-03 09:40:03 +00:00
Weny Xu
aa22f9c94a refactor: allow procedure to acquire share lock (#3061)
* feat: implement `KeyRwLock`

* refactor: use KeyRwLock instead of LockMap

* refactor: use StringKey instead of String

* chore: remove redundant code

* refactor: cleanup KeyRwLock staled locks before granting new lock

* feat: clean staled locks manually

* feat: sort lock key in lexicographical order

* feat: ensure the ref count before dropping the rwlock

* feat: add more tests for rwlock

* feat: drop the key guards first

* feat: drops the key guards in the reverse order

* chore: apply suggestions from CR

* chore: apply suggestions from CR

* chore: apply suggestions from CR
2024-01-03 08:05:45 +00:00
Weny Xu
611a8aa2fe feat(tests-integration): add a naive region migration integration test (#3078)
* fix: fix heartbeat handler ignore upgrade candidate instruction

* fix: fix handler did not inject wal options

* feat: expose `RegionMigrationProcedureTask`

* feat(tests-integration): add a naive region migration test

* chore: apply suggestions from CR

* feat: add test if the target region has migrated

* chore: apply suggestions from CR
2024-01-03 07:12:59 +00:00
Zhenchi
e4c71843e6 feat(inverted_index): get memory usage of appliers (#3081)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-03 06:56:56 +00:00
Zhenchi
e1ad7af10c feat(puffin): finish return written bytes (#3082)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-03 06:55:09 +00:00
Zhenchi
b9302e4f0d feat(inverted_index): Add applier builder to convert Expr to Predicates (Part 2) (#3068)
* feat(inverted_index.integration): Add applier builder to convert Expr to Predicates (Part 1)

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat(inverted_index.integration): Add applier builder to convert Expr to Predicates (Part 2)

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: add comparison unit tests

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: add eq_list unit tests

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: add in_list unit tests

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: add and unit tests

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: strip tests

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-03 05:14:40 +00:00
Yingwen
2e686fe053 feat(mito): Implement file cache (#3022)
* feat: recover cache

* feat: moka features

* test: tests for file cache

* chore: suppress warning

* fix: parse_inde_key consider suffix

* feat: update cache

* feat: expose cache file path

* feat: use cache_path in test
2024-01-03 02:05:06 +00:00
Weny Xu
128d3717fa test(tests-integration): add a naive test with kafka wal (#3071)
* chore(tests-integration): add setup tests with kafka wal to README.md

* feat(tests-integration): add meta wal config

* fix(tests-integration): fix sign of both_instances_cases_with_kafka_wal

* chore(tests-integration): set num_topic to 3 for tests

* test(tests-integration): add a naive test with kafka wal

* chore: apply suggestions from CR
2024-01-02 09:05:20 +00:00
Weny Xu
2b181e91e0 refactor: unify the injection of WAL option (#3066)
* feat: add prepare_wal_option

* refactor: use integer hashmap

* feat: unify the injection of WAL option

* fix: fix procedure_flow_upgrade_candidate_with_retry

* chore: apply suggestions from CR
2024-01-02 07:40:02 +00:00
Weny Xu
d87ab06b28 feat: add kafka wal integration test utils (#3069)
* feat(tests-integration): add wal_config

* feat: add kafka wal integration test utils
2024-01-02 07:38:43 +00:00
Weny Xu
5653389063 feat!: correct the kafka config option (#3065)
* feat: correct the kafka config option

* refactor: rewrite the verbose comments
2024-01-02 07:31:37 +00:00
dimbtp
c4d7b0d91d feat: add some tables for information_schema (#3060)
* feat: add information_schema.optimizer_trace

* feat: add information_schema.parameters

* feat: add information_schema.profiling

* feat: add information_schema.referential_constraints

* feat: add information_schema.routines

* feat: add information_schema.schema_privileges

* feat: add information_schema.table_privileges

* feat: add information_schema.triggers

* fix: update sql test result

* feat: add information_schema.global_status

* feat: add information_schema.session_status

* fix: update sql test result

* fix: add TODO for some tables

* Update src/catalog/src/information_schema/memory_table/tables.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Co-authored-by: dennis zhuang <killme2008@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-02 04:10:59 +00:00
dimbtp
f735f739e5 feat: add information_schema.key_column_usage (#3057)
* feat: add information_schema.key_column_usage

* fix: follow #3057 review comments

* fix: add sql test for `key_column_usage` table

* fix: fix spell typo

* fix: resolve conflict in sql test result
2023-12-31 12:29:06 +00:00
dimbtp
6070e88077 feat: add information_schema.files (#3054)
* feat: add information_schema.files

* fix: update information_schema.result

* fix: change `EXTRA` field type to string
2023-12-31 02:08:16 +00:00
niebayes
9db168875c fix(remote_wal): some known issues (#3052)
* fix: some known issues

* fix: CR

* fix: CR

* chore: replace Mutex with RwLock
2023-12-30 15:28:10 +00:00
AntiTopQuark
4460af800f feat(TableRouteValue): add panic notes and type checks (#3031)
* refactor(TableRouteValue): add panic notes and type checks

* chore: add deprecate develop branch warning

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add error defines and checks

* Update README.md

* update code format and fix tests

* update name of error

* delete unused note

* fix unsafe .expect() for region_route()

* update error name

* update unwrap

* update code format

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2023-12-30 13:02:26 +00:00
Zhenchi
69a53130c2 feat(inverted_index): Add applier builder to convert Expr to Predicates (Part 1) (#3034)
* feat(inverted_index.integration): Add applier builder to convert Expr to Predicates (Part 1)

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: add docs

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: typos

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* Update src/mito2/src/sst/index/applier/builder.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: remove unwrap

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: error source

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2023-12-30 07:32:32 +00:00
Ning Sun
1c94d4c506 ci: fix duplicated doc issue (#3056) 2023-12-30 13:36:14 +08:00
Ning Sun
41e51d4ab3 chore: attempt to add doc issue in label task (#3021)
* chore: attempt to add doc issue in label task

* ci: check pr body for doc issue creation
2023-12-29 20:17:34 +08:00
dennis zhuang
11ae85b1cd feat: adds information_schema.schemata (#3051)
* feat: improve information_schema.columns

* feat: adds information_schema.schemata

* fix: instance test

* fix: comment
2023-12-29 09:22:31 +00:00
LFC
7551432cff refactor: merge standalone and metasrv table metadata allocators (#3035)
* refactor: merge standalone and metasrv table metadata allocators

* Update src/common/meta/src/ddl/table_meta.rs

Co-authored-by: niebayes <niebayes@gmail.com>

* Update src/common/meta/src/ddl/table_meta.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

---------

Co-authored-by: niebayes <niebayes@gmail.com>
Co-authored-by: Weny Xu <wenymedia@gmail.com>
2023-12-29 08:50:59 +00:00
Weny Xu
e16f093282 test(remote_wal): add sqlness with kafka wal (#3027)
* feat(sqlness): add kafka wal config

* chore: add sqlness with kafka wal ci config

* fix: fix config

* chore: apply suggestions from CR

* fix: add metasrv config to sqlness with kafka

* fix: replay memtable should from flushed_entry_id + 1

* fix: should set append flag to fopen

* feat: start wal allocator in standalone meta mode

* feat: append a noop record after kafka topic initialization

* test: ignore tests temporarily

* test: change sqlness kafka wal config
2023-12-29 08:17:22 +00:00
Weny Xu
301ffc1d91 feat(remote_wal): append a noop record after kafka topic initialization (#3040)
* feat: append a noop record after kafka topic initialization

* chore: apply suggestions from CR

* feat: ignore the noop record during the read
2023-12-29 07:46:48 +00:00
Weny Xu
d22072f68b feat: expose region migration http endpoint (#3032)
* feat: add region migration endpoint

* feat: implement naive peer registry

* chore: apply suggestions from CR

* chore: rename `ContextFactoryImpl` to `DefaultContextFactory`

* chore: rename unregister to deregister

* refactor: use lease-based alive datanode checking
2023-12-29 06:57:00 +00:00
Weny Xu
b526d159c3 fix: replay memtable should from flushed_entry_id + 1 (#3038)
* fix: replay memtable should from flushed_entry_id + 1

* chore: apply suggestions from CR
2023-12-28 16:12:07 +00:00
ZonaHe
7152407428 feat: update dashboard to v0.4.5 (#3033)
Co-authored-by: ZonaHex <ZonaHex@users.noreply.github.com>
2023-12-28 11:51:43 +00:00
Ruihang Xia
b58296de22 feat: Implement OR for PromQL (#3024)
* with anti-join

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl UnionDistinctOn

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* unify schema

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add sqlness case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add UTs

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/promql/src/planner.rs

Co-authored-by: dennis zhuang <killme2008@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: dennis zhuang <killme2008@gmail.com>
2023-12-28 06:56:17 +00:00
Yingwen
1d80a0f2d6 chore: Update CI badge in README.md (#3028)
chore: Update README.md

Fix CI badge
2023-12-28 05:59:27 +00:00
Ruihang Xia
286b9af661 chore: change all reference from develop to main (#3026)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-12-28 04:11:00 +00:00
dennis zhuang
af13eeaad3 feat: adds character_sets, collations and events etc. (#3017)
feat: adds character_sets, collations and events etc. to information_schema
2023-12-28 04:01:42 +00:00
256 changed files with 12271 additions and 2784 deletions

View File

@@ -19,3 +19,5 @@ GT_GCS_BUCKET = GCS bucket
GT_GCS_SCOPE = GCS scope
GT_GCS_CREDENTIAL_PATH = GCS credential path
GT_GCS_ENDPOINT = GCS end point
# Settings for kafka wal test
GT_KAFKA_ENDPOINTS = localhost:9092

View File

@@ -1,7 +1,7 @@
on:
push:
branches:
- develop
- main
paths-ignore:
- 'docs/**'
- 'config/**'

View File

@@ -11,7 +11,6 @@ on:
- '.gitignore'
push:
branches:
- develop
- main
paths-ignore:
- 'docs/**'
@@ -105,6 +104,37 @@ jobs:
path: ${{ runner.temp }}/greptime-*.log
retention-days: 3
sqlness-kafka-wal:
name: Sqlness Test with Kafka Wal
if: github.event.pull_request.draft == false
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-20.04-8-cores ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Setup kafka server
working-directory: tests-integration/fixtures/kafka
run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Run sqlness
run: cargo sqlness -w kafka -k 127.0.0.1:9092
- name: Upload sqlness logs
if: always()
uses: actions/upload-artifact@v3
with:
name: sqlness-logs
path: ${{ runner.temp }}/greptime-*.log
retention-days: 3
fmt:
name: Rustfmt
if: github.event.pull_request.draft == false

View File

@@ -18,3 +18,14 @@ jobs:
enable-versioned-regex: false
repo-token: ${{ secrets.GITHUB_TOKEN }}
sync-labels: 1
- name: create an issue in doc repo
uses: dacbd/create-issue-action@main
if: ${{ github.event.action == 'opened' && contains(github.event.pull_request.body, '- [ ] This PR does not require documentation updates.') }}
with:
owner: GreptimeTeam
repo: docs
token: ${{ secrets.DOCS_REPO_TOKEN }}
title: Update docs for ${{ github.event.issue.title || github.event.pull_request.title }}
body: |
A document change request is generated from
${{ github.event.issue.html_url || github.event.pull_request.html_url }}

View File

@@ -11,7 +11,6 @@ on:
- '.gitignore'
push:
branches:
- develop
- main
paths:
- 'docs/**'

View File

@@ -3,7 +3,7 @@ name: License checker
on:
push:
branches:
- develop
- main
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
jobs:

View File

@@ -10,7 +10,7 @@ Follow our [README](https://github.com/GreptimeTeam/greptimedb#readme) to get th
It can feel intimidating to contribute to a complex project, but it can also be exciting and fun. These general notes will help everyone participate in this communal activity.
- Follow the [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/develop/CODE_OF_CONDUCT.md)
- Follow the [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/main/CODE_OF_CONDUCT.md)
- Small changes make huge differences. We will happily accept a PR making a single character change if it helps move forward. Don't wait to have everything working.
- Check the closed issues before opening your issue.
- Try to follow the existing style of the code.
@@ -26,7 +26,7 @@ Pull requests are great, but we accept all kinds of other help if you like. Such
## Code of Conduct
Also, there are things that we are not looking for because they don't match the goals of the product or benefit the community. Please read [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/develop/CODE_OF_CONDUCT.md); we hope everyone can keep good manners and become an honored member.
Also, there are things that we are not looking for because they don't match the goals of the product or benefit the community. Please read [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/main/CODE_OF_CONDUCT.md); we hope everyone can keep good manners and become an honored member.
## License

159
Cargo.lock generated
View File

@@ -196,7 +196,7 @@ checksum = "8f1f8f5a6f3d50d89e3797d7593a50f96bb2aaa20ca0cc7be1fb673232c91d72"
[[package]]
name = "api"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"common-base",
"common-decimal",
@@ -674,7 +674,7 @@ dependencies = [
[[package]]
name = "auth"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"async-trait",
@@ -847,7 +847,7 @@ dependencies = [
[[package]]
name = "benchmarks"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"arrow",
"chrono",
@@ -1179,10 +1179,11 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "catalog"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"arc-swap",
"arrow",
"arrow-schema",
"async-stream",
"async-trait",
@@ -1450,7 +1451,7 @@ checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
[[package]]
name = "client"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"arrow-flight",
@@ -1483,7 +1484,7 @@ dependencies = [
"session",
"snafu",
"substrait 0.17.1",
"substrait 0.5.0",
"substrait 0.5.1",
"tokio",
"tokio-stream",
"tonic 0.10.2",
@@ -1513,7 +1514,7 @@ dependencies = [
[[package]]
name = "cmd"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"anymap",
"async-trait",
@@ -1564,7 +1565,7 @@ dependencies = [
"session",
"snafu",
"store-api",
"substrait 0.5.0",
"substrait 0.5.1",
"table",
"temp-env",
"tikv-jemallocator",
@@ -1597,7 +1598,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"
[[package]]
name = "common-base"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"anymap",
"bitvec",
@@ -1612,7 +1613,7 @@ dependencies = [
[[package]]
name = "common-catalog"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"chrono",
"common-error",
@@ -1623,7 +1624,7 @@ dependencies = [
[[package]]
name = "common-config"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"common-base",
"humantime-serde",
@@ -1636,7 +1637,7 @@ dependencies = [
[[package]]
name = "common-datasource"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"arrow",
"arrow-schema",
@@ -1667,7 +1668,7 @@ dependencies = [
[[package]]
name = "common-decimal"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"arrow",
"bigdecimal",
@@ -1681,7 +1682,7 @@ dependencies = [
[[package]]
name = "common-error"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"snafu",
"strum 0.25.0",
@@ -1689,7 +1690,7 @@ dependencies = [
[[package]]
name = "common-function"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"arc-swap",
"build-data",
@@ -1713,7 +1714,7 @@ dependencies = [
[[package]]
name = "common-greptimedb-telemetry"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"async-trait",
"common-error",
@@ -1732,7 +1733,7 @@ dependencies = [
[[package]]
name = "common-grpc"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"arrow-flight",
@@ -1762,7 +1763,7 @@ dependencies = [
[[package]]
name = "common-grpc-expr"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"async-trait",
@@ -1781,7 +1782,7 @@ dependencies = [
[[package]]
name = "common-macro"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"arc-swap",
"common-query",
@@ -1796,7 +1797,7 @@ dependencies = [
[[package]]
name = "common-mem-prof"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"common-error",
"common-macro",
@@ -1809,7 +1810,7 @@ dependencies = [
[[package]]
name = "common-meta"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"async-recursion",
@@ -1832,6 +1833,7 @@ dependencies = [
"derive_builder 0.12.0",
"etcd-client",
"futures",
"futures-util",
"humantime-serde",
"hyper",
"lazy_static",
@@ -1850,11 +1852,12 @@ dependencies = [
"tokio",
"toml 0.8.8",
"tonic 0.10.2",
"uuid",
]
[[package]]
name = "common-procedure"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"async-stream",
"async-trait",
@@ -1878,7 +1881,7 @@ dependencies = [
[[package]]
name = "common-procedure-test"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"async-trait",
"common-procedure",
@@ -1886,7 +1889,7 @@ dependencies = [
[[package]]
name = "common-query"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"async-trait",
@@ -1909,7 +1912,7 @@ dependencies = [
[[package]]
name = "common-recordbatch"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"common-error",
"common-macro",
@@ -1926,7 +1929,7 @@ dependencies = [
[[package]]
name = "common-runtime"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"async-trait",
"common-error",
@@ -1946,7 +1949,7 @@ dependencies = [
[[package]]
name = "common-telemetry"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"backtrace",
"common-error",
@@ -1972,7 +1975,7 @@ dependencies = [
[[package]]
name = "common-test-util"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"once_cell",
"rand",
@@ -1981,7 +1984,7 @@ dependencies = [
[[package]]
name = "common-time"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"arrow",
"chrono",
@@ -1997,7 +2000,7 @@ dependencies = [
[[package]]
name = "common-version"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"build-data",
]
@@ -2627,7 +2630,7 @@ dependencies = [
[[package]]
name = "datanode"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"arrow-flight",
@@ -2687,7 +2690,7 @@ dependencies = [
"snafu",
"sql",
"store-api",
"substrait 0.5.0",
"substrait 0.5.1",
"table",
"tokio",
"tokio-stream",
@@ -2701,7 +2704,7 @@ dependencies = [
[[package]]
name = "datatypes"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"arrow",
"arrow-array",
@@ -3162,7 +3165,7 @@ dependencies = [
[[package]]
name = "file-engine"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"async-trait",
@@ -3293,7 +3296,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa"
[[package]]
name = "frontend"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"arc-swap",
@@ -3357,7 +3360,7 @@ dependencies = [
"sqlparser 0.38.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)",
"store-api",
"strfmt",
"substrait 0.5.0",
"substrait 0.5.1",
"table",
"tokio",
"toml 0.8.8",
@@ -4011,7 +4014,7 @@ dependencies = [
[[package]]
name = "index"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"async-trait",
"asynchronous-codec",
@@ -4029,7 +4032,7 @@ dependencies = [
"prost 0.12.3",
"rand",
"regex",
"regex-automata 0.1.10",
"regex-automata 0.2.0",
"snafu",
"tokio",
"tokio-util",
@@ -4491,12 +4494,13 @@ checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "log-store"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"async-stream",
"async-trait",
"byteorder",
"bytes",
"chrono",
"common-base",
"common-config",
"common-error",
@@ -4505,13 +4509,14 @@ dependencies = [
"common-runtime",
"common-telemetry",
"common-test-util",
"dashmap",
"futures",
"futures-util",
"itertools 0.10.5",
"protobuf",
"protobuf-build",
"raft-engine",
"rand",
"rand_distr",
"rskafka",
"serde",
"serde_json",
@@ -4519,6 +4524,7 @@ dependencies = [
"store-api",
"tokio",
"tokio-util",
"uuid",
]
[[package]]
@@ -4765,7 +4771,7 @@ dependencies = [
[[package]]
name = "meta-client"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"async-trait",
@@ -4795,7 +4801,7 @@ dependencies = [
[[package]]
name = "meta-srv"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"anymap",
"api",
@@ -4873,7 +4879,7 @@ dependencies = [
[[package]]
name = "metric-engine"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"aquamarine",
@@ -4944,7 +4950,7 @@ dependencies = [
[[package]]
name = "mito2"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"anymap",
"api",
@@ -4977,6 +4983,7 @@ dependencies = [
"datatypes",
"futures",
"humantime-serde",
"index",
"lazy_static",
"log-store",
"memcomparable",
@@ -4985,8 +4992,10 @@ dependencies = [
"object-store",
"parquet",
"paste",
"pin-project",
"prometheus",
"prost 0.12.3",
"puffin",
"regex",
"serde",
"serde_json",
@@ -5442,7 +5451,7 @@ dependencies = [
[[package]]
name = "object-store"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"anyhow",
"async-trait",
@@ -5498,9 +5507,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
[[package]]
name = "opendal"
version = "0.40.0"
version = "0.44.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddba7299bab261d3ae2f37617fb7f45b19ed872752bb4e22cf93a69d979366c5"
checksum = "bc0ad72f7b44ca4ae59d27ea151fdc6f37305cf6efe099bdaedbb30ec34579c0"
dependencies = [
"anyhow",
"async-compat",
@@ -5511,15 +5520,15 @@ dependencies = [
"chrono",
"flagset",
"futures",
"getrandom",
"http",
"hyper",
"log",
"md-5",
"once_cell",
"parking_lot 0.12.1",
"percent-encoding",
"pin-project",
"quick-xml 0.29.0",
"quick-xml 0.30.0",
"reqsign",
"reqwest",
"serde",
@@ -5687,7 +5696,7 @@ dependencies = [
[[package]]
name = "operator"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"async-trait",
@@ -5731,7 +5740,7 @@ dependencies = [
"sql",
"sqlparser 0.38.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)",
"store-api",
"substrait 0.5.0",
"substrait 0.5.1",
"table",
"tokio",
"tonic 0.10.2",
@@ -5962,7 +5971,7 @@ dependencies = [
[[package]]
name = "partition"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"async-trait",
@@ -6281,7 +6290,7 @@ dependencies = [
[[package]]
name = "plugins"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"auth",
"common-base",
@@ -6539,8 +6548,9 @@ dependencies = [
[[package]]
name = "promql"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"ahash 0.8.6",
"async-recursion",
"async-trait",
"bytemuck",
@@ -6748,7 +6758,7 @@ dependencies = [
[[package]]
name = "puffin"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"async-trait",
"bitflags 2.4.1",
@@ -6859,7 +6869,7 @@ dependencies = [
[[package]]
name = "query"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"ahash 0.8.6",
"api",
@@ -6917,7 +6927,7 @@ dependencies = [
"stats-cli",
"store-api",
"streaming-stats",
"substrait 0.5.0",
"substrait 0.5.1",
"table",
"tokio",
"tokio-stream",
@@ -6934,9 +6944,9 @@ dependencies = [
[[package]]
name = "quick-xml"
version = "0.29.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81b9228215d82c7b61490fec1de287136b5de6f5700f6e58ea9ad61a7964ca51"
checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956"
dependencies = [
"memchr",
"serde",
@@ -7133,8 +7143,18 @@ name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax 0.6.29",
]
[[package]]
name = "regex-automata"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9368763f5a9b804326f3af749e16f9abf378d227bcdee7634b13d8f17793782"
dependencies = [
"fst",
"memchr",
"regex-syntax 0.6.29",
]
@@ -8177,7 +8197,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "script"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"arc-swap",
@@ -8437,7 +8457,7 @@ dependencies = [
[[package]]
name = "servers"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"aide",
"api",
@@ -8533,7 +8553,7 @@ dependencies = [
[[package]]
name = "session"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"arc-swap",
@@ -8794,7 +8814,7 @@ dependencies = [
[[package]]
name = "sql"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"common-base",
@@ -8846,7 +8866,7 @@ dependencies = [
[[package]]
name = "sqlness-runner"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"async-trait",
"clap 4.4.11",
@@ -8858,6 +8878,7 @@ dependencies = [
"common-recordbatch",
"common-time",
"serde",
"serde_json",
"sqlness",
"tinytemplate",
"tokio",
@@ -9052,7 +9073,7 @@ dependencies = [
[[package]]
name = "store-api"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"aquamarine",
@@ -9192,7 +9213,7 @@ dependencies = [
[[package]]
name = "substrait"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"async-recursion",
"async-trait",
@@ -9340,7 +9361,7 @@ dependencies = [
[[package]]
name = "table"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"anymap",
"async-trait",
@@ -9452,7 +9473,7 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
[[package]]
name = "tests-integration"
version = "0.5.0"
version = "0.5.1"
dependencies = [
"api",
"async-trait",
@@ -9508,7 +9529,7 @@ dependencies = [
"sql",
"sqlx",
"store-api",
"substrait 0.5.0",
"substrait 0.5.1",
"table",
"tempfile",
"time",

View File

@@ -58,7 +58,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.5.0"
version = "0.5.1"
edition = "2021"
license = "Apache-2.0"
@@ -111,7 +111,7 @@ prost = "0.12"
raft-engine = { git = "https://github.com/tikv/raft-engine.git", rev = "22dfb426cd994602b57725ef080287d3e53db479" }
rand = "0.8"
regex = "1.8"
regex-automata = { version = "0.1", features = ["transducer"] }
regex-automata = { version = "0.2", features = ["transducer"] }
reqwest = { version = "0.11", default-features = false, features = [
"json",
"rustls-tls-native-roots",
@@ -169,6 +169,7 @@ datanode = { path = "src/datanode" }
datatypes = { path = "src/datatypes" }
file-engine = { path = "src/file-engine" }
frontend = { path = "src/frontend" }
index = { path = "src/index" }
log-store = { path = "src/log-store" }
meta-client = { path = "src/meta-client" }
meta-srv = { path = "src/meta-srv" }
@@ -179,6 +180,7 @@ operator = { path = "src/operator" }
partition = { path = "src/partition" }
plugins = { path = "src/plugins" }
promql = { path = "src/promql" }
puffin = { path = "src/puffin" }
query = { path = "src/query" }
script = { path = "src/script" }
servers = { path = "src/servers" }

View File

@@ -1,8 +1,8 @@
<p align="center">
<picture>
<source media="(prefers-color-scheme: light)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding.png">
<source media="(prefers-color-scheme: dark)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding-dark.png">
<img alt="GreptimeDB Logo" src="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding.png" width="400px">
<source media="(prefers-color-scheme: light)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@main/docs/logo-text-padding.png">
<source media="(prefers-color-scheme: dark)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@main/docs/logo-text-padding-dark.png">
<img alt="GreptimeDB Logo" src="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@main/docs/logo-text-padding.png" width="400px">
</picture>
</p>
@@ -12,11 +12,11 @@
</h3>
<p align="center">
<a href="https://codecov.io/gh/GrepTimeTeam/greptimedb"><img src="https://codecov.io/gh/GrepTimeTeam/greptimedb/branch/develop/graph/badge.svg?token=FITFDI3J3C"></img></a>
<a href="https://codecov.io/gh/GrepTimeTeam/greptimedb"><img src="https://codecov.io/gh/GrepTimeTeam/greptimedb/branch/main/graph/badge.svg?token=FITFDI3J3C"></img></a>
&nbsp;
<a href="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml"><img src="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml/badge.svg" alt="CI"></img></a>
&nbsp;
<a href="https://github.com/greptimeTeam/greptimedb/blob/develop/LICENSE"><img src="https://img.shields.io/github/license/greptimeTeam/greptimedb"></a>
<a href="https://github.com/greptimeTeam/greptimedb/blob/main/LICENSE"><img src="https://img.shields.io/github/license/greptimeTeam/greptimedb"></a>
</p>
<p align="center">
@@ -27,26 +27,19 @@
<a href="https://greptime.com/slack"><img src="https://img.shields.io/badge/slack-GreptimeDB-0abd59?logo=slack" alt="slack" /></a>
</p>
> [!WARNING]
> Our default branch has changed from `develop` to `main` (issue [#3025](https://github.com/GreptimeTeam/greptimedb/issues/3025)). Please update your local repository to use the `main` branch.
## What is GreptimeDB
GreptimeDB is an open-source time-series database with a special focus on
scalability, analytical capabilities and efficiency. It's designed to work on
infrastructure of the cloud era, and users benefit from its elasticity and commodity
storage.
GreptimeDB is an open-source time-series database focusing on efficiency, scalability, and analytical capabilities.
It's designed to work on infrastructure of the cloud era, and users benefit from its elasticity and commodity storage.
Our core developers have been building time-series data platform
for years. Based on their best-practices, GreptimeDB is born to give you:
Our core developers have been building time-series data platforms for years. Based on their best practices, GreptimeDB was born to give you:
- A standalone binary that scales to highly-available distributed cluster, providing a transparent experience for cluster users
- Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends
- Flexible indexes, tackling high cardinality issues down
- Distributed, parallel query execution, leveraging elastic computing resource
- Native SQL, and Python scripting for advanced analytical scenarios
- Widely adopted database protocols and APIs, native PromQL supports
- Extensible table engine architecture for extensive workloads
- Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends, particularly cloud object storage with 50x cost efficiency.
- Fully open-source distributed cluster architecture that harnesses the power of cloud-native elastic computing resources.
- Seamless scalability from a standalone binary at edge to a robust, highly available distributed cluster in cloud, with a transparent experience for both developers and administrators.
- Native SQL and PromQL for queries, and Python scripting to facilitate complex analytical tasks.
- Flexible indexing capabilities and a distributed, parallel-processing query engine that tackle high-cardinality issues.
- Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc.
## Quick Start
@@ -171,7 +164,7 @@ In addition, you may:
GreptimeDB uses the [Apache 2.0 license][1] to strike a balance between
open contributions and allowing you to use the software however you want.
[1]: <https://github.com/greptimeTeam/greptimedb/blob/develop/LICENSE>
[1]: <https://github.com/greptimeTeam/greptimedb/blob/main/LICENSE>
## Contributing

View File

@@ -51,9 +51,10 @@ sync_write = false
# Kafka wal options, see `standalone.example.toml`.
# broker_endpoints = ["127.0.0.1:9092"]
# max_batch_size = "4MB"
# Warning: Kafka has a default limit of 1MB per message in a topic.
# max_batch_size = "1MB"
# linger = "200ms"
# produce_record_timeout = "100ms"
# consumer_wait_timeout = "100ms"
# backoff_init = "500ms"
# backoff_max = "10s"
# backoff_base = 2
@@ -129,11 +130,10 @@ parallel_scan_channel_size = 32
# [export_metrics]
# whether enable export metrics, default is false
# enable = false
# The url of metrics export endpoint, default is `frontend` default HTTP endpoint.
# endpoint = "127.0.0.1:4000"
# The database name of exported metrics stores, user needs to specify a valid database
# db = ""
# The interval of export metrics
# write_interval = "30s"
# [export_metrics.remote_write]
# The URL the metrics are sent to. It is empty by default. Example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`
# url = ""
# HTTP headers of Prometheus remote-write carry
# headers = {}

View File

@@ -87,11 +87,8 @@ tcp_nodelay = true
# [export_metrics]
# whether enable export metrics, default is false
# enable = false
# The url of metrics export endpoint, default is `frontend` default HTTP endpoint.
# endpoint = "127.0.0.1:4000"
# The database name of exported metrics stores, user needs to specify a valid database
# db = ""
# The interval of export metrics
# write_interval = "30s"
# HTTP headers of Prometheus remote-write carry
# headers = {}
# For `frontend`, `self_import` is recommended to collect the metrics generated by itself
# [export_metrics.self_import]
# db = "information_schema"

View File

@@ -86,11 +86,10 @@ provider = "raft_engine"
# [export_metrics]
# whether enable export metrics, default is false
# enable = false
# The url of metrics export endpoint, default is `frontend` default HTTP endpoint.
# endpoint = "127.0.0.1:4000"
# The database name of exported metrics stores, user needs to specify a valid database
# db = ""
# The interval of export metrics
# write_interval = "30s"
# [export_metrics.remote_write]
# The URL the metrics are sent to. It is empty by default. Example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`
# url = ""
# HTTP headers of Prometheus remote-write carry
# headers = {}

View File

@@ -100,29 +100,30 @@ provider = "raft_engine"
# Available selector types:
# - "round_robin" (default)
# selector_type = "round_robin"
# A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
# The prefix of topic name.
# topic_name_prefix = "greptimedb_wal_topic"
# Number of partitions per topic.
# num_partitions = 1
# Expected number of replicas of each partition.
# The number of replicas of each partition.
# replication_factor = 1
# The maximum log size a kafka batch producer could buffer.
# max_batch_size = "4MB"
# The linger duration of a kafka batch producer.
# The max size of a single producer batch.
# Warning: Kafka has a default limit of 1MB per message in a topic.
# max_batch_size = "1MB"
# The linger duration.
# linger = "200ms"
# The maximum amount of time (in milliseconds) to wait for Kafka records to be returned.
# produce_record_timeout = "100ms"
# Above which a topic creation operation will be cancelled.
# The consumer wait timeout.
# consumer_wait_timeout = "100ms"
# Create topic timeout.
# create_topic_timeout = "30s"
# The initial backoff for kafka clients.
# The initial backoff delay.
# backoff_init = "500ms"
# The maximum backoff for kafka clients.
# The maximum backoff delay.
# backoff_max = "10s"
# Exponential backoff rate, i.e. next backoff = base * current backoff.
# backoff_base = 2
# Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate.
# The deadline of retries.
# backoff_deadline = "5mins"
# WAL data directory
@@ -230,11 +231,8 @@ parallel_scan_channel_size = 32
# [export_metrics]
# whether enable export metrics, default is false
# enable = false
# The url of metrics export endpoint, default is `frontend` default HTTP endpoint.
# endpoint = "127.0.0.1:4000"
# The database name of exported metrics stores, user needs to specify a valid database
# db = ""
# The interval of export metrics
# write_interval = "30s"
# HTTP headers of Prometheus remote-write carry
# headers = {}
# For `standalone`, `self_import` is recommended to collect the metrics generated by itself
# [export_metrics.self_import]
# db = "information_schema"

View File

@@ -11,6 +11,7 @@ testing = []
api.workspace = true
arc-swap = "1.0"
arrow-schema.workspace = true
arrow.workspace = true
async-stream.workspace = true
async-trait = "0.1"
build-data = "0.1"

View File

@@ -13,7 +13,10 @@
// limitations under the License.
mod columns;
mod key_column_usage;
mod memory_table;
mod predicate;
mod schemata;
mod table_names;
mod tables;
@@ -27,6 +30,7 @@ use datatypes::schema::SchemaRef;
use futures_util::StreamExt;
use lazy_static::lazy_static;
use paste::paste;
pub(crate) use predicate::Predicates;
use snafu::ResultExt;
use store_api::data_source::DataSource;
use store_api::storage::{ScanRequest, TableId};
@@ -40,7 +44,9 @@ pub use table_names::*;
use self::columns::InformationSchemaColumns;
use crate::error::Result;
use crate::information_schema::key_column_usage::InformationSchemaKeyColumnUsage;
use crate::information_schema::memory_table::{get_schema_columns, MemoryTable};
use crate::information_schema::schemata::InformationSchemaSchemata;
use crate::information_schema::tables::InformationSchemaTables;
use crate::CatalogManager;
@@ -51,6 +57,22 @@ lazy_static! {
COLUMN_PRIVILEGES,
COLUMN_STATISTICS,
BUILD_INFO,
CHARACTER_SETS,
COLLATIONS,
COLLATION_CHARACTER_SET_APPLICABILITY,
CHECK_CONSTRAINTS,
EVENTS,
FILES,
OPTIMIZER_TRACE,
PARAMETERS,
PROFILING,
REFERENTIAL_CONSTRAINTS,
ROUTINES,
SCHEMA_PRIVILEGES,
TABLE_PRIVILEGES,
TRIGGERS,
GLOBAL_STATUS,
SESSION_STATUS,
];
}
@@ -121,11 +143,16 @@ impl InformationSchemaProvider {
fn build_tables(&mut self) {
let mut tables = HashMap::new();
tables.insert(TABLES.to_string(), self.build_table(TABLES).unwrap());
tables.insert(SCHEMATA.to_string(), self.build_table(SCHEMATA).unwrap());
tables.insert(COLUMNS.to_string(), self.build_table(COLUMNS).unwrap());
tables.insert(
KEY_COLUMN_USAGE.to_string(),
self.build_table(KEY_COLUMN_USAGE).unwrap(),
);
// Add memory tables
for name in MEMORY_TABLES.iter() {
tables.insert((*name).to_string(), self.build_table(name).unwrap());
tables.insert((*name).to_string(), self.build_table(name).expect(name));
}
self.tables = tables;
@@ -134,7 +161,7 @@ impl InformationSchemaProvider {
fn build_table(&self, name: &str) -> Option<TableRef> {
self.information_table(name).map(|table| {
let table_info = Self::table_info(self.catalog_name.clone(), &table);
let filter_pushdown = FilterPushDownType::Unsupported;
let filter_pushdown = FilterPushDownType::Inexact;
let thin_table = ThinTable::new(table_info, filter_pushdown);
let data_source = Arc::new(InformationTableDataSource::new(table));
@@ -156,6 +183,32 @@ impl InformationSchemaProvider {
COLUMN_PRIVILEGES => setup_memory_table!(COLUMN_PRIVILEGES),
COLUMN_STATISTICS => setup_memory_table!(COLUMN_STATISTICS),
BUILD_INFO => setup_memory_table!(BUILD_INFO),
CHARACTER_SETS => setup_memory_table!(CHARACTER_SETS),
COLLATIONS => setup_memory_table!(COLLATIONS),
COLLATION_CHARACTER_SET_APPLICABILITY => {
setup_memory_table!(COLLATION_CHARACTER_SET_APPLICABILITY)
}
CHECK_CONSTRAINTS => setup_memory_table!(CHECK_CONSTRAINTS),
EVENTS => setup_memory_table!(EVENTS),
FILES => setup_memory_table!(FILES),
OPTIMIZER_TRACE => setup_memory_table!(OPTIMIZER_TRACE),
PARAMETERS => setup_memory_table!(PARAMETERS),
PROFILING => setup_memory_table!(PROFILING),
REFERENTIAL_CONSTRAINTS => setup_memory_table!(REFERENTIAL_CONSTRAINTS),
ROUTINES => setup_memory_table!(ROUTINES),
SCHEMA_PRIVILEGES => setup_memory_table!(SCHEMA_PRIVILEGES),
TABLE_PRIVILEGES => setup_memory_table!(TABLE_PRIVILEGES),
TRIGGERS => setup_memory_table!(TRIGGERS),
GLOBAL_STATUS => setup_memory_table!(GLOBAL_STATUS),
SESSION_STATUS => setup_memory_table!(SESSION_STATUS),
KEY_COLUMN_USAGE => Some(Arc::new(InformationSchemaKeyColumnUsage::new(
self.catalog_name.clone(),
self.catalog_manager.clone(),
)) as _),
SCHEMATA => Some(Arc::new(InformationSchemaSchemata::new(
self.catalog_name.clone(),
self.catalog_manager.clone(),
)) as _),
_ => None,
}
}
@@ -187,7 +240,7 @@ trait InformationTable {
fn schema(&self) -> SchemaRef;
fn to_stream(&self) -> Result<SendableRecordBatchStream>;
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream>;
fn table_type(&self) -> TableType {
TableType::Temporary
@@ -221,7 +274,7 @@ impl DataSource for InformationTableDataSource {
&self,
request: ScanRequest,
) -> std::result::Result<SendableRecordBatchStream, BoxedError> {
let projection = request.projection;
let projection = request.projection.clone();
let projected_schema = match &projection {
Some(projection) => self.try_project(projection)?,
None => self.table.schema(),
@@ -229,7 +282,7 @@ impl DataSource for InformationTableDataSource {
let stream = self
.table
.to_stream()
.to_stream(request)
.map_err(BoxedError::new)
.context(TablesRecordBatchSnafu)
.map_err(BoxedError::new)?

View File

@@ -29,14 +29,16 @@ use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatc
use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{StringVectorBuilder, VectorRef};
use snafu::{OptionExt, ResultExt};
use store_api::storage::TableId;
use store_api::storage::{ScanRequest, TableId};
use super::{InformationTable, COLUMNS};
use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::Predicates;
use crate::CatalogManager;
pub(super) struct InformationSchemaColumns {
@@ -51,6 +53,10 @@ const TABLE_NAME: &str = "table_name";
const COLUMN_NAME: &str = "column_name";
const DATA_TYPE: &str = "data_type";
const SEMANTIC_TYPE: &str = "semantic_type";
const COLUMN_DEFAULT: &str = "column_default";
const IS_NULLABLE: &str = "is_nullable";
const COLUMN_TYPE: &str = "column_type";
const COLUMN_COMMENT: &str = "column_comment";
impl InformationSchemaColumns {
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
@@ -69,6 +75,10 @@ impl InformationSchemaColumns {
ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(DATA_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(SEMANTIC_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(COLUMN_DEFAULT, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(IS_NULLABLE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(COLUMN_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(COLUMN_COMMENT, ConcreteDataType::string_datatype(), true),
]))
}
@@ -94,14 +104,14 @@ impl InformationTable for InformationSchemaColumns {
self.schema.clone()
}
fn to_stream(&self) -> Result<SendableRecordBatchStream> {
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_columns()
.make_columns(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
@@ -126,6 +136,11 @@ struct InformationSchemaColumnsBuilder {
column_names: StringVectorBuilder,
data_types: StringVectorBuilder,
semantic_types: StringVectorBuilder,
column_defaults: StringVectorBuilder,
is_nullables: StringVectorBuilder,
column_types: StringVectorBuilder,
column_comments: StringVectorBuilder,
}
impl InformationSchemaColumnsBuilder {
@@ -144,16 +159,21 @@ impl InformationSchemaColumnsBuilder {
column_names: StringVectorBuilder::with_capacity(42),
data_types: StringVectorBuilder::with_capacity(42),
semantic_types: StringVectorBuilder::with_capacity(42),
column_defaults: StringVectorBuilder::with_capacity(42),
is_nullables: StringVectorBuilder::with_capacity(42),
column_types: StringVectorBuilder::with_capacity(42),
column_comments: StringVectorBuilder::with_capacity(42),
}
}
/// Construct the `information_schema.columns` virtual table
async fn make_columns(&mut self) -> Result<RecordBatch> {
async fn make_columns(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
if !catalog_manager
@@ -184,12 +204,12 @@ impl InformationSchemaColumnsBuilder {
};
self.add_column(
&predicates,
&catalog_name,
&schema_name,
&table_name,
&column.name,
&column.data_type.name(),
semantic_type,
column,
);
}
} else {
@@ -203,19 +223,48 @@ impl InformationSchemaColumnsBuilder {
fn add_column(
&mut self,
predicates: &Predicates,
catalog_name: &str,
schema_name: &str,
table_name: &str,
column_name: &str,
data_type: &str,
semantic_type: &str,
column_schema: &ColumnSchema,
) {
let data_type = &column_schema.data_type.name();
let row = [
(TABLE_CATALOG, &Value::from(catalog_name)),
(TABLE_SCHEMA, &Value::from(schema_name)),
(TABLE_NAME, &Value::from(table_name)),
(COLUMN_NAME, &Value::from(column_schema.name.as_str())),
(DATA_TYPE, &Value::from(data_type.as_str())),
(SEMANTIC_TYPE, &Value::from(semantic_type)),
];
if !predicates.eval(&row) {
return;
}
self.catalog_names.push(Some(catalog_name));
self.schema_names.push(Some(schema_name));
self.table_names.push(Some(table_name));
self.column_names.push(Some(column_name));
self.column_names.push(Some(&column_schema.name));
self.data_types.push(Some(data_type));
self.semantic_types.push(Some(semantic_type));
self.column_defaults.push(
column_schema
.default_constraint()
.map(|s| format!("{}", s))
.as_deref(),
);
if column_schema.is_nullable() {
self.is_nullables.push(Some("Yes"));
} else {
self.is_nullables.push(Some("No"));
}
self.column_types.push(Some(data_type));
self.column_comments
.push(column_schema.column_comment().map(|x| x.as_ref()));
}
fn finish(&mut self) -> Result<RecordBatch> {
@@ -226,6 +275,10 @@ impl InformationSchemaColumnsBuilder {
Arc::new(self.column_names.finish()),
Arc::new(self.data_types.finish()),
Arc::new(self.semantic_types.finish()),
Arc::new(self.column_defaults.finish()),
Arc::new(self.is_nullables.finish()),
Arc::new(self.column_types.finish()),
Arc::new(self.column_comments.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
@@ -244,7 +297,7 @@ impl DfPartitionStream for InformationSchemaColumns {
schema,
futures::stream::once(async move {
builder
.make_columns()
.make_columns(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)

View File

@@ -0,0 +1,347 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_KEY_COLUMN_USAGE_TABLE_ID;
use common_error::ext::BoxedError;
use common_query::physical_plan::TaskContext;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder};
use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId};
use super::KEY_COLUMN_USAGE;
use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::{InformationTable, Predicates};
use crate::CatalogManager;
const CONSTRAINT_SCHEMA: &str = "constraint_schema";
const CONSTRAINT_NAME: &str = "constraint_name";
const TABLE_CATALOG: &str = "table_catalog";
const TABLE_SCHEMA: &str = "table_schema";
const TABLE_NAME: &str = "table_name";
const COLUMN_NAME: &str = "column_name";
const ORDINAL_POSITION: &str = "ordinal_position";
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
///
/// The value itself only holds what is needed to create an
/// `InformationSchemaKeyColumnUsageBuilder`; the rows are produced by that
/// builder when the table is streamed.
pub(super) struct InformationSchemaKeyColumnUsage {
/// Schema of the virtual table, initialised from `Self::schema()` in `new`.
schema: SchemaRef,
/// Name of the catalog handed to the builder.
catalog_name: String,
/// Weak reference to the catalog manager, cloned into the builder.
catalog_manager: Weak<dyn CatalogManager>,
}
impl InformationSchemaKeyColumnUsage {
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema: Self::schema(),
catalog_name,
catalog_manager,
}
}
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(
"constraint_catalog",
ConcreteDataType::string_datatype(),
false,
),
ColumnSchema::new(
CONSTRAINT_SCHEMA,
ConcreteDataType::string_datatype(),
false,
),
ColumnSchema::new(CONSTRAINT_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(ORDINAL_POSITION, ConcreteDataType::uint32_datatype(), false),
ColumnSchema::new(
"position_in_unique_constraint",
ConcreteDataType::uint32_datatype(),
true,
),
ColumnSchema::new(
"referenced_table_schema",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
"referenced_table_name",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
"referenced_column_name",
ConcreteDataType::string_datatype(),
true,
),
]))
}
fn builder(&self) -> InformationSchemaKeyColumnUsageBuilder {
InformationSchemaKeyColumnUsageBuilder::new(
self.schema.clone(),
self.catalog_name.clone(),
self.catalog_manager.clone(),
)
}
}
impl InformationTable for InformationSchemaKeyColumnUsage {
    /// The fixed table id reserved for `KEY_COLUMN_USAGE`.
    fn table_id(&self) -> TableId {
        INFORMATION_SCHEMA_KEY_COLUMN_USAGE_TABLE_ID
    }

    /// The name under which this table is registered.
    fn table_name(&self) -> &'static str {
        KEY_COLUMN_USAGE
    }

    /// A shared handle to the table schema.
    fn schema(&self) -> SchemaRef {
        self.schema.clone()
    }

    /// Builds the whole table as a single record batch, passing `request` to
    /// the builder, and exposes it as a one-item record batch stream.
    fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
        let arrow_schema = self.schema.arrow_schema().clone();
        let mut row_builder = self.builder();

        // The batch is only computed when the stream is first polled.
        let single_batch = futures::stream::once(async move {
            row_builder
                .make_key_column_usage(Some(request))
                .await
                .map(|batch| batch.into_df_record_batch())
                .map_err(Into::into)
        });
        let df_stream = Box::pin(DfRecordBatchStreamAdapter::new(arrow_schema, single_batch));

        let adapter = RecordBatchStreamAdapter::try_new(df_stream)
            .map_err(BoxedError::new)
            .context(InternalSnafu)?;
        Ok(Box::pin(adapter))
    }
}
/// Builds the `information_schema.KEY_COLUMN_USAGE` table row by row
///
/// Columns are based on <https://dev.mysql.com/doc/refman/8.2/en/information-schema-key-column-usage-table.html>
///
/// The `*VectorBuilder` fields are named after the columns declared in
/// `InformationSchemaKeyColumnUsage::schema`. Presumably each one accumulates
/// the values of that column; the code that fills and finishes them is not
/// fully visible here, so confirm against `add_key_column_usage` and `finish`.
struct InformationSchemaKeyColumnUsageBuilder {
/// Schema handed over by the owning table.
schema: SchemaRef,
/// Catalog whose schemas and tables are walked in `make_key_column_usage`.
catalog_name: String,
/// Weak catalog manager handle; upgraded at the start of `make_key_column_usage`.
catalog_manager: Weak<dyn CatalogManager>,
// One builder per output column, each created with capacity 42 in `new`.
constraint_catalog: StringVectorBuilder,
constraint_schema: StringVectorBuilder,
constraint_name: StringVectorBuilder,
table_catalog: StringVectorBuilder,
table_schema: StringVectorBuilder,
table_name: StringVectorBuilder,
column_name: StringVectorBuilder,
ordinal_position: UInt32VectorBuilder,
position_in_unique_constraint: UInt32VectorBuilder,
referenced_table_schema: StringVectorBuilder,
referenced_table_name: StringVectorBuilder,
referenced_column_name: StringVectorBuilder,
}
impl InformationSchemaKeyColumnUsageBuilder {
/// Creates an empty builder; every column builder starts with the same
/// pre-allocated capacity.
fn new(
    schema: SchemaRef,
    catalog_name: String,
    catalog_manager: Weak<dyn CatalogManager>,
) -> Self {
    // Initial capacity shared by all column builders.
    const INIT_CAPACITY: usize = 42;
    let strings = || StringVectorBuilder::with_capacity(INIT_CAPACITY);
    let uint32s = || UInt32VectorBuilder::with_capacity(INIT_CAPACITY);

    Self {
        schema,
        catalog_name,
        catalog_manager,
        constraint_catalog: strings(),
        constraint_schema: strings(),
        constraint_name: strings(),
        table_catalog: strings(),
        table_schema: strings(),
        table_name: strings(),
        column_name: strings(),
        ordinal_position: uint32s(),
        position_in_unique_constraint: uint32s(),
        referenced_table_schema: strings(),
        referenced_table_name: strings(),
        referenced_column_name: strings(),
    }
}
/// Construct the `information_schema.KEY_COLUMN_USAGE` virtual table
async fn make_key_column_usage(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
let mut primary_constraints = vec![];
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
if !catalog_manager
.schema_exists(&catalog_name, &schema_name)
.await?
{
continue;
}
for table_name in catalog_manager
.table_names(&catalog_name, &schema_name)
.await?
{
if let Some(table) = catalog_manager
.table(&catalog_name, &schema_name, &table_name)
.await?
{
let keys = &table.table_info().meta.primary_key_indices;
let schema = table.schema();
for (idx, column) in schema.column_schemas().iter().enumerate() {
if column.is_time_index() {
self.add_key_column_usage(
&predicates,
&schema_name,
"TIME INDEX",
&schema_name,
&table_name,
&column.name,
1, //always 1 for time index
);
}
if keys.contains(&idx) {
primary_constraints.push((
schema_name.clone(),
table_name.clone(),
column.name.clone(),
));
}
// TODO(dimbtp): foreign key constraint not supported yet
}
} else {
unreachable!();
}
}
}
for (i, (schema_name, table_name, column_name)) in
primary_constraints.into_iter().enumerate()
{
self.add_key_column_usage(
&predicates,
&schema_name,
"PRIMARY",
&schema_name,
&table_name,
&column_name,
i as u32 + 1,
);
}
self.finish()
}
// TODO(dimbtp): Foreign key constraint has not `None` value for last 4
// fields, but it is not supported yet.
#[allow(clippy::too_many_arguments)]
fn add_key_column_usage(
&mut self,
predicates: &Predicates,
constraint_schema: &str,
constraint_name: &str,
table_schema: &str,
table_name: &str,
column_name: &str,
ordinal_position: u32,
) {
let row = [
(CONSTRAINT_SCHEMA, &Value::from(constraint_schema)),
(CONSTRAINT_NAME, &Value::from(constraint_name)),
(TABLE_SCHEMA, &Value::from(table_schema)),
(TABLE_NAME, &Value::from(table_name)),
(COLUMN_NAME, &Value::from(column_name)),
(ORDINAL_POSITION, &Value::from(ordinal_position)),
];
if !predicates.eval(&row) {
return;
}
self.constraint_catalog.push(Some("def"));
self.constraint_schema.push(Some(constraint_schema));
self.constraint_name.push(Some(constraint_name));
self.table_catalog.push(Some("def"));
self.table_schema.push(Some(table_schema));
self.table_name.push(Some(table_name));
self.column_name.push(Some(column_name));
self.ordinal_position.push(Some(ordinal_position));
self.position_in_unique_constraint.push(None);
self.referenced_table_schema.push(None);
self.referenced_table_name.push(None);
self.referenced_column_name.push(None);
}
fn finish(&mut self) -> Result<RecordBatch> {
let columns: Vec<VectorRef> = vec![
Arc::new(self.constraint_catalog.finish()),
Arc::new(self.constraint_schema.finish()),
Arc::new(self.constraint_name.finish()),
Arc::new(self.table_catalog.finish()),
Arc::new(self.table_schema.finish()),
Arc::new(self.table_name.finish()),
Arc::new(self.column_name.finish()),
Arc::new(self.ordinal_position.finish()),
Arc::new(self.position_in_unique_constraint.finish()),
Arc::new(self.referenced_table_schema.finish()),
Arc::new(self.referenced_table_name.finish()),
Arc::new(self.referenced_column_name.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
}
impl DfPartitionStream for InformationSchemaKeyColumnUsage {
    /// Arrow schema of the batches produced by [`Self::execute`].
    fn schema(&self) -> &ArrowSchemaRef {
        self.schema.arrow_schema()
    }

    /// Returns a stream that yields the whole table as a single record batch.
    ///
    /// No scan request is available here, so the table is built unfiltered.
    fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
        let arrow_schema = self.schema.arrow_schema().clone();
        let mut table_builder = self.builder();

        // The table is only built once the stream is polled.
        let single_batch = futures::stream::once(async move {
            table_builder
                .make_key_column_usage(None)
                .await
                .map(|batch| batch.into_df_record_batch())
                .map_err(Into::into)
        });

        Box::pin(DfRecordBatchStreamAdapter::new(arrow_schema, single_batch))
    }
}

View File

@@ -26,7 +26,7 @@ use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatc
use datatypes::schema::SchemaRef;
use datatypes::vectors::VectorRef;
use snafu::ResultExt;
use store_api::storage::TableId;
use store_api::storage::{ScanRequest, TableId};
pub use tables::get_schema_columns;
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
@@ -74,7 +74,7 @@ impl InformationTable for MemoryTable {
self.schema.clone()
}
fn to_stream(&self) -> Result<SendableRecordBatchStream> {
fn to_stream(&self, _request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
@@ -169,7 +169,7 @@ mod tests {
assert_eq!("test", table.table_name());
assert_eq!(schema, InformationTable::schema(&table));
let stream = table.to_stream().unwrap();
let stream = table.to_stream(ScanRequest::default()).unwrap();
let batches = RecordBatches::try_collect(stream).await.unwrap();
@@ -198,7 +198,7 @@ mod tests {
assert_eq!("test", table.table_name());
assert_eq!(schema, InformationTable::schema(&table));
let stream = table.to_stream().unwrap();
let stream = table.to_stream(ScanRequest::default()).unwrap();
let batches = RecordBatches::try_collect(stream).await.unwrap();

View File

@@ -17,7 +17,7 @@ use std::sync::Arc;
use common_catalog::consts::MITO_ENGINE;
use datatypes::prelude::{ConcreteDataType, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::StringVector;
use datatypes::vectors::{Int64Vector, StringVector};
use crate::information_schema::table_names::*;
@@ -97,6 +97,320 @@ pub fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>) {
],
),
CHARACTER_SETS => (
vec![
string_column("CHARACTER_SET_NAME"),
string_column("DEFAULT_COLLATE_NAME"),
string_column("DESCRIPTION"),
bigint_column("MAXLEN"),
],
vec![
Arc::new(StringVector::from(vec!["utf8"])),
Arc::new(StringVector::from(vec!["utf8_bin"])),
Arc::new(StringVector::from(vec!["UTF-8 Unicode"])),
Arc::new(Int64Vector::from_slice([4])),
],
),
COLLATIONS => (
vec![
string_column("COLLATION_NAME"),
string_column("CHARACTER_SET_NAME"),
bigint_column("ID"),
string_column("IS_DEFAULT"),
string_column("IS_COMPILED"),
bigint_column("SORTLEN"),
],
vec![
Arc::new(StringVector::from(vec!["utf8_bin"])),
Arc::new(StringVector::from(vec!["utf8"])),
Arc::new(Int64Vector::from_slice([1])),
Arc::new(StringVector::from(vec!["Yes"])),
Arc::new(StringVector::from(vec!["Yes"])),
Arc::new(Int64Vector::from_slice([1])),
],
),
COLLATION_CHARACTER_SET_APPLICABILITY => (
vec![
string_column("COLLATION_NAME"),
string_column("CHARACTER_SET_NAME"),
],
vec![
Arc::new(StringVector::from(vec!["utf8_bin"])),
Arc::new(StringVector::from(vec!["utf8"])),
],
),
CHECK_CONSTRAINTS => (
string_columns(&[
"CONSTRAINT_CATALOG",
"CONSTRAINT_SCHEMA",
"CONSTRAINT_NAME",
"CHECK_CLAUSE",
]),
// Check constraints are not supported yet
vec![],
),
EVENTS => (
vec![
string_column("EVENT_CATALOG"),
string_column("EVENT_SCHEMA"),
string_column("EVENT_NAME"),
string_column("DEFINER"),
string_column("TIME_ZONE"),
string_column("EVENT_BODY"),
string_column("EVENT_DEFINITION"),
string_column("EVENT_TYPE"),
datetime_column("EXECUTE_AT"),
bigint_column("INTERVAL_VALUE"),
string_column("INTERVAL_FIELD"),
string_column("SQL_MODE"),
datetime_column("STARTS"),
datetime_column("ENDS"),
string_column("STATUS"),
string_column("ON_COMPLETION"),
datetime_column("CREATED"),
datetime_column("LAST_ALTERED"),
datetime_column("LAST_EXECUTED"),
string_column("EVENT_COMMENT"),
bigint_column("ORIGINATOR"),
string_column("CHARACTER_SET_CLIENT"),
string_column("COLLATION_CONNECTION"),
string_column("DATABASE_COLLATION"),
],
vec![],
),
FILES => (
vec![
bigint_column("FILE_ID"),
string_column("FILE_NAME"),
string_column("FILE_TYPE"),
string_column("TABLESPACE_NAME"),
string_column("TABLE_CATALOG"),
string_column("TABLE_SCHEMA"),
string_column("TABLE_NAME"),
string_column("LOGFILE_GROUP_NAME"),
bigint_column("LOGFILE_GROUP_NUMBER"),
string_column("ENGINE"),
string_column("FULLTEXT_KEYS"),
bigint_column("DELETED_ROWS"),
bigint_column("UPDATE_COUNT"),
bigint_column("FREE_EXTENTS"),
bigint_column("TOTAL_EXTENTS"),
bigint_column("EXTENT_SIZE"),
bigint_column("INITIAL_SIZE"),
bigint_column("MAXIMUM_SIZE"),
bigint_column("AUTOEXTEND_SIZE"),
datetime_column("CREATION_TIME"),
datetime_column("LAST_UPDATE_TIME"),
datetime_column("LAST_ACCESS_TIME"),
datetime_column("RECOVER_TIME"),
bigint_column("TRANSACTION_COUNTER"),
string_column("VERSION"),
string_column("ROW_FORMAT"),
bigint_column("TABLE_ROWS"),
bigint_column("AVG_ROW_LENGTH"),
bigint_column("DATA_LENGTH"),
bigint_column("MAX_DATA_LENGTH"),
bigint_column("INDEX_LENGTH"),
bigint_column("DATA_FREE"),
datetime_column("CREATE_TIME"),
datetime_column("UPDATE_TIME"),
datetime_column("CHECK_TIME"),
string_column("CHECKSUM"),
string_column("STATUS"),
string_column("EXTRA"),
],
vec![],
),
OPTIMIZER_TRACE => (
vec![
string_column("QUERY"),
string_column("TRACE"),
bigint_column("MISSING_BYTES_BEYOND_MAX_MEM_SIZE"),
bigint_column("INSUFFICIENT_PRIVILEGES"),
],
vec![],
),
// MySQL(https://dev.mysql.com/doc/refman/8.2/en/information-schema-parameters-table.html)
// has the spec that is different from
// PostgreSQL(https://www.postgresql.org/docs/current/infoschema-parameters.html).
// Follow `MySQL` spec here.
PARAMETERS => (
vec![
string_column("SPECIFIC_CATALOG"),
string_column("SPECIFIC_SCHEMA"),
string_column("SPECIFIC_NAME"),
bigint_column("ORDINAL_POSITION"),
string_column("PARAMETER_MODE"),
string_column("PARAMETER_NAME"),
string_column("DATA_TYPE"),
bigint_column("CHARACTER_MAXIMUM_LENGTH"),
bigint_column("CHARACTER_OCTET_LENGTH"),
bigint_column("NUMERIC_PRECISION"),
bigint_column("NUMERIC_SCALE"),
bigint_column("DATETIME_PRECISION"),
string_column("CHARACTER_SET_NAME"),
string_column("COLLATION_NAME"),
string_column("DTD_IDENTIFIER"),
string_column("ROUTINE_TYPE"),
],
vec![],
),
PROFILING => (
vec![
bigint_column("QUERY_ID"),
bigint_column("SEQ"),
string_column("STATE"),
bigint_column("DURATION"),
bigint_column("CPU_USER"),
bigint_column("CPU_SYSTEM"),
bigint_column("CONTEXT_VOLUNTARY"),
bigint_column("CONTEXT_INVOLUNTARY"),
bigint_column("BLOCK_OPS_IN"),
bigint_column("BLOCK_OPS_OUT"),
bigint_column("MESSAGES_SENT"),
bigint_column("MESSAGES_RECEIVED"),
bigint_column("PAGE_FAULTS_MAJOR"),
bigint_column("PAGE_FAULTS_MINOR"),
bigint_column("SWAPS"),
string_column("SOURCE_FUNCTION"),
string_column("SOURCE_FILE"),
bigint_column("SOURCE_LINE"),
],
vec![],
),
// TODO: _Must_ reimplement this table when foreign key constraint is supported.
REFERENTIAL_CONSTRAINTS => (
vec![
string_column("CONSTRAINT_CATALOG"),
string_column("CONSTRAINT_SCHEMA"),
string_column("CONSTRAINT_NAME"),
string_column("UNIQUE_CONSTRAINT_CATALOG"),
string_column("UNIQUE_CONSTRAINT_SCHEMA"),
string_column("UNIQUE_CONSTRAINT_NAME"),
string_column("MATCH_OPTION"),
string_column("UPDATE_RULE"),
string_column("DELETE_RULE"),
string_column("TABLE_NAME"),
string_column("REFERENCED_TABLE_NAME"),
],
vec![],
),
ROUTINES => (
vec![
string_column("SPECIFIC_NAME"),
string_column("ROUTINE_CATALOG"),
string_column("ROUTINE_SCHEMA"),
string_column("ROUTINE_NAME"),
string_column("ROUTINE_TYPE"),
string_column("DATA_TYPE"),
bigint_column("CHARACTER_MAXIMUM_LENGTH"),
bigint_column("CHARACTER_OCTET_LENGTH"),
bigint_column("NUMERIC_PRECISION"),
bigint_column("NUMERIC_SCALE"),
bigint_column("DATETIME_PRECISION"),
string_column("CHARACTER_SET_NAME"),
string_column("COLLATION_NAME"),
string_column("DTD_IDENTIFIER"),
string_column("ROUTINE_BODY"),
string_column("ROUTINE_DEFINITION"),
string_column("EXTERNAL_NAME"),
string_column("EXTERNAL_LANGUAGE"),
string_column("PARAMETER_STYLE"),
string_column("IS_DETERMINISTIC"),
string_column("SQL_DATA_ACCESS"),
string_column("SQL_PATH"),
string_column("SECURITY_TYPE"),
datetime_column("CREATED"),
datetime_column("LAST_ALTERED"),
string_column("SQL_MODE"),
string_column("ROUTINE_COMMENT"),
string_column("DEFINER"),
string_column("CHARACTER_SET_CLIENT"),
string_column("COLLATION_CONNECTION"),
string_column("DATABASE_COLLATION"),
],
vec![],
),
SCHEMA_PRIVILEGES => (
vec![
string_column("GRANTEE"),
string_column("TABLE_CATALOG"),
string_column("TABLE_SCHEMA"),
string_column("PRIVILEGE_TYPE"),
string_column("IS_GRANTABLE"),
],
vec![],
),
TABLE_PRIVILEGES => (
vec![
string_column("GRANTEE"),
string_column("TABLE_CATALOG"),
string_column("TABLE_SCHEMA"),
string_column("TABLE_NAME"),
string_column("PRIVILEGE_TYPE"),
string_column("IS_GRANTABLE"),
],
vec![],
),
TRIGGERS => (
vec![
string_column("TRIGGER_CATALOG"),
string_column("TRIGGER_SCHEMA"),
string_column("TRIGGER_NAME"),
string_column("EVENT_MANIPULATION"),
string_column("EVENT_OBJECT_CATALOG"),
string_column("EVENT_OBJECT_SCHEMA"),
string_column("EVENT_OBJECT_TABLE"),
bigint_column("ACTION_ORDER"),
string_column("ACTION_CONDITION"),
string_column("ACTION_STATEMENT"),
string_column("ACTION_ORIENTATION"),
string_column("ACTION_TIMING"),
string_column("ACTION_REFERENCE_OLD_TABLE"),
string_column("ACTION_REFERENCE_NEW_TABLE"),
string_column("ACTION_REFERENCE_OLD_ROW"),
string_column("ACTION_REFERENCE_NEW_ROW"),
datetime_column("CREATED"),
string_column("SQL_MODE"),
string_column("DEFINER"),
string_column("CHARACTER_SET_CLIENT"),
string_column("COLLATION_CONNECTION"),
string_column("DATABASE_COLLATION"),
],
vec![],
),
// TODO: Consider storing internal metrics in the `global_status` and
// `session_status` tables.
GLOBAL_STATUS => (
vec![
string_column("VARIABLE_NAME"),
string_column("VARIABLE_VALUE"),
],
vec![],
),
SESSION_STATUS => (
vec![
string_column("VARIABLE_NAME"),
string_column("VARIABLE_VALUE"),
],
vec![],
),
_ => unreachable!("Unknown table in information_schema: {}", table_name),
};
@@ -115,6 +429,22 @@ fn string_column(name: &str) -> ColumnSchema {
)
}
/// Builds a non-nullable 64-bit integer column whose name is `name` in lowercase.
fn bigint_column(name: &str) -> ColumnSchema {
    let column_name = name.to_lowercase();
    ColumnSchema::new(column_name, ConcreteDataType::int64_datatype(), false)
}
/// Builds a non-nullable datetime column whose name is `name` in lowercase.
fn datetime_column(name: &str) -> ColumnSchema {
    let column_name = name.to_lowercase();
    ColumnSchema::new(column_name, ConcreteDataType::datetime_datatype(), false)
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -0,0 +1,609 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use arrow::array::StringArray;
use arrow::compute::kernels::comparison;
use common_query::logical_plan::DfExpr;
use datafusion::common::ScalarValue;
use datafusion::logical_expr::expr::Like;
use datafusion::logical_expr::Operator;
use datatypes::value::Value;
use store_api::storage::ScanRequest;
/// Name of the column a [`Predicate`] refers to; compared against the column
/// names of the rows passed to `Predicate::eval`.
type ColumnName = String;
/// Predicate to filter `information_schema` tables stream,
/// we only support these simple predicates currently.
/// TODO(dennis): support more predicate types.
#[derive(Clone, PartialEq, Eq, Debug)]
enum Predicate {
/// The column equals the value.
Eq(ColumnName, Value),
/// The column matches the SQL `LIKE` pattern; the flag is `true` for a
/// case-insensitive match.
Like(ColumnName, String, bool),
/// The column differs from the value.
NotEq(ColumnName, Value),
/// The column equals at least one of the values.
InList(ColumnName, Vec<Value>),
/// Both predicates hold.
And(Box<Predicate>, Box<Predicate>),
/// At least one of the predicates holds.
Or(Box<Predicate>, Box<Predicate>),
/// The inner predicate does not hold.
Not(Box<Predicate>),
}
impl Predicate {
    /// Evaluate the predicate with the row, returns:
    /// - `None` when the predicate can't evaluate with the row.
    /// - `Some(true)` when the predicate is satisfied,
    /// - `Some(false)` when the predicate is not satisfied,
    ///
    /// `row` is a list of `(column name, value)` pairs. A leaf predicate can't
    /// be evaluated when its column is missing from the row; `LIKE` additionally
    /// needs a string value.
    fn eval(&self, row: &[(&str, &Value)]) -> Option<bool> {
        match self {
            Predicate::Eq(c, v) => row
                .iter()
                .find(|(column, _)| c == column)
                .map(|(_, value)| v == *value),
            Predicate::Like(c, pattern, case_insensitive) => row
                .iter()
                .filter(|(column, _)| c == column)
                // Only string values can be matched against a pattern.
                .find_map(|(_, value)| match value {
                    Value::String(bs) => Some(bs),
                    _ => None,
                })
                .and_then(|bs| like_utf8(bs.as_utf8(), pattern, case_insensitive)),
            Predicate::NotEq(c, v) => row
                .iter()
                .find(|(column, _)| c == column)
                .map(|(_, value)| v != *value),
            Predicate::InList(c, values) => row
                .iter()
                .find(|(column, _)| c == column)
                .map(|(_, value)| values.iter().any(|v| v == *value)),
            Predicate::And(left, right) => {
                let left = left.eval(row);

                // short-circuit
                if matches!(left, Some(false)) {
                    return Some(false);
                }

                match (left, right.eval(row)) {
                    (Some(left), Some(right)) => Some(left && right),
                    // A false operand decides the conjunction even when the
                    // other side can't be evaluated.
                    (None, Some(false)) => Some(false),
                    _ => None,
                }
            }
            Predicate::Or(left, right) => {
                let left = left.eval(row);

                // short-circuit
                if matches!(left, Some(true)) {
                    return Some(true);
                }

                match (left, right.eval(row)) {
                    (Some(left), Some(right)) => Some(left || right),
                    // A true operand decides the disjunction even when the
                    // other side can't be evaluated.
                    (None, Some(true)) => Some(true),
                    _ => None,
                }
            }
            Predicate::Not(p) => p.eval(row).map(|b| !b),
        }
    }

    /// Try to create a predicate from datafusion [`Expr`], return None if fails.
    ///
    /// Supported shapes are `NOT expr`, `column [NOT] LIKE 'pattern'` (without
    /// an `ESCAPE` clause), `column = literal`, `column != literal` (either
    /// operand order), `left AND right`, `left OR right` and
    /// `column [NOT] IN (literal, ...)`.
    fn from_expr(expr: DfExpr) -> Option<Predicate> {
        match expr {
            // NOT expr
            DfExpr::Not(expr) => Self::from_expr(*expr).map(|p| Predicate::Not(Box::new(p))),
            // expr LIKE pattern
            //
            // `Predicate::Like` has no way to carry a custom escape character, so
            // an expression with an `ESCAPE` clause is left unconverted instead
            // of being evaluated with the wrong semantics.
            DfExpr::Like(Like {
                negated,
                expr,
                pattern,
                case_insensitive,
                escape_char,
                ..
            }) if is_column(&expr) && is_string_literal(&pattern) && escape_char.is_none() => {
                // Safety: ensured by guard
                let DfExpr::Column(c) = *expr else {
                    unreachable!();
                };
                let DfExpr::Literal(ScalarValue::Utf8(Some(pattern))) = *pattern else {
                    unreachable!();
                };

                let p = Predicate::Like(c.name, pattern, case_insensitive);

                if negated {
                    Some(Predicate::Not(Box::new(p)))
                } else {
                    Some(p)
                }
            }
            // left OP right
            DfExpr::BinaryExpr(bin) => match (*bin.left, bin.op, *bin.right) {
                // left == right
                (DfExpr::Literal(scalar), Operator::Eq, DfExpr::Column(c))
                | (DfExpr::Column(c), Operator::Eq, DfExpr::Literal(scalar)) => {
                    let v = Value::try_from(scalar).ok()?;
                    Some(Predicate::Eq(c.name, v))
                }
                // left != right
                (DfExpr::Literal(scalar), Operator::NotEq, DfExpr::Column(c))
                | (DfExpr::Column(c), Operator::NotEq, DfExpr::Literal(scalar)) => {
                    let v = Value::try_from(scalar).ok()?;
                    Some(Predicate::NotEq(c.name, v))
                }
                // left AND right
                (left, Operator::And, right) => {
                    let left = Self::from_expr(left)?;
                    let right = Self::from_expr(right)?;
                    Some(Predicate::And(Box::new(left), Box::new(right)))
                }
                // left OR right
                (left, Operator::Or, right) => {
                    let left = Self::from_expr(left)?;
                    let right = Self::from_expr(right)?;
                    Some(Predicate::Or(Box::new(left), Box::new(right)))
                }
                _ => None,
            },
            // [NOT] IN (LIST)
            DfExpr::InList(list) => {
                match (*list.expr, list.list, list.negated) {
                    // column [NOT] IN (v1, v2, v3, ...)
                    (DfExpr::Column(c), list, negated) if is_all_scalars(&list) => {
                        // Give up on the whole list as soon as one literal
                        // can't be converted into a `Value`.
                        let values = list
                            .into_iter()
                            .map(|item| match item {
                                DfExpr::Literal(scalar) => Value::try_from(scalar).ok(),
                                // Already excluded by the `is_all_scalars` guard.
                                _ => None,
                            })
                            .collect::<Option<Vec<_>>>()?;

                        let predicate = Predicate::InList(c.name, values);

                        if negated {
                            Some(Predicate::Not(Box::new(predicate)))
                        } else {
                            Some(predicate)
                        }
                    }
                    _ => None,
                }
            }
            _ => None,
        }
    }
}
/// Evaluates the SQL expression `s LIKE pattern`.
///
/// - `s` the string being tested
/// - `pattern` the `LIKE` pattern, e.g. `'%abc'`
/// - `case_insensitive` selects `ilike` instead of `like`
///
/// Returns `None` when the comparison kernel reports an error.
fn like_utf8(s: &str, pattern: &str, case_insensitive: &bool) -> Option<bool> {
    let haystack = StringArray::from(vec![s]);
    let scalar_pattern = StringArray::new_scalar(pattern);

    let outcome = if *case_insensitive {
        comparison::ilike(&haystack, &scalar_pattern)
    } else {
        comparison::like(&haystack, &scalar_pattern)
    };

    // Safety: the input array holds exactly one value, so does the result
    outcome.ok().map(|matched| matched.value(0))
}
/// Returns true when the expression is a non-null UTF-8 string literal.
fn is_string_literal(expr: &DfExpr) -> bool {
    matches!(*expr, DfExpr::Literal(ScalarValue::Utf8(Some(_))))
}
/// Returns true when the expression is a column reference.
fn is_column(expr: &DfExpr) -> bool {
    matches!(*expr, DfExpr::Column(_))
}
/// A list of predicates that are evaluated together against a row.
pub struct Predicates {
// The predicates that could be converted from the scan request filters.
predicates: Vec<Predicate>,
}
impl Predicates {
    /// Builds the predicates from the filters of a [`ScanRequest`] on a
    /// best-effort basis: filters that can't be converted are dropped, and a
    /// missing request yields an empty list.
    pub fn from_scan_request(request: &Option<ScanRequest>) -> Predicates {
        let predicates = request
            .as_ref()
            .map(|scan| {
                scan.filters
                    .iter()
                    .filter_map(|filter| Predicate::from_expr(filter.df_expr().clone()))
                    .collect::<Vec<_>>()
            })
            .unwrap_or_default();

        Self { predicates }
    }

    /// Evaluate the predicates with the row.
    /// returns true when all the predicates are satisfied or can't be evaluated.
    pub fn eval(&self, row: &[(&str, &Value)]) -> bool {
        // A predicate that can't be evaluated counts as satisfied; an empty
        // list therefore accepts every row.
        self.predicates
            .iter()
            .all(|predicate| predicate.eval(row).unwrap_or(true))
    }
}
/// Returns true when every expression in the list is a [`DfExpr::Literal`]
/// (which is trivially the case for an empty list).
fn is_all_scalars(list: &[DfExpr]) -> bool {
    !list.iter().any(|item| !matches!(item, DfExpr::Literal(_)))
}
#[cfg(test)]
mod tests {
use datafusion::common::{Column, ScalarValue};
use datafusion::logical_expr::expr::InList;
use datafusion::logical_expr::BinaryExpr;
use super::*;
// Checks the three-valued result of `Predicate::eval` for every variant:
// satisfied, not satisfied, and "can't evaluate" (column missing from the row).
#[test]
fn test_predicate_eval() {
let a_col = "a".to_string();
let b_col = "b".to_string();
let a_value = Value::from("a_value");
let b_value = Value::from("b_value");
let wrong_value = Value::from("wrong_value");
let a_row = [(a_col.as_str(), &a_value)];
let b_row = [("b", &wrong_value)];
let wrong_row = [(a_col.as_str(), &wrong_value)];
// Predicate::Eq
let p = Predicate::Eq(a_col.clone(), a_value.clone());
assert!(p.eval(&a_row).unwrap());
assert!(p.eval(&b_row).is_none());
assert!(!p.eval(&wrong_row).unwrap());
// Predicate::NotEq
let p = Predicate::NotEq(a_col.clone(), a_value.clone());
assert!(!p.eval(&a_row).unwrap());
assert!(p.eval(&b_row).is_none());
assert!(p.eval(&wrong_row).unwrap());
// Predicate::InList
let p = Predicate::InList(a_col.clone(), vec![a_value.clone(), b_value.clone()]);
assert!(p.eval(&a_row).unwrap());
assert!(p.eval(&b_row).is_none());
assert!(!p.eval(&wrong_row).unwrap());
assert!(p.eval(&[(&a_col, &b_value)]).unwrap());
let p1 = Predicate::Eq(a_col.clone(), a_value.clone());
let p2 = Predicate::Eq(b_col.clone(), b_value.clone());
let row = [(a_col.as_str(), &a_value), (b_col.as_str(), &b_value)];
let wrong_row = [(a_col.as_str(), &a_value), (b_col.as_str(), &wrong_value)];
//Predicate::And
let p = Predicate::And(Box::new(p1.clone()), Box::new(p2.clone()));
assert!(p.eval(&row).unwrap());
assert!(!p.eval(&wrong_row).unwrap());
assert!(p.eval(&[]).is_none());
assert!(p.eval(&[("c", &a_value)]).is_none());
assert!(!p
.eval(&[(a_col.as_str(), &b_value), (b_col.as_str(), &a_value)])
.unwrap());
assert!(!p
.eval(&[(a_col.as_str(), &b_value), (b_col.as_str(), &b_value)])
.unwrap());
// One side satisfied, the other not evaluable: the conjunction is unknown.
assert!(p
.eval(&[(a_col.as_ref(), &a_value), ("c", &a_value)])
.is_none());
// One side false decides the conjunction even if the other is not evaluable.
assert!(!p
.eval(&[(a_col.as_ref(), &b_value), ("c", &a_value)])
.unwrap());
//Predicate::Or
let p = Predicate::Or(Box::new(p1), Box::new(p2));
assert!(p.eval(&row).unwrap());
assert!(p.eval(&wrong_row).unwrap());
assert!(p.eval(&[]).is_none());
assert!(p.eval(&[("c", &a_value)]).is_none());
assert!(!p
.eval(&[(a_col.as_str(), &b_value), (b_col.as_str(), &a_value)])
.unwrap());
assert!(p
.eval(&[(a_col.as_str(), &b_value), (b_col.as_str(), &b_value)])
.unwrap());
// One side true decides the disjunction even if the other is not evaluable.
assert!(p
.eval(&[(a_col.as_ref(), &a_value), ("c", &a_value)])
.unwrap());
// One side false, the other not evaluable: the disjunction is unknown.
assert!(p
.eval(&[(a_col.as_ref(), &b_value), ("c", &a_value)])
.is_none());
}
// Checks conversion and evaluation of `LIKE`, `ILIKE`-style and `NOT LIKE` expressions.
#[test]
fn test_predicate_like() {
// case insensitive
let expr = DfExpr::Like(Like {
negated: false,
expr: Box::new(column("a")),
pattern: Box::new(string_literal("%abc")),
case_insensitive: true,
escape_char: None,
});
let p = Predicate::from_expr(expr).unwrap();
assert!(
matches!(&p, Predicate::Like(c, pattern, case_insensitive) if
c == "a"
&& pattern == "%abc"
&& *case_insensitive)
);
let match_row = [
("a", &Value::from("hello AbC")),
("b", &Value::from("b value")),
];
let unmatch_row = [("a", &Value::from("bca")), ("b", &Value::from("b value"))];
assert!(p.eval(&match_row).unwrap());
assert!(!p.eval(&unmatch_row).unwrap());
assert!(p.eval(&[]).is_none());
// case sensitive
let expr = DfExpr::Like(Like {
negated: false,
expr: Box::new(column("a")),
pattern: Box::new(string_literal("%abc")),
case_insensitive: false,
escape_char: None,
});
let p = Predicate::from_expr(expr).unwrap();
assert!(
matches!(&p, Predicate::Like(c, pattern, case_insensitive) if
c == "a"
&& pattern == "%abc"
&& !*case_insensitive)
);
// "hello AbC" no longer matches once the case matters.
assert!(!p.eval(&match_row).unwrap());
assert!(!p.eval(&unmatch_row).unwrap());
assert!(p.eval(&[]).is_none());
// not like
let expr = DfExpr::Like(Like {
negated: true,
expr: Box::new(column("a")),
pattern: Box::new(string_literal("%abc")),
case_insensitive: true,
escape_char: None,
});
let p = Predicate::from_expr(expr).unwrap();
assert!(!p.eval(&match_row).unwrap());
assert!(p.eval(&unmatch_row).unwrap());
assert!(p.eval(&[]).is_none());
}
// Builds an unqualified column reference expression.
fn column(name: &str) -> DfExpr {
DfExpr::Column(Column {
relation: None,
name: name.to_string(),
})
}
// Builds a non-null UTF-8 string literal expression.
fn string_literal(v: &str) -> DfExpr {
DfExpr::Literal(ScalarValue::Utf8(Some(v.to_string())))
}
// Returns true when `v` is a string value equal to `expected`.
fn match_string_value(v: &Value, expected: &str) -> bool {
matches!(v, Value::String(bs) if bs.as_utf8() == expected)
}
// Returns true when every value is a string equal to the expected one at the
// same index; panics when the two slices differ in length.
fn match_string_values(vs: &[Value], expected: &[&str]) -> bool {
assert_eq!(vs.len(), expected.len());
let mut result = true;
for (i, v) in vs.iter().enumerate() {
result = result && match_string_value(v, expected[i]);
}
result
}
// Returns the expressions `a = 'a_value'` and `b != 'b_value'`.
fn mock_exprs() -> (DfExpr, DfExpr) {
let expr1 = DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(column("a")),
op: Operator::Eq,
right: Box::new(string_literal("a_value")),
});
let expr2 = DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(column("b")),
op: Operator::NotEq,
right: Box::new(string_literal("b_value")),
});
(expr1, expr2)
}
// Checks that every supported expression shape converts into the expected predicate.
#[test]
fn test_predicate_from_expr() {
let (expr1, expr2) = mock_exprs();
let p1 = Predicate::from_expr(expr1.clone()).unwrap();
assert!(matches!(&p1, Predicate::Eq(column, v) if column == "a"
&& match_string_value(v, "a_value")));
let p2 = Predicate::from_expr(expr2.clone()).unwrap();
assert!(matches!(&p2, Predicate::NotEq(column, v) if column == "b"
&& match_string_value(v, "b_value")));
let and_expr = DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(expr1.clone()),
op: Operator::And,
right: Box::new(expr2.clone()),
});
let or_expr = DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(expr1.clone()),
op: Operator::Or,
right: Box::new(expr2.clone()),
});
let not_expr = DfExpr::Not(Box::new(expr1.clone()));
let and_p = Predicate::from_expr(and_expr).unwrap();
assert!(matches!(and_p, Predicate::And(left, right) if *left == p1 && *right == p2));
let or_p = Predicate::from_expr(or_expr).unwrap();
assert!(matches!(or_p, Predicate::Or(left, right) if *left == p1 && *right == p2));
let not_p = Predicate::from_expr(not_expr).unwrap();
assert!(matches!(not_p, Predicate::Not(p) if *p == p1));
// column IN (...)
let inlist_expr = DfExpr::InList(InList {
expr: Box::new(column("a")),
list: vec![string_literal("a1"), string_literal("a2")],
negated: false,
});
let inlist_p = Predicate::from_expr(inlist_expr).unwrap();
assert!(matches!(&inlist_p, Predicate::InList(c, values) if c == "a"
&& match_string_values(values, &["a1", "a2"])));
// column NOT IN (...) is wrapped into `Predicate::Not`
let inlist_expr = DfExpr::InList(InList {
expr: Box::new(column("a")),
list: vec![string_literal("a1"), string_literal("a2")],
negated: true,
});
let inlist_p = Predicate::from_expr(inlist_expr).unwrap();
assert!(matches!(inlist_p, Predicate::Not(p) if
matches!(&*p,
Predicate::InList(c, values) if c == "a"
&& match_string_values(values, &["a1", "a2"]))));
}
// Checks that the filters of a scan request are converted in order.
#[test]
fn test_predicates_from_scan_request() {
let predicates = Predicates::from_scan_request(&None);
assert!(predicates.predicates.is_empty());
let (expr1, expr2) = mock_exprs();
let request = ScanRequest {
filters: vec![expr1.into(), expr2.into()],
..Default::default()
};
let predicates = Predicates::from_scan_request(&Some(request));
assert_eq!(2, predicates.predicates.len());
assert!(
matches!(&predicates.predicates[0], Predicate::Eq(column, v) if column == "a"
&& match_string_value(v, "a_value"))
);
assert!(
matches!(&predicates.predicates[1], Predicate::NotEq(column, v) if column == "b"
&& match_string_value(v, "b_value"))
);
}
// Checks `Predicates::eval`: a row passes when no predicate evaluates to false.
#[test]
fn test_predicates_eval_row() {
let wrong_row = [
("a", &Value::from("a_value")),
("b", &Value::from("b_value")),
("c", &Value::from("c_value")),
];
let row = [
("a", &Value::from("a_value")),
("b", &Value::from("not_b_value")),
("c", &Value::from("c_value")),
];
let c_row = [("c", &Value::from("c_value"))];
// test empty predicates, always returns true
let predicates = Predicates::from_scan_request(&None);
assert!(predicates.eval(&row));
assert!(predicates.eval(&wrong_row));
assert!(predicates.eval(&c_row));
let (expr1, expr2) = mock_exprs();
let request = ScanRequest {
filters: vec![expr1.into(), expr2.into()],
..Default::default()
};
let predicates = Predicates::from_scan_request(&Some(request));
assert!(predicates.eval(&row));
assert!(!predicates.eval(&wrong_row));
// Neither predicate can be evaluated against `c_row`, so the row passes.
assert!(predicates.eval(&c_row));
}
}

View File

@@ -0,0 +1,228 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_SCHEMATA_TABLE_ID;
use common_error::ext::BoxedError;
use common_query::physical_plan::TaskContext;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::StringVectorBuilder;
use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId};
use super::SCHEMATA;
use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::{InformationTable, Predicates};
use crate::CatalogManager;
// Column names of the `information_schema.schemata` table, used to build its schema.
const CATALOG_NAME: &str = "catalog_name";
const SCHEMA_NAME: &str = "schema_name";
const DEFAULT_CHARACTER_SET_NAME: &str = "default_character_set_name";
const DEFAULT_COLLATION_NAME: &str = "default_collation_name";
/// The `information_schema.schemata` table implementation.
pub(super) struct InformationSchemaSchemata {
// Schema of the table, as returned by `Self::schema()`.
schema: SchemaRef,
// Catalog this table is built for; handed to every builder.
catalog_name: String,
// Weak handle to the catalog manager; handed to every builder.
catalog_manager: Weak<dyn CatalogManager>,
}
impl InformationSchemaSchemata {
    /// Creates the table for the given catalog.
    pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
        let schema = Self::schema();
        Self {
            schema,
            catalog_name,
            catalog_manager,
        }
    }

    /// Schema of the table: four non-nullable string columns followed by the
    /// nullable `sql_path` string column.
    pub(crate) fn schema() -> SchemaRef {
        let string_type = ConcreteDataType::string_datatype;

        let columns = vec![
            ColumnSchema::new(CATALOG_NAME, string_type(), false),
            ColumnSchema::new(SCHEMA_NAME, string_type(), false),
            ColumnSchema::new(DEFAULT_CHARACTER_SET_NAME, string_type(), false),
            ColumnSchema::new(DEFAULT_COLLATION_NAME, string_type(), false),
            ColumnSchema::new("sql_path", string_type(), true),
        ];

        Arc::new(Schema::new(columns))
    }

    /// Creates a fresh row builder sharing this table's schema and catalog.
    fn builder(&self) -> InformationSchemaSchemataBuilder {
        let schema = self.schema.clone();
        let catalog_name = self.catalog_name.clone();
        let catalog_manager = self.catalog_manager.clone();
        InformationSchemaSchemataBuilder::new(schema, catalog_name, catalog_manager)
    }
}
impl InformationTable for InformationSchemaSchemata {
    /// Returns the fixed table id reserved for `information_schema.schemata`.
    fn table_id(&self) -> TableId {
        INFORMATION_SCHEMA_SCHEMATA_TABLE_ID
    }

    /// Returns the table name, `SCHEMATA`.
    fn table_name(&self) -> &'static str {
        SCHEMATA
    }

    /// Returns a shared handle to the table schema.
    fn schema(&self) -> SchemaRef {
        self.schema.clone()
    }

    /// Scans the table as a stream that yields a single record batch.
    ///
    /// `request` is forwarded to the builder, which derives predicates from it
    /// to filter rows while they are being built.
    fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
        let mut schemata_builder = self.builder();
        let arrow_schema = self.schema.arrow_schema().clone();

        // The whole table is produced lazily as one batch when the stream is polled.
        let df_stream = Box::pin(DfRecordBatchStreamAdapter::new(
            arrow_schema,
            futures::stream::once(async move {
                schemata_builder
                    .make_schemata(Some(request))
                    .await
                    .map(|batch| batch.into_df_record_batch())
                    .map_err(Into::into)
            }),
        ));

        let adapter = RecordBatchStreamAdapter::try_new(df_stream)
            .map_err(BoxedError::new)
            .context(InternalSnafu)?;
        Ok(Box::pin(adapter))
    }
}
/// Builds the `information_schema.schemata` table row by row
///
/// Columns are based on <https://docs.pingcap.com/tidb/stable/information-schema-schemata>
///
/// Each `StringVectorBuilder` field accumulates the values of one output
/// column; `finish` turns them into a single record batch.
struct InformationSchemaSchemataBuilder {
/// Schema attached to the record batch produced by `finish`.
schema: SchemaRef,
/// Name of the catalog whose schemas are listed.
catalog_name: String,
/// Weak handle to the catalog manager; upgraded in `make_schemata`.
catalog_manager: Weak<dyn CatalogManager>,
/// Values of the `catalog_name` column.
catalog_names: StringVectorBuilder,
/// Values of the `schema_name` column.
schema_names: StringVectorBuilder,
/// Values of the `default_character_set_name` column.
charset_names: StringVectorBuilder,
/// Values of the `default_collation_name` column.
collation_names: StringVectorBuilder,
/// Values of the `sql_path` column; `add_schema` always pushes a null here.
sql_paths: StringVectorBuilder,
}
impl InformationSchemaSchemataBuilder {
    /// Creates an empty builder for the schemas of `catalog_name`.
    fn new(
        schema: SchemaRef,
        catalog_name: String,
        catalog_manager: Weak<dyn CatalogManager>,
    ) -> Self {
        // All column builders start with the same initial capacity.
        let new_column = || StringVectorBuilder::with_capacity(42);

        Self {
            schema,
            catalog_name,
            catalog_manager,
            catalog_names: new_column(),
            schema_names: new_column(),
            charset_names: new_column(),
            collation_names: new_column(),
            sql_paths: new_column(),
        }
    }

    /// Construct the `information_schema.schemata` virtual table
    ///
    /// Fails if the catalog manager has already been dropped or if any catalog
    /// lookup fails. Predicates derived from `request` filter the emitted rows.
    async fn make_schemata(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
        let catalog_name = self.catalog_name.clone();
        let manager = self
            .catalog_manager
            .upgrade()
            .context(UpgradeWeakCatalogManagerRefSnafu)?;
        let predicates = Predicates::from_scan_request(&request);

        for schema_name in manager.schema_names(&catalog_name).await? {
            // Only emit schemas that the manager still reports as existing.
            let exists = manager.schema_exists(&catalog_name, &schema_name).await?;
            if exists {
                self.add_schema(&predicates, &catalog_name, &schema_name);
            }
        }

        self.finish()
    }

    /// Appends one row for `schema_name` unless `predicates` rejects it.
    fn add_schema(&mut self, predicates: &Predicates, catalog_name: &str, schema_name: &str) {
        // Character set and collation are the same fixed values for every schema.
        let charset = "utf8";
        let collation = "utf8_bin";

        // The candidate row is checked against the predicates before anything is pushed.
        let candidate = [
            (CATALOG_NAME, &Value::from(catalog_name)),
            (SCHEMA_NAME, &Value::from(schema_name)),
            (DEFAULT_CHARACTER_SET_NAME, &Value::from(charset)),
            (DEFAULT_COLLATION_NAME, &Value::from(collation)),
        ];

        if predicates.eval(&candidate) {
            self.catalog_names.push(Some(catalog_name));
            self.schema_names.push(Some(schema_name));
            self.charset_names.push(Some(charset));
            self.collation_names.push(Some(collation));
            self.sql_paths.push(None);
        }
    }

    /// Finishes every column builder and assembles the record batch.
    fn finish(&mut self) -> Result<RecordBatch> {
        let catalog_names: VectorRef = Arc::new(self.catalog_names.finish());
        let schema_names: VectorRef = Arc::new(self.schema_names.finish());
        let charset_names: VectorRef = Arc::new(self.charset_names.finish());
        let collation_names: VectorRef = Arc::new(self.collation_names.finish());
        let sql_paths: VectorRef = Arc::new(self.sql_paths.finish());

        // Column order must match the order declared in the table schema.
        let columns = vec![
            catalog_names,
            schema_names,
            charset_names,
            collation_names,
            sql_paths,
        ];

        RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
    }
}
impl DfPartitionStream for InformationSchemaSchemata {
    /// Returns the Arrow schema of the table.
    fn schema(&self) -> &ArrowSchemaRef {
        self.schema.arrow_schema()
    }

    /// Executes an unfiltered scan as a stream that yields a single record batch.
    ///
    /// No scan request is passed to the builder on this path.
    fn execute(&self, _ctx: Arc<TaskContext>) -> DfSendableRecordBatchStream {
        let mut schemata_builder = self.builder();
        let arrow_schema = self.schema.arrow_schema().clone();

        Box::pin(DfRecordBatchStreamAdapter::new(
            arrow_schema,
            futures::stream::once(async move {
                schemata_builder
                    .make_schemata(None)
                    .await
                    .map(|batch| batch.into_df_record_batch())
                    .map_err(Into::into)
            }),
        ))
    }
}

View File

@@ -20,3 +20,21 @@ pub const ENGINES: &str = "engines";
pub const COLUMN_PRIVILEGES: &str = "column_privileges";
pub const COLUMN_STATISTICS: &str = "column_statistics";
pub const BUILD_INFO: &str = "build_info";
pub const CHARACTER_SETS: &str = "character_sets";
pub const COLLATIONS: &str = "collations";
pub const COLLATION_CHARACTER_SET_APPLICABILITY: &str = "collation_character_set_applicability";
pub const CHECK_CONSTRAINTS: &str = "check_constraints";
pub const EVENTS: &str = "events";
pub const FILES: &str = "files";
pub const SCHEMATA: &str = "schemata";
pub const KEY_COLUMN_USAGE: &str = "key_column_usage";
pub const OPTIMIZER_TRACE: &str = "optimizer_trace";
pub const PARAMETERS: &str = "parameters";
pub const PROFILING: &str = "profiling";
pub const REFERENTIAL_CONSTRAINTS: &str = "referential_constraints";
pub const ROUTINES: &str = "routines";
pub const SCHEMA_PRIVILEGES: &str = "schema_privileges";
pub const TABLE_PRIVILEGES: &str = "table_privileges";
pub const TRIGGERS: &str = "triggers";
pub const GLOBAL_STATUS: &str = "global_status";
pub const SESSION_STATUS: &str = "session_status";

View File

@@ -25,18 +25,26 @@ use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder};
use snafu::{OptionExt, ResultExt};
use store_api::storage::TableId;
use store_api::storage::{ScanRequest, TableId};
use table::metadata::TableType;
use super::TABLES;
use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::InformationTable;
use crate::information_schema::{InformationTable, Predicates};
use crate::CatalogManager;
const TABLE_CATALOG: &str = "table_catalog";
const TABLE_SCHEMA: &str = "table_schema";
const TABLE_NAME: &str = "table_name";
const TABLE_TYPE: &str = "table_type";
const TABLE_ID: &str = "table_id";
const ENGINE: &str = "engine";
pub(super) struct InformationSchemaTables {
schema: SchemaRef,
catalog_name: String,
@@ -54,12 +62,12 @@ impl InformationSchemaTables {
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new("table_catalog", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("table_schema", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("table_name", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("table_type", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("table_id", ConcreteDataType::uint32_datatype(), true),
ColumnSchema::new("engine", ConcreteDataType::string_datatype(), true),
ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_ID, ConcreteDataType::uint32_datatype(), true),
ColumnSchema::new(ENGINE, ConcreteDataType::string_datatype(), true),
]))
}
@@ -85,14 +93,14 @@ impl InformationTable for InformationSchemaTables {
self.schema.clone()
}
fn to_stream(&self) -> Result<SendableRecordBatchStream> {
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_tables()
.make_tables(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
@@ -142,12 +150,13 @@ impl InformationSchemaTablesBuilder {
}
/// Construct the `information_schema.tables` virtual table
async fn make_tables(&mut self) -> Result<RecordBatch> {
async fn make_tables(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
if !catalog_manager
@@ -167,6 +176,7 @@ impl InformationSchemaTablesBuilder {
{
let table_info = table.table_info();
self.add_table(
&predicates,
&catalog_name,
&schema_name,
&table_name,
@@ -183,8 +193,10 @@ impl InformationSchemaTablesBuilder {
self.finish()
}
#[allow(clippy::too_many_arguments)]
fn add_table(
&mut self,
predicates: &Predicates,
catalog_name: &str,
schema_name: &str,
table_name: &str,
@@ -192,14 +204,27 @@ impl InformationSchemaTablesBuilder {
table_id: Option<u32>,
engine: Option<&str>,
) {
self.catalog_names.push(Some(catalog_name));
self.schema_names.push(Some(schema_name));
self.table_names.push(Some(table_name));
self.table_types.push(Some(match table_type {
let table_type = match table_type {
TableType::Base => "BASE TABLE",
TableType::View => "VIEW",
TableType::Temporary => "LOCAL TEMPORARY",
}));
};
let row = [
(TABLE_CATALOG, &Value::from(catalog_name)),
(TABLE_SCHEMA, &Value::from(schema_name)),
(TABLE_NAME, &Value::from(table_name)),
(TABLE_TYPE, &Value::from(table_type)),
];
if !predicates.eval(&row) {
return;
}
self.catalog_names.push(Some(catalog_name));
self.schema_names.push(Some(schema_name));
self.table_names.push(Some(table_name));
self.table_types.push(Some(table_type));
self.table_ids.push(table_id);
self.engines.push(engine);
}
@@ -229,7 +254,7 @@ impl DfPartitionStream for InformationSchemaTables {
schema,
futures::stream::once(async move {
builder
.make_tables()
.make_tables(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)

View File

@@ -19,17 +19,17 @@ use prometheus::*;
lazy_static! {
pub static ref METRIC_CATALOG_MANAGER_CATALOG_COUNT: IntGauge =
register_int_gauge!("catalog_catalog_count", "catalog catalog count").unwrap();
register_int_gauge!("greptime_catalog_catalog_count", "catalog catalog count").unwrap();
pub static ref METRIC_CATALOG_MANAGER_SCHEMA_COUNT: IntGauge =
register_int_gauge!("catalog_schema_count", "catalog schema count").unwrap();
register_int_gauge!("greptime_catalog_schema_count", "catalog schema count").unwrap();
pub static ref METRIC_CATALOG_MANAGER_TABLE_COUNT: IntGaugeVec = register_int_gauge_vec!(
"catalog_table_count",
"greptime_catalog_table_count",
"catalog table count",
&[METRIC_DB_LABEL]
)
.unwrap();
pub static ref METRIC_CATALOG_KV_REMOTE_GET: Histogram =
register_histogram!("catalog_kv_get_remote", "catalog kv get remote").unwrap();
register_histogram!("greptime_catalog_kv_get_remote", "catalog kv get remote").unwrap();
pub static ref METRIC_CATALOG_KV_GET: Histogram =
register_histogram!("catalog_kv_get", "catalog kv get").unwrap();
register_histogram!("greptime_catalog_kv_get", "catalog kv get").unwrap();
}

View File

@@ -16,7 +16,7 @@ use std::any::Any;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode;
use common_error::{GREPTIME_ERROR_CODE, GREPTIME_ERROR_MSG};
use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG};
use common_macro::stack_trace_debug;
use snafu::{Location, Snafu};
use tonic::{Code, Status};
@@ -115,7 +115,7 @@ impl From<Status> for Error {
.and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
}
let code = get_metadata_value(&e, GREPTIME_ERROR_CODE)
let code = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_CODE)
.and_then(|s| {
if let Ok(code) = s.parse::<u32>() {
StatusCode::from_u32(code)
@@ -125,8 +125,8 @@ impl From<Status> for Error {
})
.unwrap_or(StatusCode::Unknown);
let msg =
get_metadata_value(&e, GREPTIME_ERROR_MSG).unwrap_or_else(|| e.message().to_string());
let msg = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_MSG)
.unwrap_or_else(|| e.message().to_string());
Self::Server { code, msg }
}

View File

@@ -17,27 +17,30 @@ use prometheus::*;
lazy_static! {
pub static ref METRIC_GRPC_CREATE_TABLE: Histogram =
register_histogram!("grpc_create_table", "grpc create table").unwrap();
pub static ref METRIC_GRPC_PROMQL_RANGE_QUERY: Histogram =
register_histogram!("grpc_promql_range_query", "grpc promql range query").unwrap();
register_histogram!("greptime_grpc_create_table", "grpc create table").unwrap();
pub static ref METRIC_GRPC_PROMQL_RANGE_QUERY: Histogram = register_histogram!(
"greptime_grpc_promql_range_query",
"grpc promql range query"
)
.unwrap();
pub static ref METRIC_GRPC_INSERT: Histogram =
register_histogram!("grpc_insert", "grpc insert").unwrap();
register_histogram!("greptime_grpc_insert", "grpc insert").unwrap();
pub static ref METRIC_GRPC_DELETE: Histogram =
register_histogram!("grpc_delete", "grpc delete").unwrap();
register_histogram!("greptime_grpc_delete", "grpc delete").unwrap();
pub static ref METRIC_GRPC_SQL: Histogram =
register_histogram!("grpc_sql", "grpc sql").unwrap();
register_histogram!("greptime_grpc_sql", "grpc sql").unwrap();
pub static ref METRIC_GRPC_LOGICAL_PLAN: Histogram =
register_histogram!("grpc_logical_plan", "grpc logical plan").unwrap();
register_histogram!("greptime_grpc_logical_plan", "grpc logical plan").unwrap();
pub static ref METRIC_GRPC_ALTER: Histogram =
register_histogram!("grpc_alter", "grpc alter").unwrap();
register_histogram!("greptime_grpc_alter", "grpc alter").unwrap();
pub static ref METRIC_GRPC_DROP_TABLE: Histogram =
register_histogram!("grpc_drop_table", "grpc drop table").unwrap();
register_histogram!("greptime_grpc_drop_table", "grpc drop table").unwrap();
pub static ref METRIC_GRPC_TRUNCATE_TABLE: Histogram =
register_histogram!("grpc_truncate_table", "grpc truncate table").unwrap();
register_histogram!("greptime_grpc_truncate_table", "grpc truncate table").unwrap();
pub static ref METRIC_GRPC_DO_GET: Histogram =
register_histogram!("grpc_do_get", "grpc do get").unwrap();
register_histogram!("greptime_grpc_do_get", "grpc do get").unwrap();
pub static ref METRIC_REGION_REQUEST_GRPC: HistogramVec = register_histogram_vec!(
"grpc_region_request",
"greptime_grpc_region_request",
"grpc region request",
&["request_type"]
)

View File

@@ -39,7 +39,7 @@ use crate::from_grpc_response;
/// ```
///
/// If you want to see a concrete usage example, please see
/// [stream_inserter.rs](https://github.com/GreptimeTeam/greptimedb/blob/develop/src/client/examples/stream_ingest.rs).
/// [stream_inserter.rs](https://github.com/GreptimeTeam/greptimedb/blob/main/src/client/examples/stream_ingest.rs).
pub struct StreamInserter {
sender: mpsc::Sender<GreptimeRequest>,

View File

@@ -252,10 +252,6 @@ impl StartCommand {
.await
.context(StartFrontendSnafu)?;
instance
.build_export_metrics_task(&opts.export_metrics)
.context(StartFrontendSnafu)?;
instance
.build_servers(opts)
.await

View File

@@ -28,7 +28,7 @@ pub mod standalone;
lazy_static::lazy_static! {
static ref APP_VERSION: prometheus::IntGaugeVec =
prometheus::register_int_gauge_vec!("app_version", "app version", &["short_version", "version"]).unwrap();
prometheus::register_int_gauge_vec!("greptime_app_version", "app version", &["short_version", "version"]).unwrap();
}
#[async_trait]

View File

@@ -22,7 +22,8 @@ use common_config::wal::StandaloneWalConfig;
use common_config::{metadata_store_dir, KvBackendConfig};
use common_meta::cache_invalidator::DummyCacheInvalidator;
use common_meta::datanode_manager::DatanodeManagerRef;
use common_meta::ddl::{DdlTaskExecutorRef, TableMetadataAllocatorRef};
use common_meta::ddl::table_meta::TableMetadataAllocator;
use common_meta::ddl::DdlTaskExecutorRef;
use common_meta::ddl_manager::DdlManager;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
@@ -38,7 +39,6 @@ use datanode::datanode::{Datanode, DatanodeBuilder};
use file_engine::config::EngineConfig as FileEngineConfig;
use frontend::frontend::FrontendOptions;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::standalone::StandaloneTableMetadataAllocator;
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
use frontend::service_config::{
GrpcOptions, InfluxdbOptions, MysqlOptions, OpentsdbOptions, PostgresOptions, PromStoreOptions,
@@ -406,13 +406,18 @@ impl StartCommand {
opts.wal_meta.clone(),
kv_backend.clone(),
));
let table_meta_allocator = Arc::new(StandaloneTableMetadataAllocator::new(
let table_metadata_manager =
Self::create_table_metadata_manager(kv_backend.clone()).await?;
let table_meta_allocator = TableMetadataAllocator::new(
table_id_sequence,
wal_options_allocator.clone(),
));
table_metadata_manager.clone(),
);
let ddl_task_executor = Self::create_ddl_task_executor(
kv_backend.clone(),
table_metadata_manager,
procedure_manager.clone(),
datanode_manager.clone(),
table_meta_allocator,
@@ -425,10 +430,6 @@ impl StartCommand {
.await
.context(StartFrontendSnafu)?;
frontend
.build_export_metrics_task(&opts.frontend.export_metrics)
.context(StartFrontendSnafu)?;
frontend
.build_servers(opts)
.await
@@ -443,14 +444,11 @@ impl StartCommand {
}
pub async fn create_ddl_task_executor(
kv_backend: KvBackendRef,
table_metadata_manager: TableMetadataManagerRef,
procedure_manager: ProcedureManagerRef,
datanode_manager: DatanodeManagerRef,
table_meta_allocator: TableMetadataAllocatorRef,
table_meta_allocator: TableMetadataAllocator,
) -> Result<DdlTaskExecutorRef> {
let table_metadata_manager =
Self::create_table_metadata_manager(kv_backend.clone()).await?;
let ddl_task_executor: DdlTaskExecutorRef = Arc::new(
DdlManager::try_new(
procedure_manager,
@@ -466,7 +464,7 @@ impl StartCommand {
Ok(ddl_task_executor)
}
async fn create_table_metadata_manager(
pub async fn create_table_metadata_manager(
kv_backend: KvBackendRef,
) -> Result<TableMetadataManagerRef> {
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend));

View File

@@ -44,6 +44,42 @@ pub const INFORMATION_SCHEMA_COLUMN_PRIVILEGES_TABLE_ID: u32 = 6;
pub const INFORMATION_SCHEMA_COLUMN_STATISTICS_TABLE_ID: u32 = 7;
/// id for information_schema.build_info
pub const INFORMATION_SCHEMA_BUILD_INFO_TABLE_ID: u32 = 8;
/// id for information_schema.CHARACTER_SETS
pub const INFORMATION_SCHEMA_CHARACTER_SETS_TABLE_ID: u32 = 9;
/// id for information_schema.COLLATIONS
pub const INFORMATION_SCHEMA_COLLATIONS_TABLE_ID: u32 = 10;
/// id for information_schema.COLLATION_CHARACTER_SET_APPLICABILITY
pub const INFORMATION_SCHEMA_COLLATION_CHARACTER_SET_APPLICABILITY_TABLE_ID: u32 = 11;
/// id for information_schema.CHECK_CONSTRAINTS
pub const INFORMATION_SCHEMA_CHECK_CONSTRAINTS_TABLE_ID: u32 = 12;
/// id for information_schema.EVENTS
pub const INFORMATION_SCHEMA_EVENTS_TABLE_ID: u32 = 13;
/// id for information_schema.FILES
pub const INFORMATION_SCHEMA_FILES_TABLE_ID: u32 = 14;
/// id for information_schema.SCHEMATA
pub const INFORMATION_SCHEMA_SCHEMATA_TABLE_ID: u32 = 15;
/// id for information_schema.KEY_COLUMN_USAGE
pub const INFORMATION_SCHEMA_KEY_COLUMN_USAGE_TABLE_ID: u32 = 16;
/// id for information_schema.OPTIMIZER_TRACE
pub const INFORMATION_SCHEMA_OPTIMIZER_TRACE_TABLE_ID: u32 = 17;
/// id for information_schema.PARAMETERS
pub const INFORMATION_SCHEMA_PARAMETERS_TABLE_ID: u32 = 18;
/// id for information_schema.PROFILING
pub const INFORMATION_SCHEMA_PROFILING_TABLE_ID: u32 = 19;
/// id for information_schema.REFERENTIAL_CONSTRAINTS
pub const INFORMATION_SCHEMA_REFERENTIAL_CONSTRAINTS_TABLE_ID: u32 = 20;
/// id for information_schema.ROUTINES
pub const INFORMATION_SCHEMA_ROUTINES_TABLE_ID: u32 = 21;
/// id for information_schema.SCHEMA_PRIVILEGES
pub const INFORMATION_SCHEMA_SCHEMA_PRIVILEGES_TABLE_ID: u32 = 22;
/// id for information_schema.TABLE_PRIVILEGES
pub const INFORMATION_SCHEMA_TABLE_PRIVILEGES_TABLE_ID: u32 = 23;
/// id for information_schema.TRIGGERS
pub const INFORMATION_SCHEMA_TRIGGERS_TABLE_ID: u32 = 24;
/// id for information_schema.GLOBAL_STATUS
pub const INFORMATION_SCHEMA_GLOBAL_STATUS_TABLE_ID: u32 = 25;
/// id for information_schema.SESSION_STATUS
pub const INFORMATION_SCHEMA_SESSION_STATUS_TABLE_ID: u32 = 26;
/// ----- End of information_schema tables -----
pub const MITO_ENGINE: &str = "mito";

View File

@@ -90,11 +90,12 @@ mod tests {
#[test]
fn test_serde_kafka_config() {
// With all fields.
let toml_str = r#"
broker_endpoints = ["127.0.0.1:9092"]
max_batch_size = "4MB"
max_batch_size = "1MB"
linger = "200ms"
produce_record_timeout = "100ms"
consumer_wait_timeout = "100ms"
backoff_init = "500ms"
backoff_max = "10s"
backoff_base = 2
@@ -104,9 +105,9 @@ mod tests {
let expected = KafkaConfig {
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
compression: RsKafkaCompression::default(),
max_batch_size: ReadableSize::mb(4),
max_batch_size: ReadableSize::mb(1),
linger: Duration::from_millis(200),
produce_record_timeout: Duration::from_millis(100),
consumer_wait_timeout: Duration::from_millis(100),
backoff: KafkaBackoffConfig {
init: Duration::from_millis(500),
max: Duration::from_secs(10),
@@ -115,6 +116,19 @@ mod tests {
},
};
assert_eq!(decoded, expected);
// With some fields missing.
let toml_str = r#"
broker_endpoints = ["127.0.0.1:9092"]
linger = "200ms"
"#;
let decoded: KafkaConfig = toml::from_str(toml_str).unwrap();
let expected = KafkaConfig {
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
linger: Duration::from_millis(200),
..Default::default()
};
assert_eq!(decoded, expected);
}
#[test]

View File

@@ -40,16 +40,15 @@ pub struct KafkaConfig {
pub broker_endpoints: Vec<String>,
/// The compression algorithm used to compress log entries.
#[serde(skip)]
#[serde(default)]
pub compression: RsKafkaCompression,
/// The maximum log size a kakfa batch producer could buffer.
/// The max size of a single producer batch.
pub max_batch_size: ReadableSize,
/// The linger duration of a kafka batch producer.
#[serde(with = "humantime_serde")]
pub linger: Duration,
/// The maximum amount of time (in milliseconds) to wait for Kafka records to be returned.
/// The consumer wait timeout.
#[serde(with = "humantime_serde")]
pub produce_record_timeout: Duration,
pub consumer_wait_timeout: Duration,
/// The backoff config.
#[serde(flatten, with = "kafka_backoff")]
pub backoff: KafkaBackoffConfig,
@@ -60,9 +59,10 @@ impl Default for KafkaConfig {
Self {
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
compression: RsKafkaCompression::NoCompression,
max_batch_size: ReadableSize::mb(4),
// Warning: Kafka has a default limit of 1MB per message in a topic.
max_batch_size: ReadableSize::mb(1),
linger: Duration::from_millis(200),
produce_record_timeout: Duration::from_millis(100),
consumer_wait_timeout: Duration::from_millis(100),
backoff: KafkaBackoffConfig::default(),
}
}
@@ -73,17 +73,15 @@ with_prefix!(pub kafka_backoff "backoff_");
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct KafkaBackoffConfig {
/// The initial backoff for kafka clients.
/// The initial backoff delay.
#[serde(with = "humantime_serde")]
pub init: Duration,
/// The maximum backoff for kafka clients.
/// The maximum backoff delay.
#[serde(with = "humantime_serde")]
pub max: Duration,
/// Exponential backoff rate, i.e. next backoff = base * current backoff.
// Sets to u32 type since some structs containing the KafkaConfig need to derive the Eq trait.
pub base: u32,
/// Stop reconnecting if the total wait time reaches the deadline.
/// If it's None, the reconnecting won't terminate.
/// The deadline of retries. `None` stands for no deadline.
#[serde(with = "humantime_serde")]
pub deadline: Option<Duration>,
}
@@ -114,7 +112,7 @@ pub struct StandaloneKafkaConfig {
pub num_partitions: i32,
/// The replication factor of each topic.
pub replication_factor: i16,
/// Above which a topic creation operation will be cancelled.
/// The timeout of topic creation.
#[serde(with = "humantime_serde")]
pub create_topic_timeout: Duration,
}

View File

@@ -19,7 +19,7 @@ pub mod format;
pub mod mock;
pub mod status_code;
pub const GREPTIME_ERROR_CODE: &str = "x-greptime-err-code";
pub const GREPTIME_ERROR_MSG: &str = "x-greptime-err-msg";
pub const GREPTIME_DB_HEADER_ERROR_CODE: &str = "x-greptime-err-code";
pub const GREPTIME_DB_HEADER_ERROR_MSG: &str = "x-greptime-err-msg";
pub use snafu;

View File

@@ -14,6 +14,7 @@ async-stream.workspace = true
async-trait.workspace = true
base64.workspace = true
bytes.workspace = true
chrono.workspace = true
common-catalog.workspace = true
common-config.workspace = true
common-error.workspace = true
@@ -27,6 +28,7 @@ common-time.workspace = true
datatypes.workspace = true
derive_builder.workspace = true
etcd-client.workspace = true
futures-util.workspace = true
futures.workspace = true
humantime-serde.workspace = true
lazy_static.workspace = true
@@ -51,3 +53,4 @@ chrono.workspace = true
common-procedure = { workspace = true, features = ["testing"] }
datatypes.workspace = true
hyper = { version = "0.14", features = ["full"] }
uuid.workspace = true

View File

@@ -24,11 +24,12 @@ use crate::error::Result;
use crate::key::table_route::TableRouteValue;
use crate::key::TableMetadataManagerRef;
use crate::region_keeper::MemoryRegionKeeperRef;
use crate::rpc::ddl::{CreateTableTask, SubmitDdlTaskRequest, SubmitDdlTaskResponse};
use crate::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
pub mod alter_table;
pub mod create_table;
pub mod drop_table;
pub mod table_meta;
pub mod truncate_table;
pub mod utils;
@@ -64,17 +65,6 @@ pub struct TableMetadata {
pub region_wal_options: HashMap<RegionNumber, String>,
}
#[async_trait::async_trait]
pub trait TableMetadataAllocator: Send + Sync {
async fn create(
&self,
ctx: &TableMetadataAllocatorContext,
task: &CreateTableTask,
) -> Result<TableMetadata>;
}
pub type TableMetadataAllocatorRef = Arc<dyn TableMetadataAllocator>;
#[derive(Clone)]
pub struct DdlContext {
pub datanode_manager: DatanodeManagerRef,

View File

@@ -40,9 +40,7 @@ use table::requests::AlterKind;
use crate::cache_invalidator::Context;
use crate::ddl::utils::handle_operate_region_error;
use crate::ddl::DdlContext;
use crate::error::{
self, ConvertAlterTableRequestSnafu, InvalidProtoMsgSnafu, Result, TableRouteNotFoundSnafu,
};
use crate::error::{self, ConvertAlterTableRequestSnafu, InvalidProtoMsgSnafu, Result};
use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::DeserializedValueWithBytes;
@@ -65,6 +63,7 @@ impl AlterTableProcedure {
cluster_id: u64,
task: AlterTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
physical_table_name: Option<TableName>,
context: DdlContext,
) -> Result<Self> {
let alter_kind = task
@@ -84,7 +83,13 @@ impl AlterTableProcedure {
Ok(Self {
context,
data: AlterTableData::new(task, table_info_value, cluster_id, next_column_id),
data: AlterTableData::new(
task,
table_info_value,
physical_table_name,
cluster_id,
next_column_id,
),
kind,
})
}
@@ -182,23 +187,19 @@ impl AlterTableProcedure {
pub async fn submit_alter_region_requests(&mut self) -> Result<Status> {
let table_id = self.data.table_id();
let table_route = self
let (_, physical_table_route) = self
.context
.table_metadata_manager
.table_route_manager()
.get(table_id)
.await?
.context(TableRouteNotFoundSnafu { table_id })?
.into_inner();
let region_routes = table_route.region_routes();
.get_physical_table_route(table_id)
.await?;
let leaders = find_leaders(region_routes);
let leaders = find_leaders(&physical_table_route.region_routes);
let mut alter_region_tasks = Vec::with_capacity(leaders.len());
for datanode in leaders {
let requester = self.context.datanode_manager.datanode(&datanode).await;
let regions = find_leader_regions(region_routes, &datanode);
let regions = find_leader_regions(&physical_table_route.region_routes, &datanode);
for region in regions {
let region_id = RegionId::new(table_id, region);
@@ -335,13 +336,24 @@ impl AlterTableProcedure {
}
fn lock_key_inner(&self) -> Vec<String> {
let mut lock_key = vec![];
if let Some(physical_table_name) = self.data.physical_table_name() {
let physical_table_key = common_catalog::format_full_table_name(
&physical_table_name.catalog_name,
&physical_table_name.schema_name,
&physical_table_name.table_name,
);
lock_key.push(physical_table_key);
}
let table_ref = self.data.table_ref();
let table_key = common_catalog::format_full_table_name(
table_ref.catalog,
table_ref.schema,
table_ref.table,
);
let mut lock_key = vec![table_key];
lock_key.push(table_key);
if let Ok(Kind::RenameTable(RenameTable { new_table_name })) = self.alter_kind() {
lock_key.push(common_catalog::format_full_table_name(
@@ -394,7 +406,7 @@ impl Procedure for AlterTableProcedure {
fn lock_key(&self) -> LockKey {
let key = self.lock_key_inner();
LockKey::new(key)
LockKey::new_exclusive(key)
}
}
@@ -415,6 +427,8 @@ pub struct AlterTableData {
task: AlterTableTask,
/// Table info value before alteration.
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
/// Physical table name, if the table to alter is a logical table.
physical_table_name: Option<TableName>,
cluster_id: u64,
/// Next column id of the table if the task adds columns to the table.
next_column_id: Option<ColumnId>,
@@ -424,6 +438,7 @@ impl AlterTableData {
pub fn new(
task: AlterTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
physical_table_name: Option<TableName>,
cluster_id: u64,
next_column_id: Option<ColumnId>,
) -> Self {
@@ -431,6 +446,7 @@ impl AlterTableData {
state: AlterTableState::Prepare,
task,
table_info_value,
physical_table_name,
cluster_id,
next_column_id,
}
@@ -447,6 +463,10 @@ impl AlterTableData {
fn table_info(&self) -> &RawTableInfo {
&self.table_info_value.table_info
}
fn physical_table_name(&self) -> Option<&TableName> {
self.physical_table_name.as_ref()
}
}
/// Creates region proto alter kind from `table_info` and `alter_kind`.

View File

@@ -20,7 +20,6 @@ use api::v1::region::{
};
use api::v1::{ColumnDef, SemanticType};
use async_trait::async_trait;
use common_config::WAL_OPTIONS_KEY;
use common_error::ext::BoxedError;
use common_procedure::error::{
ExternalSnafu, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu,
@@ -48,6 +47,7 @@ use crate::rpc::ddl::CreateTableTask;
use crate::rpc::router::{
find_leader_regions, find_leaders, operating_leader_regions, RegionRoute,
};
use crate::wal::prepare_wal_option;
pub struct CreateTableProcedure {
pub context: DdlContext,
@@ -217,7 +217,7 @@ impl CreateTableProcedure {
.context(TableRouteNotFoundSnafu {
table_id: physical_table_id,
})?;
let region_routes = physical_table_route.region_routes();
let region_routes = physical_table_route.region_routes()?;
let request_builder = self.new_region_request_builder(Some(physical_table_id))?;
@@ -349,7 +349,7 @@ impl Procedure for CreateTableProcedure {
table_ref.table,
);
LockKey::single(key)
LockKey::single_exclusive(key)
}
}
@@ -455,13 +455,7 @@ impl CreateRequestBuilder {
request.region_id = region_id.as_u64();
request.path = storage_path;
// Stores the encoded wal options into the request options.
region_wal_options
.get(&region_id.region_number())
.and_then(|wal_options| {
request
.options
.insert(WAL_OPTIONS_KEY.to_string(), wal_options.clone())
});
prepare_wal_option(&mut request.options, region_id, region_wal_options);
if let Some(physical_table_id) = self.physical_table_id {
// Logical table has the same region numbers with physical table, and they have a one-to-one mapping.

View File

@@ -116,7 +116,7 @@ impl DropTableProcedure {
/// Register dropping regions if doesn't exist.
fn register_dropping_regions(&mut self) -> Result<()> {
let region_routes = self.data.region_routes();
let region_routes = self.data.region_routes()?;
let dropping_regions = operating_leader_regions(region_routes);
@@ -190,7 +190,7 @@ impl DropTableProcedure {
pub async fn on_datanode_drop_regions(&self) -> Result<Status> {
let table_id = self.data.table_id();
let region_routes = &self.data.region_routes();
let region_routes = &self.data.region_routes()?;
let leaders = find_leaders(region_routes);
let mut drop_region_tasks = Vec::with_capacity(leaders.len());
@@ -273,7 +273,7 @@ impl Procedure for DropTableProcedure {
table_ref.table,
);
LockKey::single(key)
LockKey::single_exclusive(key)
}
}
@@ -306,7 +306,7 @@ impl DropTableData {
self.task.table_ref()
}
fn region_routes(&self) -> &Vec<RegionRoute> {
fn region_routes(&self) -> Result<&Vec<RegionRoute>> {
self.table_route_value.region_routes()
}

View File

@@ -0,0 +1,223 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::Arc;
use async_trait::async_trait;
use common_catalog::consts::METRIC_ENGINE;
use common_telemetry::{debug, info};
use snafu::{ensure, OptionExt};
use store_api::metric_engine_consts::LOGICAL_TABLE_METADATA_KEY;
use store_api::storage::{RegionId, RegionNumber, TableId};
use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};
use crate::error::{Result, TableNotFoundSnafu, UnsupportedSnafu};
use crate::key::table_name::TableNameKey;
use crate::key::table_route::{LogicalTableRouteValue, PhysicalTableRouteValue, TableRouteValue};
use crate::key::TableMetadataManagerRef;
use crate::peer::Peer;
use crate::rpc::ddl::CreateTableTask;
use crate::rpc::router::{Region, RegionRoute};
use crate::sequence::SequenceRef;
use crate::wal::{allocate_region_wal_options, WalOptionsAllocatorRef};
/// Allocates the metadata required to create a table: the table id, the table
/// route and the per-region WAL options (see [TableMetadataAllocator::create]).
#[derive(Clone)]
pub struct TableMetadataAllocator {
/// Yields the next table id and reports the id range checked against
/// explicitly requested ids.
table_id_sequence: SequenceRef,
/// Used to allocate WAL options for the regions of a physical table.
wal_options_allocator: WalOptionsAllocatorRef,
/// Used to resolve the name of a physical table to its table id when a
/// logical (metric engine) table is created.
table_metadata_manager: TableMetadataManagerRef,
/// Chooses the leader peers for the regions of a physical table.
peer_allocator: PeerAllocatorRef,
}
impl TableMetadataAllocator {
pub fn new(
table_id_sequence: SequenceRef,
wal_options_allocator: WalOptionsAllocatorRef,
table_metadata_manager: TableMetadataManagerRef,
) -> Self {
Self::with_peer_allocator(
table_id_sequence,
wal_options_allocator,
table_metadata_manager,
Arc::new(NoopPeerAllocator),
)
}
/// Creates an allocator that asks `peer_allocator` for the peers on which the
/// regions of a newly created physical table are placed.
pub fn with_peer_allocator(
table_id_sequence: SequenceRef,
wal_options_allocator: WalOptionsAllocatorRef,
table_metadata_manager: TableMetadataManagerRef,
peer_allocator: PeerAllocatorRef,
) -> Self {
Self {
table_id_sequence,
wal_options_allocator,
table_metadata_manager,
peer_allocator,
}
}
/// Picks the id of the table being created.
///
/// When the task carries an explicit table id, that id is used as-is,
/// provided it does not fall inside the range reported by
/// `table_id_sequence.min_max()`. Otherwise the next id of the sequence is
/// taken.
///
/// # Errors
/// Returns an `Unsupported` error if the explicit id lies in that range, or
/// propagates the failure of advancing the sequence.
async fn allocate_table_id(&self, task: &CreateTableTask) -> Result<TableId> {
    // No explicit id requested: draw a fresh one from the sequence.
    let Some(requested) = &task.create_table.table_id else {
        let next = self.table_id_sequence.next().await?;
        return Ok(next as TableId);
    };

    let table_id = requested.id;
    let reserved = self.table_id_sequence.min_max().await;
    ensure!(
        !reserved.contains(&(table_id as u64)),
        UnsupportedSnafu {
            operation: format!(
                "create table by id {} that is reserved in this node",
                table_id
            )
        }
    );

    info!(
        "Received explicitly allocated table id {}, will use it directly.",
        table_id
    );
    Ok(table_id)
}
/// Allocates the WAL options of every region of `table_route`.
///
/// Only physical routes get WAL options (keyed by region number); a logical
/// route yields an empty map.
fn create_wal_options(
    &self,
    table_route: &TableRouteValue,
) -> Result<HashMap<RegionNumber, String>> {
    match table_route {
        TableRouteValue::Logical(_) => Ok(HashMap::new()),
        TableRouteValue::Physical(physical) => allocate_region_wal_options(
            physical
                .region_routes
                .iter()
                .map(|region_route| region_route.region.id.region_number())
                .collect(),
            &self.wal_options_allocator,
        ),
    }
}
/// Builds the route of the table being created.
///
/// A table whose engine is `METRIC_ENGINE` and whose options contain
/// `LOGICAL_TABLE_METADATA_KEY` gets a logical route that points at the named
/// physical table. Every other table gets a physical route with one region
/// per partition, each led by a peer obtained from the peer allocator.
///
/// # Errors
/// Fails if the referenced physical table is not found, or if the table name
/// lookup or the peer allocation fails.
async fn create_table_route(
    &self,
    ctx: &TableMetadataAllocatorContext,
    table_id: TableId,
    task: &CreateTableTask,
) -> Result<TableRouteValue> {
    let regions = task.partitions.len();

    // Logical table on top of an existing physical table.
    if task.create_table.engine == METRIC_ENGINE
        && let Some(physical_table_name) = task
            .create_table
            .table_options
            .get(LOGICAL_TABLE_METADATA_KEY)
    {
        let name_key = TableNameKey::new(
            &task.create_table.catalog_name,
            &task.create_table.schema_name,
            physical_table_name,
        );
        let physical_table_id = self
            .table_metadata_manager
            .table_name_manager()
            .get(name_key)
            .await?
            .context(TableNotFoundSnafu {
                table_name: physical_table_name,
            })?
            .table_id();

        let region_ids = (0..regions)
            .map(|number| RegionId::new(table_id, number as RegionNumber))
            .collect();

        return Ok(TableRouteValue::Logical(LogicalTableRouteValue::new(
            physical_table_id,
            region_ids,
        )));
    }

    // Physical table: spread the regions over the allocated peers round-robin.
    let peers = self.peer_allocator.alloc(ctx, regions).await?;
    let mut region_routes = Vec::with_capacity(regions);
    for (index, partition) in task.partitions.iter().enumerate() {
        let region = Region {
            id: RegionId::new(table_id, index as u32),
            partition: Some(partition.clone().into()),
            ..Default::default()
        };
        // NOTE(review): this panics (remainder by zero) if the allocator returns
        // no peers while there is at least one partition.
        let leader = peers[index % peers.len()].clone();
        region_routes.push(RegionRoute {
            region,
            leader_peer: Some(leader),
            ..Default::default()
        });
    }

    Ok(TableRouteValue::Physical(PhysicalTableRouteValue::new(
        region_routes,
    )))
}
/// Allocates everything needed to create the table described by `task`:
/// first the table id, then the table route, then the WAL options of the
/// route's regions.
///
/// # Errors
/// Propagates the first failure of any of the three allocation steps.
pub async fn create(
    &self,
    ctx: &TableMetadataAllocatorContext,
    task: &CreateTableTask,
) -> Result<TableMetadata> {
    let id = self.allocate_table_id(task).await?;
    let route = self.create_table_route(ctx, id, task).await?;
    let wal_options = self.create_wal_options(&route)?;

    debug!(
        "Allocated region wal options {:?} for table {}",
        wal_options, id
    );

    Ok(TableMetadata {
        table_id: id,
        table_route: route,
        region_wal_options: wal_options,
    })
}
}
/// Shared handle to a [PeerAllocator] trait object.
pub type PeerAllocatorRef = Arc<dyn PeerAllocator>;
/// [PeerAllocator] allocates [Peer]s for creating regions.
#[async_trait]
pub trait PeerAllocator: Send + Sync {
/// Allocates [Peer]s for `regions` regions.
///
/// NOTE(review): `TableMetadataAllocator::create_table_route` picks the leader
/// of region `i` as `peers[i % peers.len()]`, so returning an empty list for a
/// non-zero `regions` would make it panic — presumably implementations must
/// return at least one peer in that case; confirm.
async fn alloc(&self, ctx: &TableMetadataAllocatorContext, regions: usize)
-> Result<Vec<Peer>>;
}
/// A [PeerAllocator] that does no real placement: it returns one default
/// [Peer] per requested region.
struct NoopPeerAllocator;

#[async_trait]
impl PeerAllocator for NoopPeerAllocator {
    /// Returns `regions` default-constructed [Peer]s; the context is ignored.
    async fn alloc(
        &self,
        _ctx: &TableMetadataAllocatorContext,
        regions: usize,
    ) -> Result<Vec<Peer>> {
        let peers: Vec<Peer> = (0..regions).map(|_| Peer::default()).collect();
        Ok(peers)
    }
}

View File

@@ -81,7 +81,7 @@ impl Procedure for TruncateTableProcedure {
table_ref.table,
);
LockKey::single(key)
LockKey::single_exclusive(key)
}
}

View File

@@ -26,10 +26,10 @@ use crate::datanode_manager::DatanodeManagerRef;
use crate::ddl::alter_table::AlterTableProcedure;
use crate::ddl::create_table::CreateTableProcedure;
use crate::ddl::drop_table::DropTableProcedure;
use crate::ddl::table_meta::TableMetadataAllocator;
use crate::ddl::truncate_table::TruncateTableProcedure;
use crate::ddl::{
DdlContext, DdlTaskExecutor, ExecutorContext, TableMetadata, TableMetadataAllocatorContext,
TableMetadataAllocatorRef,
};
use crate::error::{
self, RegisterProcedureLoaderSnafu, Result, SubmitProcedureSnafu, TableNotFoundSnafu,
@@ -46,6 +46,8 @@ use crate::rpc::ddl::{
TruncateTableTask,
};
use crate::rpc::router::RegionRoute;
use crate::table_name::TableName;
pub type DdlManagerRef = Arc<DdlManager>;
/// The [DdlManager] provides the ability to execute Ddl.
@@ -54,7 +56,7 @@ pub struct DdlManager {
datanode_manager: DatanodeManagerRef,
cache_invalidator: CacheInvalidatorRef,
table_metadata_manager: TableMetadataManagerRef,
table_metadata_allocator: TableMetadataAllocatorRef,
table_metadata_allocator: TableMetadataAllocator,
memory_region_keeper: MemoryRegionKeeperRef,
}
@@ -65,7 +67,7 @@ impl DdlManager {
datanode_clients: DatanodeManagerRef,
cache_invalidator: CacheInvalidatorRef,
table_metadata_manager: TableMetadataManagerRef,
table_metadata_allocator: TableMetadataAllocatorRef,
table_metadata_allocator: TableMetadataAllocator,
memory_region_keeper: MemoryRegionKeeperRef,
) -> Result<Self> {
let manager = Self {
@@ -160,11 +162,17 @@ impl DdlManager {
cluster_id: u64,
alter_table_task: AlterTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
physical_table_name: Option<TableName>,
) -> Result<ProcedureId> {
let context = self.create_context();
let procedure =
AlterTableProcedure::new(cluster_id, alter_table_task, table_info_value, context)?;
let procedure = AlterTableProcedure::new(
cluster_id,
alter_table_task,
table_info_value,
physical_table_name,
context,
)?;
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -278,7 +286,7 @@ async fn handle_truncate_table_task(
let table_route_value =
table_route_value.context(error::TableRouteNotFoundSnafu { table_id })?;
let table_route = table_route_value.into_inner().region_routes().clone();
let table_route = table_route_value.into_inner().region_routes()?.clone();
let id = ddl_manager
.submit_truncate_table_task(
@@ -327,8 +335,38 @@ async fn handle_alter_table_task(
table_name: table_ref.to_string(),
})?;
let physical_table_id = ddl_manager
.table_metadata_manager()
.table_route_manager()
.get_physical_table_id(table_id)
.await?;
let physical_table_name = if physical_table_id == table_id {
None
} else {
let physical_table_info = &ddl_manager
.table_metadata_manager()
.table_info_manager()
.get(physical_table_id)
.await?
.with_context(|| error::TableInfoNotFoundSnafu {
table_name: table_ref.to_string(),
})?
.table_info;
Some(TableName {
catalog_name: physical_table_info.catalog_name.clone(),
schema_name: physical_table_info.schema_name.clone(),
table_name: physical_table_info.name.clone(),
})
};
let id = ddl_manager
.submit_alter_table_task(cluster_id, alter_table_task, table_info_value)
.submit_alter_table_task(
cluster_id,
alter_table_task,
table_info_value,
physical_table_name,
)
.await?;
info!("Table: {table_id} is altered via procedure_id {id:?}");
@@ -461,15 +499,15 @@ mod tests {
use crate::ddl::alter_table::AlterTableProcedure;
use crate::ddl::create_table::CreateTableProcedure;
use crate::ddl::drop_table::DropTableProcedure;
use crate::ddl::table_meta::TableMetadataAllocator;
use crate::ddl::truncate_table::TruncateTableProcedure;
use crate::ddl::{TableMetadata, TableMetadataAllocator, TableMetadataAllocatorContext};
use crate::error::Result;
use crate::key::TableMetadataManager;
use crate::kv_backend::memory::MemoryKvBackend;
use crate::peer::Peer;
use crate::region_keeper::MemoryRegionKeeper;
use crate::rpc::ddl::CreateTableTask;
use crate::sequence::SequenceBuilder;
use crate::state_store::KvStateStore;
use crate::wal::WalOptionsAllocator;
/// A dummy implemented [DatanodeManager].
pub struct DummyDatanodeManager;
@@ -481,26 +519,12 @@ mod tests {
}
}
/// A dummy implemented [TableMetadataAllocator].
pub struct DummyTableMetadataAllocator;
#[async_trait::async_trait]
impl TableMetadataAllocator for DummyTableMetadataAllocator {
async fn create(
&self,
_ctx: &TableMetadataAllocatorContext,
_task: &CreateTableTask,
) -> Result<TableMetadata> {
unimplemented!()
}
}
#[test]
fn test_try_new() {
let kv_backend = Arc::new(MemoryKvBackend::new());
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend.clone()));
let state_store = Arc::new(KvStateStore::new(kv_backend));
let state_store = Arc::new(KvStateStore::new(kv_backend.clone()));
let procedure_manager = Arc::new(LocalManager::new(Default::default(), state_store));
let _ = DdlManager::try_new(
@@ -508,7 +532,11 @@ mod tests {
Arc::new(DummyDatanodeManager),
Arc::new(DummyCacheInvalidator),
table_metadata_manager,
Arc::new(DummyTableMetadataAllocator),
TableMetadataAllocator::new(
Arc::new(SequenceBuilder::new("test", kv_backend.clone()).build()),
Arc::new(WalOptionsAllocator::default()),
Arc::new(TableMetadataManager::new(kv_backend)),
),
Arc::new(MemoryRegionKeeper::default()),
);

View File

@@ -321,6 +321,27 @@ pub enum Error {
error: rskafka::client::error::Error,
},
#[snafu(display(
"Failed to build a Kafka partition client, topic: {}, partition: {}",
topic,
partition
))]
BuildKafkaPartitionClient {
topic: String,
partition: i32,
location: Location,
#[snafu(source)]
error: rskafka::client::error::Error,
},
#[snafu(display("Failed to produce records to Kafka, topic: {}", topic))]
ProduceRecord {
topic: String,
location: Location,
#[snafu(source)]
error: rskafka::client::error::Error,
},
#[snafu(display("Failed to create a Kafka wal topic"))]
CreateKafkaWalTopic {
location: Location,
@@ -330,6 +351,9 @@ pub enum Error {
#[snafu(display("The topic pool is empty"))]
EmptyTopicPool { location: Location },
#[snafu(display("Unexpected table route type: {}", err_msg))]
UnexpectedLogicalRouteTable { location: Location, err_msg: String },
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -368,8 +392,11 @@ impl ErrorExt for Error {
| EncodeWalOptions { .. }
| BuildKafkaClient { .. }
| BuildKafkaCtrlClient { .. }
| BuildKafkaPartitionClient { .. }
| ProduceRecord { .. }
| CreateKafkaWalTopic { .. }
| EmptyTopicPool { .. } => StatusCode::Unexpected,
| EmptyTopicPool { .. }
| UnexpectedLogicalRouteTable { .. } => StatusCode::Unexpected,
SendMessage { .. }
| GetKvCache { .. }

View File

@@ -92,13 +92,15 @@ impl Display for OpenRegion {
}
}
#[serde_with::serde_as]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct OpenRegion {
pub region_ident: RegionIdent,
pub region_storage_path: String,
pub region_options: HashMap<String, String>,
#[serde(default)]
pub region_wal_options: HashMap<String, String>,
#[serde_as(as = "HashMap<serde_with::DisplayFromStr, _>")]
pub region_wal_options: HashMap<RegionNumber, String>,
#[serde(default)]
pub skip_wal_replay: bool,
}
@@ -108,7 +110,7 @@ impl OpenRegion {
region_ident: RegionIdent,
path: &str,
region_options: HashMap<String, String>,
region_wal_options: HashMap<String, String>,
region_wal_options: HashMap<RegionNumber, String>,
skip_wal_replay: bool,
) -> Self {
Self {

View File

@@ -427,7 +427,7 @@ impl TableMetadataManager {
&region_storage_path,
region_options,
region_wal_options,
region_distribution(&x.region_routes)?,
region_distribution(&x.region_routes),
)?;
txn = txn.merge(create_datanode_table_txn);
}
@@ -483,7 +483,7 @@ impl TableMetadataManager {
.build_delete_txn(table_id, table_info_value)?;
// Deletes datanode table key value pairs.
let distribution = region_distribution(table_route_value.region_routes())?;
let distribution = region_distribution(table_route_value.region_routes()?);
let delete_datanode_txn = self
.datanode_table_manager()
.build_delete_txn(table_id, distribution)?;
@@ -604,12 +604,12 @@ impl TableMetadataManager {
current_table_route_value: &DeserializedValueWithBytes<TableRouteValue>,
new_region_routes: Vec<RegionRoute>,
new_region_options: &HashMap<String, String>,
new_region_wal_options: &HashMap<String, String>,
new_region_wal_options: &HashMap<RegionNumber, String>,
) -> Result<()> {
// Updates the datanode table key value pairs.
let current_region_distribution =
region_distribution(current_table_route_value.region_routes())?;
let new_region_distribution = region_distribution(&new_region_routes)?;
region_distribution(current_table_route_value.region_routes()?);
let new_region_distribution = region_distribution(&new_region_routes);
let update_datanode_table_txn = self.datanode_table_manager().build_update_txn(
table_id,
@@ -621,7 +621,7 @@ impl TableMetadataManager {
)?;
// Updates the table_route.
let new_table_route_value = current_table_route_value.update(new_region_routes);
let new_table_route_value = current_table_route_value.update(new_region_routes)?;
let (update_table_route_txn, on_update_table_route_failure) = self
.table_route_manager()
@@ -656,7 +656,7 @@ impl TableMetadataManager {
where
F: Fn(&RegionRoute) -> Option<Option<RegionStatus>>,
{
let mut new_region_routes = current_table_route_value.region_routes().clone();
let mut new_region_routes = current_table_route_value.region_routes()?.clone();
let mut updated = 0;
for route in &mut new_region_routes {
@@ -673,7 +673,7 @@ impl TableMetadataManager {
}
// Updates the table_route.
let new_table_route_value = current_table_route_value.update(new_region_routes);
let new_table_route_value = current_table_route_value.update(new_region_routes)?;
let (update_table_route_txn, on_update_table_route_failure) = self
.table_route_manager()
@@ -897,7 +897,11 @@ mod tests {
table_info
);
assert_eq!(
remote_table_route.unwrap().into_inner().region_routes(),
remote_table_route
.unwrap()
.into_inner()
.region_routes()
.unwrap(),
region_routes
);
}
@@ -978,7 +982,7 @@ mod tests {
.unwrap()
.unwrap()
.into_inner();
assert_eq!(removed_table_route.region_routes(), region_routes);
assert_eq!(removed_table_route.region_routes().unwrap(), region_routes);
}
#[tokio::test]
@@ -1173,11 +1177,11 @@ mod tests {
.unwrap();
assert_eq!(
updated_route_value.region_routes()[0].leader_status,
updated_route_value.region_routes().unwrap()[0].leader_status,
Some(RegionStatus::Downgraded)
);
assert_eq!(
updated_route_value.region_routes()[1].leader_status,
updated_route_value.region_routes().unwrap()[1].leader_status,
Some(RegionStatus::Downgraded)
);
}
@@ -1187,7 +1191,7 @@ mod tests {
table_id: u32,
region_routes: &[RegionRoute],
) {
let region_distribution = region_distribution(region_routes).unwrap();
let region_distribution = region_distribution(region_routes);
for (datanode, regions) in region_distribution {
let got = table_metadata_manager
.datanode_table_manager()
@@ -1271,7 +1275,8 @@ mod tests {
let current_table_route_value = DeserializedValueWithBytes::from_inner(
current_table_route_value
.inner
.update(new_region_routes.clone()),
.update(new_region_routes.clone())
.unwrap(),
);
let new_region_routes = vec![new_region_route(2, 4), new_region_route(5, 5)];
// it should be ok.
@@ -1295,13 +1300,16 @@ mod tests {
// if the current_table_route_value is wrong, it should return an error.
// The ABA problem.
let wrong_table_route_value =
DeserializedValueWithBytes::from_inner(current_table_route_value.update(vec![
new_region_route(1, 1),
new_region_route(2, 2),
new_region_route(3, 3),
new_region_route(4, 4),
]));
let wrong_table_route_value = DeserializedValueWithBytes::from_inner(
current_table_route_value
.update(vec![
new_region_route(1, 1),
new_region_route(2, 2),
new_region_route(3, 3),
new_region_route(4, 4),
])
.unwrap(),
);
assert!(table_metadata_manager
.update_table_route(
table_id,

View File

@@ -34,6 +34,7 @@ use crate::rpc::store::RangeRequest;
use crate::rpc::KeyValue;
use crate::DatanodeId;
#[serde_with::serde_as]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
/// RegionInfo
/// For compatible reason, DON'T modify the field name.
@@ -48,14 +49,15 @@ pub struct RegionInfo {
#[serde(default)]
pub region_options: HashMap<String, String>,
/// The per-region wal options.
/// Key: region number (in string representation). Value: the encoded wal options of the region.
/// Key: region number. Value: the encoded wal options of the region.
#[serde(default)]
pub region_wal_options: HashMap<String, String>,
#[serde_as(as = "HashMap<serde_with::DisplayFromStr, _>")]
pub region_wal_options: HashMap<RegionNumber, String>,
}
pub struct DatanodeTableKey {
datanode_id: DatanodeId,
table_id: TableId,
pub datanode_id: DatanodeId,
pub table_id: TableId,
}
impl DatanodeTableKey {
@@ -181,7 +183,7 @@ impl DatanodeTableManager {
.filter_map(|region_number| {
region_wal_options
.get(region_number)
.map(|wal_options| (region_number.to_string(), wal_options.clone()))
.map(|wal_options| (*region_number, wal_options.clone()))
})
.collect();
@@ -214,7 +216,7 @@ impl DatanodeTableManager {
current_region_distribution: RegionDistribution,
new_region_distribution: RegionDistribution,
new_region_options: &HashMap<String, String>,
new_region_wal_options: &HashMap<String, String>,
new_region_wal_options: &HashMap<RegionNumber, String>,
) -> Result<Txn> {
let mut opts = Vec::new();
@@ -306,6 +308,61 @@ mod tests {
assert!(parsed.is_ok());
}
/// Test fixture: a map with `String` keys that uses serde's default map
/// representation.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct StringHashMap {
inner: HashMap<String, String>,
}
/// Test fixture: same shape as `StringHashMap`, but with `u32` keys that are
/// (de)serialized through `serde_with::DisplayFromStr`.
#[serde_with::serde_as]
#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct IntegerHashMap {
#[serde_as(as = "HashMap<serde_with::DisplayFromStr, _>")]
inner: HashMap<u32, String>,
}
/// Checks that a string-keyed map and an integer-keyed map annotated with
/// `DisplayFromStr` are interchangeable on the wire, in both directions.
#[test]
fn test_serde_with_integer_hash_map() {
    let string_keyed = || StringHashMap {
        inner: HashMap::from([
            ("1".to_string(), "aaa".to_string()),
            ("2".to_string(), "bbb".to_string()),
            ("3".to_string(), "ccc".to_string()),
        ]),
    };
    let integer_keyed = || IntegerHashMap {
        inner: HashMap::from([
            (1, "aaa".to_string()),
            (2, "bbb".to_string()),
            (3, "ccc".to_string()),
        ]),
    };

    // Written with string keys, read back with integer keys.
    let json = serde_json::to_string(&string_keyed()).unwrap();
    let as_integers: IntegerHashMap = serde_json::from_str(&json).unwrap();
    assert_eq!(integer_keyed(), as_integers);

    // Written with integer keys, read back with string keys.
    let json = serde_json::to_string(&integer_keyed()).unwrap();
    let as_strings: StringHashMap = serde_json::from_str(&json).unwrap();
    assert_eq!(string_keyed(), as_strings);
}
// This test intends to ensure both the `serde_json::to_string` + `serde_json::from_str`
// and `serde_json::to_vec` + `serde_json::from_slice` work for `DatanodeTableValue`.
// Warning: if the key of `region_wal_options` is of type non-String, this test would fail.
@@ -320,9 +377,9 @@ mod tests {
("c".to_string(), "cc".to_string()),
]),
region_wal_options: HashMap::from([
("1".to_string(), "aaa".to_string()),
("2".to_string(), "bbb".to_string()),
("3".to_string(), "ccc".to_string()),
(1, "aaa".to_string()),
(2, "bbb".to_string()),
(3, "ccc".to_string()),
]),
};
let table_value = DatanodeTableValue {

View File

@@ -16,12 +16,14 @@ use std::collections::HashMap;
use std::fmt::Display;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::{RegionId, RegionNumber};
use table::metadata::TableId;
use super::{DeserializedValueWithBytes, TableMetaValue};
use crate::error::{Result, SerdeJsonSnafu};
use crate::error::{
Result, SerdeJsonSnafu, TableRouteNotFoundSnafu, UnexpectedLogicalRouteTableSnafu,
};
use crate::key::{to_removed_key, RegionDistribution, TableMetaKey, TABLE_ROUTE_PREFIX};
use crate::kv_backend::txn::{Compare, CompareOp, Txn, TxnOp, TxnOpResponse};
use crate::kv_backend::KvBackendRef;
@@ -53,7 +55,8 @@ pub struct PhysicalTableRouteValue {
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
pub struct LogicalTableRouteValue {
// TODO(LFC): Add table route for MetricsEngine table.
physical_table_id: TableId,
region_ids: Vec<RegionId>,
}
impl TableRouteValue {
@@ -62,29 +65,50 @@ impl TableRouteValue {
}
/// Returns a new version [TableRouteValue] with `region_routes`.
pub fn update(&self, region_routes: Vec<RegionRoute>) -> Self {
pub fn update(&self, region_routes: Vec<RegionRoute>) -> Result<Self> {
ensure!(
self.is_physical(),
UnexpectedLogicalRouteTableSnafu {
err_msg: format!("{self:?} is a non-physical TableRouteValue."),
}
);
let version = self.physical_table_route().version;
Self::Physical(PhysicalTableRouteValue {
Ok(Self::Physical(PhysicalTableRouteValue {
region_routes,
version: version + 1,
})
}))
}
/// Returns the version.
///
/// For test purpose.
#[cfg(any(test, feature = "testing"))]
pub fn version(&self) -> u64 {
self.physical_table_route().version
pub fn version(&self) -> Result<u64> {
ensure!(
self.is_physical(),
UnexpectedLogicalRouteTableSnafu {
err_msg: format!("{self:?} is a non-physical TableRouteValue."),
}
);
Ok(self.physical_table_route().version)
}
/// Returns the corresponding [RegionRoute].
pub fn region_route(&self, region_id: RegionId) -> Option<RegionRoute> {
self.physical_table_route()
/// Returns the corresponding [RegionRoute], or `None` if the specified region is not found.
///
/// Note: It returns an error if this is a logical table route.
pub fn region_route(&self, region_id: RegionId) -> Result<Option<RegionRoute>> {
ensure!(
self.is_physical(),
UnexpectedLogicalRouteTableSnafu {
err_msg: format!("{self:?} is a non-physical TableRouteValue."),
}
);
Ok(self
.physical_table_route()
.region_routes
.iter()
.find(|route| route.region.id == region_id)
.cloned()
.cloned())
}
/// Returns true if it's [TableRouteValue::Physical].
@@ -93,11 +117,14 @@ impl TableRouteValue {
}
/// Gets the [RegionRoute]s of this [TableRouteValue::Physical].
///
/// # Panics
/// The route type is not the [TableRouteValue::Physical].
pub fn region_routes(&self) -> &Vec<RegionRoute> {
&self.physical_table_route().region_routes
pub fn region_routes(&self) -> Result<&Vec<RegionRoute>> {
ensure!(
self.is_physical(),
UnexpectedLogicalRouteTableSnafu {
err_msg: format!("{self:?} is a non-physical TableRouteValue."),
}
);
Ok(&self.physical_table_route().region_routes)
}
fn physical_table_route(&self) -> &PhysicalTableRouteValue {
@@ -152,12 +179,19 @@ impl PhysicalTableRouteValue {
}
impl LogicalTableRouteValue {
pub fn physical_table_id(&self) -> TableId {
todo!()
pub fn new(physical_table_id: TableId, region_ids: Vec<RegionId>) -> Self {
Self {
physical_table_id,
region_ids,
}
}
pub fn region_ids(&self) -> Vec<RegionId> {
todo!()
pub fn physical_table_id(&self) -> TableId {
self.physical_table_id
}
pub fn region_ids(&self) -> &Vec<RegionId> {
&self.region_ids
}
}
@@ -302,6 +336,54 @@ impl TableRouteManager {
.transpose()
}
/// Resolves `logical_or_physical_table_id` to the id of its physical table.
///
/// A physical table resolves to itself; a logical table resolves to the
/// physical table id recorded in its route.
///
/// # Errors
/// Fails with the `TableRouteNotFoundSnafu` error if no route is stored for
/// the given id, or propagates the failure of the lookup itself.
pub async fn get_physical_table_id(
    &self,
    logical_or_physical_table_id: TableId,
) -> Result<TableId> {
    let stored_route = self.get(logical_or_physical_table_id).await?;
    let route = stored_route
        .context(TableRouteNotFoundSnafu {
            table_id: logical_or_physical_table_id,
        })?
        .into_inner();

    let physical_table_id = match route {
        TableRouteValue::Logical(logical) => logical.physical_table_id(),
        TableRouteValue::Physical(_) => logical_or_physical_table_id,
    };
    Ok(physical_table_id)
}
pub async fn get_physical_table_route(
&self,
logical_or_physical_table_id: TableId,
) -> Result<(TableId, PhysicalTableRouteValue)> {
let table_route = self
.get(logical_or_physical_table_id)
.await?
.context(TableRouteNotFoundSnafu {
table_id: logical_or_physical_table_id,
})?
.into_inner();
match table_route {
TableRouteValue::Physical(x) => Ok((logical_or_physical_table_id, x)),
TableRouteValue::Logical(x) => {
let physical_table_id = x.physical_table_id();
let physical_table_route =
self.get(physical_table_id)
.await?
.context(TableRouteNotFoundSnafu {
table_id: physical_table_id,
})?;
Ok((
physical_table_id,
physical_table_route.physical_table_route().clone(),
))
}
}
}
/// It may return a subset of the `table_ids`.
pub async fn batch_get(
&self,
@@ -354,7 +436,7 @@ impl TableRouteManager {
) -> Result<Option<RegionDistribution>> {
self.get(table_id)
.await?
.map(|table_route| region_distribution(table_route.region_routes()))
.map(|table_route| Ok(region_distribution(table_route.region_routes()?)))
.transpose()
}
}

View File

@@ -15,6 +15,7 @@
#![feature(assert_matches)]
#![feature(btree_extract_if)]
#![feature(async_closure)]
#![feature(let_chains)]
pub mod cache_invalidator;
pub mod datanode_manager;
@@ -35,7 +36,6 @@ pub mod sequence;
pub mod state_store;
pub mod table_name;
pub mod util;
#[allow(unused)]
pub mod wal;
pub type ClusterId = u64;

View File

@@ -16,36 +16,43 @@ use lazy_static::lazy_static;
use prometheus::*;
lazy_static! {
pub static ref METRIC_META_TXN_REQUEST: HistogramVec =
register_histogram_vec!("meta_txn_request", "meta txn request", &["target", "op"]).unwrap();
pub static ref METRIC_META_TXN_REQUEST: HistogramVec = register_histogram_vec!(
"greptime_meta_txn_request",
"meta txn request",
&["target", "op"]
)
.unwrap();
pub static ref METRIC_META_CREATE_CATALOG: Histogram =
register_histogram!("meta_create_catalog", "meta create catalog").unwrap();
pub static ref METRIC_META_CREATE_CATALOG_COUNTER: IntCounter =
register_int_counter!("meta_create_catalog_counter", "meta create catalog").unwrap();
register_histogram!("greptime_meta_create_catalog", "meta create catalog").unwrap();
pub static ref METRIC_META_CREATE_CATALOG_COUNTER: IntCounter = register_int_counter!(
"greptime_meta_create_catalog_counter",
"meta create catalog"
)
.unwrap();
pub static ref METRIC_META_CREATE_SCHEMA: Histogram =
register_histogram!("meta_create_schema", "meta create schema").unwrap();
register_histogram!("greptime_meta_create_schema", "meta create schema").unwrap();
pub static ref METRIC_META_CREATE_SCHEMA_COUNTER: IntCounter =
register_int_counter!("meta_create_schema_counter", "meta create schema").unwrap();
register_int_counter!("greptime_meta_create_schema_counter", "meta create schema").unwrap();
pub static ref METRIC_META_PROCEDURE_CREATE_TABLE: HistogramVec = register_histogram_vec!(
"meta_procedure_create_table",
"greptime_meta_procedure_create_table",
"meta procedure create table",
&["step"]
)
.unwrap();
pub static ref METRIC_META_PROCEDURE_DROP_TABLE: HistogramVec = register_histogram_vec!(
"meta_procedure_drop_table",
"greptime_meta_procedure_drop_table",
"meta procedure drop table",
&["step"]
)
.unwrap();
pub static ref METRIC_META_PROCEDURE_ALTER_TABLE: HistogramVec = register_histogram_vec!(
"meta_procedure_alter_table",
"greptime_meta_procedure_alter_table",
"meta procedure alter table",
&["step"]
)
.unwrap();
pub static ref METRIC_META_PROCEDURE_TRUNCATE_TABLE: HistogramVec = register_histogram_vec!(
"meta_procedure_truncate_table",
"greptime_meta_procedure_truncate_table",
"meta procedure truncate table",
&["step"]
)

View File

@@ -30,7 +30,7 @@ use crate::peer::Peer;
use crate::table_name::TableName;
use crate::DatanodeId;
pub fn region_distribution(region_routes: &[RegionRoute]) -> Result<RegionDistribution> {
pub fn region_distribution(region_routes: &[RegionRoute]) -> RegionDistribution {
let mut regions_id_map = RegionDistribution::new();
for route in region_routes.iter() {
if let Some(peer) = route.leader_peer.as_ref() {
@@ -42,7 +42,7 @@ pub fn region_distribution(region_routes: &[RegionRoute]) -> Result<RegionDistri
// id asc
regions.sort()
}
Ok(regions_id_map)
regions_id_map
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
@@ -123,11 +123,12 @@ pub fn convert_to_region_leader_status_map(
pub fn find_region_leader(
region_routes: &[RegionRoute],
region_number: RegionNumber,
) -> Option<&Peer> {
) -> Option<Peer> {
region_routes
.iter()
.find(|x| x.region.id.region_number() == region_number)
.and_then(|r| r.leader_peer.as_ref())
.cloned()
}
pub fn find_leader_regions(region_routes: &[RegionRoute], datanode: &Peer) -> Vec<RegionNumber> {

View File

@@ -18,10 +18,10 @@ pub mod options_allocator;
use std::collections::HashMap;
use common_config::wal::StandaloneWalConfig;
use common_config::WAL_OPTIONS_KEY;
use serde::{Deserialize, Serialize};
use serde_with::with_prefix;
use store_api::storage::{RegionId, RegionNumber};
use crate::error::Result;
use crate::wal::kafka::KafkaConfig;
pub use crate::wal::kafka::Topic as KafkaWalTopic;
pub use crate::wal::options_allocator::{
@@ -40,7 +40,7 @@ pub enum WalConfig {
impl From<StandaloneWalConfig> for WalConfig {
fn from(value: StandaloneWalConfig) -> Self {
match value {
StandaloneWalConfig::RaftEngine(config) => WalConfig::RaftEngine,
StandaloneWalConfig::RaftEngine(_) => WalConfig::RaftEngine,
StandaloneWalConfig::Kafka(config) => WalConfig::Kafka(KafkaConfig {
broker_endpoints: config.base.broker_endpoints,
num_topics: config.num_topics,
@@ -55,6 +55,16 @@ impl From<StandaloneWalConfig> for WalConfig {
}
}
/// Copies the encoded WAL options of the region identified by `region_id`
/// into `options` under `WAL_OPTIONS_KEY`.
///
/// `region_wal_options` is keyed by region number. When it has no entry for
/// the region, `options` is left untouched.
pub fn prepare_wal_option(
    options: &mut HashMap<String, String>,
    region_id: RegionId,
    region_wal_options: &HashMap<RegionNumber, String>,
) {
    let region_number = region_id.region_number();
    let Some(encoded) = region_wal_options.get(&region_number) else {
        return;
    };
    options.insert(WAL_OPTIONS_KEY.to_string(), encoded.clone());
}
#[cfg(test)]
mod tests {
use std::time::Duration;

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#[cfg(any(test, feature = "testing"))]
pub mod test_util;
pub mod topic;
pub mod topic_manager;
pub mod topic_selector;
@@ -19,7 +21,6 @@ pub mod topic_selector;
use std::time::Duration;
use common_config::wal::kafka::{kafka_backoff, KafkaBackoffConfig, TopicSelectorType};
use common_config::wal::StandaloneWalConfig;
use serde::{Deserialize, Serialize};
pub use crate::wal::kafka::topic::Topic;
@@ -27,6 +28,7 @@ pub use crate::wal::kafka::topic_manager::TopicManager;
/// Configurations for kafka wal.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(default)]
pub struct KafkaConfig {
/// The broker endpoints of the Kafka cluster.
pub broker_endpoints: Vec<String>,
@@ -40,7 +42,7 @@ pub struct KafkaConfig {
pub num_partitions: i32,
/// The replication factor of each topic.
pub replication_factor: i16,
/// Above which a topic creation operation will be cancelled.
/// The timeout of topic creation.
#[serde(with = "humantime_serde")]
pub create_topic_timeout: Duration,
/// The backoff config.

View File

@@ -0,0 +1,33 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_telemetry::warn;
use futures_util::future::BoxFuture;
/// Runs `test` against the Kafka brokers named by the `GT_KAFKA_ENDPOINTS` environment variable.
///
/// The variable is read as a comma-separated list of broker endpoints. Every entry is
/// trimmed, and blank entries (for example those produced by a trailing comma) are dropped.
///
/// The test is skipped with a warning when no usable endpoint is left, i.e. when the
/// variable is unset, cannot be read, or contains only blanks and separators. Otherwise
/// `test` is invoked with the parsed endpoints and awaited.
pub async fn run_test_with_kafka_wal<F>(test: F)
where
    F: FnOnce(Vec<String>) -> BoxFuture<'static, ()>,
{
    // An unset/unreadable variable is treated like an empty one so that both cases
    // share the same "skip" path below.
    let endpoints = std::env::var("GT_KAFKA_ENDPOINTS")
        .unwrap_or_default()
        .split(',')
        .map(str::trim)
        // Splitting an empty string still yields one empty item; never hand such
        // placeholder endpoints to the test.
        .filter(|endpoint| !endpoint.is_empty())
        .map(String::from)
        .collect::<Vec<_>>();

    if endpoints.is_empty() {
        warn!("The endpoints is empty, skipping the test");
        return;
    }

    test(endpoints).await
}

View File

@@ -15,4 +15,5 @@
/// Kafka wal topic.
/// Publishers publish log entries to the topic while subscribers pull log entries from the topic.
/// A topic is simply a string right now. But it may be more complex in the future.
// TODO(niebayes): remove the Topic alias.
pub type Topic = String;

View File

@@ -14,20 +14,22 @@
use std::collections::HashSet;
use std::sync::Arc;
use std::time::Duration;
use common_config::wal::kafka::TopicSelectorType;
use common_telemetry::{debug, error, info};
use common_telemetry::{error, info};
use rskafka::client::controller::ControllerClient;
use rskafka::client::error::Error as RsKafkaError;
use rskafka::client::error::ProtocolError::TopicAlreadyExists;
use rskafka::client::ClientBuilder;
use rskafka::client::partition::{Compression, UnknownTopicHandling};
use rskafka::client::{Client, ClientBuilder};
use rskafka::record::Record;
use rskafka::BackoffConfig;
use snafu::{ensure, AsErrorSource, ResultExt};
use snafu::{ensure, ResultExt};
use crate::error::{
BuildKafkaClientSnafu, BuildKafkaCtrlClientSnafu, CreateKafkaWalTopicSnafu, DecodeJsonSnafu,
EncodeJsonSnafu, InvalidNumTopicsSnafu, Result,
BuildKafkaClientSnafu, BuildKafkaCtrlClientSnafu, BuildKafkaPartitionClientSnafu,
CreateKafkaWalTopicSnafu, DecodeJsonSnafu, EncodeJsonSnafu, InvalidNumTopicsSnafu,
ProduceRecordSnafu, Result,
};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::PutRequest;
@@ -37,12 +39,15 @@ use crate::wal::kafka::KafkaConfig;
const CREATED_TOPICS_KEY: &str = "__created_wal_topics/kafka/";
// Each topic only has one partition for now.
// The `DEFAULT_PARTITION` refers to the index of the partition.
const DEFAULT_PARTITION: i32 = 0;
/// Manages topic initialization and selection.
pub struct TopicManager {
config: KafkaConfig,
// TODO(niebayes): maybe add a guard to ensure all topics in the topic pool are created.
topic_pool: Vec<Topic>,
topic_selector: TopicSelectorRef,
pub(crate) topic_pool: Vec<Topic>,
pub(crate) topic_selector: TopicSelectorRef,
kv_backend: KvBackendRef,
}
@@ -117,14 +122,20 @@ impl TopicManager {
.await
.with_context(|_| BuildKafkaClientSnafu {
broker_endpoints: self.config.broker_endpoints.clone(),
})?
})?;
let control_client = client
.controller_client()
.context(BuildKafkaCtrlClientSnafu)?;
// Try to create missing topics.
let tasks = to_be_created
.iter()
.map(|i| self.try_create_topic(&topics[*i], &client))
.map(|i| async {
self.try_create_topic(&topics[*i], &control_client).await?;
self.try_append_noop_record(&topics[*i], &client).await?;
Ok(())
})
.collect::<Vec<_>>();
futures::future::try_join_all(tasks).await.map(|_| ())
}
@@ -141,6 +152,31 @@ impl TopicManager {
.collect()
}
/// Appends one record with neither key nor value to the default partition of `topic`.
///
/// A partition client for `DEFAULT_PARTITION` is built first (with
/// `UnknownTopicHandling::Retry`), then the record is produced without compression.
///
/// # Errors
/// Returns an error if the partition client cannot be built or if producing the
/// record fails.
async fn try_append_noop_record(&self, topic: &Topic, client: &Client) -> Result<()> {
    let partition_client = client
        .partition_client(topic, DEFAULT_PARTITION, UnknownTopicHandling::Retry)
        .await
        .context(BuildKafkaPartitionClientSnafu {
            topic,
            partition: DEFAULT_PARTITION,
        })?;

    // The record carries no payload at all; only its timestamp is set.
    let noop_record = Record {
        key: None,
        value: None,
        timestamp: chrono::Utc::now(),
        headers: Default::default(),
    };

    partition_client
        .produce(vec![noop_record], Compression::NoCompression)
        .await
        .context(ProduceRecordSnafu { topic })?;

    Ok(())
}
async fn try_create_topic(&self, topic: &Topic, client: &ControllerClient) -> Result<()> {
match client
.create_topic(
@@ -202,13 +238,9 @@ impl TopicManager {
#[cfg(test)]
mod tests {
use std::env;
use common_telemetry::info;
use super::*;
use crate::kv_backend::memory::MemoryKvBackend;
use crate::kv_backend::{self};
use crate::wal::kafka::test_util::run_test_with_kafka_wal;
// Tests that topics can be successfully persisted into the kv backend and can be successfully restored from the kv backend.
#[tokio::test]
@@ -235,26 +267,60 @@ mod tests {
assert_eq!(topics, restored_topics);
}
/// Tests that the topic manager could allocate topics correctly.
#[tokio::test]
async fn test_topic_manager() {
let endpoints = env::var("GT_KAFKA_ENDPOINTS").unwrap_or_default();
common_telemetry::init_default_ut_logging();
async fn test_alloc_topics() {
run_test_with_kafka_wal(|broker_endpoints| {
Box::pin(async {
// Constructs topics that should be created.
let topics = (0..256)
.map(|i| format!("test_alloc_topics_{}_{}", i, uuid::Uuid::new_v4()))
.collect::<Vec<_>>();
if endpoints.is_empty() {
info!("The endpoints is empty, skipping the test.");
return;
}
// TODO: supports topic prefix
let kv_backend = Arc::new(MemoryKvBackend::new());
let config = KafkaConfig {
replication_factor: 1,
broker_endpoints: endpoints
.split(',')
.map(|s| s.to_string())
.collect::<Vec<_>>(),
..Default::default()
};
let manager = TopicManager::new(config, kv_backend);
manager.start().await.unwrap();
// Creates a topic manager.
let config = KafkaConfig {
replication_factor: broker_endpoints.len() as i16,
broker_endpoints,
..Default::default()
};
let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
let mut manager = TopicManager::new(config.clone(), kv_backend);
// Replaces the default topic pool with the constructed topics.
manager.topic_pool = topics.clone();
// Replaces the default selector with a round-robin selector without shuffling.
manager.topic_selector = Arc::new(RoundRobinTopicSelector::default());
manager.start().await.unwrap();
// Selects exactly the number of `num_topics` topics one by one.
let got = (0..topics.len())
.map(|_| manager.select().unwrap())
.cloned()
.collect::<Vec<_>>();
assert_eq!(got, topics);
// Selects exactly the number of `num_topics` topics in a batching manner.
let got = manager
.select_batch(topics.len())
.unwrap()
.into_iter()
.map(ToString::to_string)
.collect::<Vec<_>>();
assert_eq!(got, topics);
// Selects more than the number of `num_topics` topics.
let got = manager
.select_batch(2 * topics.len())
.unwrap()
.into_iter()
.map(ToString::to_string)
.collect::<Vec<_>>();
let expected = vec![topics.clone(); 2]
.into_iter()
.flatten()
.collect::<Vec<_>>();
assert_eq!(got, expected);
})
})
.await;
}
}

View File

@@ -16,7 +16,6 @@ use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use rand::Rng;
use serde::{Deserialize, Serialize};
use snafu::ensure;
use crate::error::{EmptyTopicPoolSnafu, Result};
@@ -60,6 +59,14 @@ impl TopicSelector for RoundRobinTopicSelector {
mod tests {
use super::*;
/// Tests that a selector behaves as expected when the given topic pool is empty.
#[test]
fn test_empty_topic_pool() {
let topic_pool = vec![];
let selector = RoundRobinTopicSelector::default();
assert!(selector.select(&topic_pool).is_err());
}
#[test]
fn test_round_robin_topic_selector() {
let topic_pool: Vec<_> = [0, 1, 2].into_iter().map(|v| v.to_string()).collect();

View File

@@ -107,14 +107,16 @@ pub fn allocate_region_wal_options(
mod tests {
use super::*;
use crate::kv_backend::memory::MemoryKvBackend;
use crate::wal::kafka::test_util::run_test_with_kafka_wal;
use crate::wal::kafka::topic_selector::RoundRobinTopicSelector;
use crate::wal::kafka::KafkaConfig;
// Tests the wal options allocator could successfully allocate raft-engine wal options.
// Note: tests for allocator with kafka are integration tests.
#[tokio::test]
async fn test_allocator_with_raft_engine() {
let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
let wal_config = WalConfig::RaftEngine;
let mut allocator = WalOptionsAllocator::new(wal_config, kv_backend);
let allocator = WalOptionsAllocator::new(wal_config, kv_backend);
allocator.start().await.unwrap();
let num_regions = 32;
@@ -128,4 +130,49 @@ mod tests {
.collect();
assert_eq!(got, expected);
}
// Tests that the wal options allocator could successfully allocate Kafka wal options.
#[tokio::test]
async fn test_allocator_with_kafka() {
run_test_with_kafka_wal(|broker_endpoints| {
Box::pin(async {
let topics = (0..256)
.map(|i| format!("test_allocator_with_kafka_{}_{}", i, uuid::Uuid::new_v4()))
.collect::<Vec<_>>();
// Creates a topic manager.
let config = KafkaConfig {
replication_factor: broker_endpoints.len() as i16,
broker_endpoints,
..Default::default()
};
let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
let mut topic_manager = KafkaTopicManager::new(config.clone(), kv_backend);
// Replaces the default topic pool with the constructed topics.
topic_manager.topic_pool = topics.clone();
// Replaces the default selector with a round-robin selector without shuffling.
topic_manager.topic_selector = Arc::new(RoundRobinTopicSelector::default());
// Creates an options allocator.
let allocator = WalOptionsAllocator::Kafka(topic_manager);
allocator.start().await.unwrap();
let num_regions = 32;
let regions = (0..num_regions).collect::<Vec<_>>();
let got = allocate_region_wal_options(regions.clone(), &allocator).unwrap();
// Check the allocated wal options contain the expected topics.
let expected = (0..num_regions)
.map(|i| {
let options = WalOptions::Kafka(KafkaWalOptions {
topic: topics[i as usize].clone(),
});
(i, serde_json::to_string(&options).unwrap())
})
.collect::<HashMap<_, _>>();
assert_eq!(got, expected);
})
})
.await;
}
}

View File

@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod lock;
mod runner;
mod rwlock;
use std::collections::{HashMap, VecDeque};
use std::sync::atomic::{AtomicBool, Ordering};
@@ -29,11 +29,11 @@ use snafu::{ensure, ResultExt};
use tokio::sync::watch::{self, Receiver, Sender};
use tokio::sync::{Mutex as TokioMutex, Notify};
use self::rwlock::KeyRwLock;
use crate::error::{
DuplicateProcedureSnafu, Error, LoaderConflictSnafu, ManagerNotStartSnafu, Result,
StartRemoveOutdatedMetaTaskSnafu, StopRemoveOutdatedMetaTaskSnafu,
};
use crate::local::lock::LockMap;
use crate::local::runner::Runner;
use crate::procedure::BoxedProcedureLoader;
use crate::store::{ProcedureMessage, ProcedureStore, StateStoreRef};
@@ -57,8 +57,6 @@ const META_TTL: Duration = Duration::from_secs(60 * 10);
pub(crate) struct ProcedureMeta {
/// Id of this procedure.
id: ProcedureId,
/// Notify to wait for a lock.
lock_notify: Notify,
/// Parent procedure id.
parent_id: Option<ProcedureId>,
/// Notify to wait for subprocedures.
@@ -78,7 +76,6 @@ impl ProcedureMeta {
let (state_sender, state_receiver) = watch::channel(ProcedureState::Running);
ProcedureMeta {
id,
lock_notify: Notify::new(),
parent_id,
child_notify: Notify::new(),
lock_key,
@@ -131,7 +128,7 @@ struct LoadedProcedure {
pub(crate) struct ManagerContext {
/// Procedure loaders. The key is the type name of the procedure which the loader returns.
loaders: Mutex<HashMap<String, BoxedProcedureLoader>>,
lock_map: LockMap,
key_lock: KeyRwLock<String>,
procedures: RwLock<HashMap<ProcedureId, ProcedureMetaRef>>,
/// Messages loaded from the procedure store.
messages: Mutex<HashMap<ProcedureId, ProcedureMessage>>,
@@ -152,8 +149,8 @@ impl ManagerContext {
/// Returns a new [ManagerContext].
fn new() -> ManagerContext {
ManagerContext {
key_lock: KeyRwLock::new(),
loaders: Mutex::new(HashMap::new()),
lock_map: LockMap::new(),
procedures: RwLock::new(HashMap::new()),
messages: Mutex::new(HashMap::new()),
finished_procedures: Mutex::new(VecDeque::new()),
@@ -850,7 +847,7 @@ mod tests {
assert!(manager.procedure_watcher(procedure_id).is_none());
let mut procedure = ProcedureToLoad::new("submit");
procedure.lock_key = LockKey::single("test.submit");
procedure.lock_key = LockKey::single_exclusive("test.submit");
assert!(manager
.submit(ProcedureWithId {
id: procedure_id,
@@ -918,7 +915,7 @@ mod tests {
}
fn lock_key(&self) -> LockKey {
LockKey::single("test.submit")
LockKey::single_exclusive("test.submit")
}
}
@@ -955,7 +952,7 @@ mod tests {
let manager = LocalManager::new(config, state_store);
let mut procedure = ProcedureToLoad::new("submit");
procedure.lock_key = LockKey::single("test.submit");
procedure.lock_key = LockKey::single_exclusive("test.submit");
let procedure_id = ProcedureId::random();
assert_matches!(
manager
@@ -986,7 +983,7 @@ mod tests {
manager.start().await.unwrap();
let mut procedure = ProcedureToLoad::new("submit");
procedure.lock_key = LockKey::single("test.submit");
procedure.lock_key = LockKey::single_exclusive("test.submit");
let procedure_id = ProcedureId::random();
assert!(manager
.submit(ProcedureWithId {
@@ -1018,7 +1015,7 @@ mod tests {
manager.manager_ctx.set_running();
let mut procedure = ProcedureToLoad::new("submit");
procedure.lock_key = LockKey::single("test.submit");
procedure.lock_key = LockKey::single_exclusive("test.submit");
let procedure_id = ProcedureId::random();
assert!(manager
.submit(ProcedureWithId {
@@ -1041,7 +1038,7 @@ mod tests {
// The remove_outdated_meta method has been stopped, so any procedure meta-data will not be automatically removed.
manager.stop().await.unwrap();
let mut procedure = ProcedureToLoad::new("submit");
procedure.lock_key = LockKey::single("test.submit");
procedure.lock_key = LockKey::single_exclusive("test.submit");
let procedure_id = ProcedureId::random();
manager.manager_ctx.set_running();
@@ -1063,7 +1060,7 @@ mod tests {
// After restart
let mut procedure = ProcedureToLoad::new("submit");
procedure.lock_key = LockKey::single("test.submit");
procedure.lock_key = LockKey::single_exclusive("test.submit");
let procedure_id = ProcedureId::random();
assert!(manager
.submit(ProcedureWithId {

View File

@@ -1,214 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::{HashMap, VecDeque};
use std::sync::RwLock;
use crate::local::ProcedureMetaRef;
use crate::ProcedureId;
/// A lock entry.
///
/// Records the procedure that currently owns the lock together with the
/// procedures queued up behind it.
#[derive(Debug)]
struct Lock {
/// Current lock owner.
owner: ProcedureMetaRef,
/// Waiter procedures. New waiters are pushed to the back and the next
/// owner is taken from the front.
waiters: VecDeque<ProcedureMetaRef>,
}
impl Lock {
    /// Creates a [Lock] that is owned by `owner` and has no waiters.
    fn from_owner(owner: ProcedureMetaRef) -> Lock {
        let waiters = VecDeque::new();
        Lock { owner, waiters }
    }

    /// Hands the lock over to the procedure at the front of the waiter list.
    ///
    /// The waiter is removed from the list, recorded as the new owner and
    /// then woken up through its `lock_notify`.
    ///
    /// Returns false, leaving the owner unchanged, if the waiter list is empty.
    fn switch_owner(&mut self) -> bool {
        let Some(next_owner) = self.waiters.pop_front() else {
            return false;
        };

        // Record the new owner before waking it up.
        self.owner = next_owner.clone();
        // `notify_one()` is required here because the waiter may not have
        // called `notified()` yet.
        next_owner.lock_notify.notify_one();
        true
    }
}
/// Manages lock entries for procedures.
///
/// An entry exists for a key only while some procedure owns the lock of that
/// key: it is inserted by the first acquirer and removed when the owner
/// releases the lock while nobody is waiting.
pub(crate) struct LockMap {
/// Lock entries indexed by lock key.
locks: RwLock<HashMap<String, Lock>>,
}
impl LockMap {
/// Returns a new [LockMap].
pub(crate) fn new() -> LockMap {
LockMap {
locks: RwLock::new(HashMap::new()),
}
}
/// Acquire lock by `key` for procedure with specific `meta`.
///
/// If nobody owns the lock, the procedure becomes the owner and this method
/// returns immediately. Otherwise the procedure is appended to the waiter
/// list and this method waits until the procedure is notified through its
/// `lock_notify`.
///
/// Though `meta` is cloneable, callers must ensure that only one `meta`
/// is acquiring and holding the lock at the same time.
///
/// # Panics
/// Panics if the procedure acquires the lock recursively.
pub(crate) async fn acquire_lock(&self, key: &str, meta: ProcedureMetaRef) {
assert!(!self.hold_lock(key, meta.id));
// The map is only touched inside this block, so the write guard is dropped
// before the `.await` below.
{
let mut locks = self.locks.write().unwrap();
if let Some(lock) = locks.get_mut(key) {
// Lock already exists, but we don't expect that a procedure acquires
// the same lock again.
assert_ne!(lock.owner.id, meta.id);
// Add this procedure to the waiter list. Here we don't check
// whether the procedure is already in the waiter list as we
// expect that a procedure should not wait for two locks simultaneously.
lock.waiters.push_back(meta.clone());
} else {
// Nobody owns the lock: become the owner without waiting.
let _ = locks.insert(key.to_string(), Lock::from_owner(meta));
return;
}
}
// Wait for notify.
meta.lock_notify.notified().await;
// The releasing side sets the new owner before notifying it.
assert!(self.hold_lock(key, meta.id));
}
/// Release lock by `key`.
///
/// Does nothing if there is no entry for `key` or if the procedure with
/// `procedure_id` is not the current owner. Otherwise the lock is handed
/// over to the next waiter, or the entry is removed if nobody is waiting.
pub(crate) fn release_lock(&self, key: &str, procedure_id: ProcedureId) {
let mut locks = self.locks.write().unwrap();
if let Some(lock) = locks.get_mut(key) {
if lock.owner.id != procedure_id {
// This is not the lock owner.
return;
}
if !lock.switch_owner() {
// Nobody waits for this lock, we can remove the lock entry.
let _ = locks.remove(key);
}
}
}
/// Returns true if the procedure with specific `procedure_id` holds the
/// lock of `key`.
///
/// Returns false if there is no entry for `key`.
fn hold_lock(&self, key: &str, procedure_id: ProcedureId) -> bool {
let locks = self.locks.read().unwrap();
locks
.get(key)
.map(|lock| lock.owner.id == procedure_id)
.unwrap_or(false)
}
/// Returns true if the procedure is waiting for the lock `key`.
///
/// Returns false if there is no entry for `key`.
#[cfg(test)]
fn waiting_lock(&self, key: &str, procedure_id: ProcedureId) -> bool {
let locks = self.locks.read().unwrap();
locks
.get(key)
.map(|lock| lock.waiters.iter().any(|meta| meta.id == procedure_id))
.unwrap_or(false)
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use super::*;
use crate::local::test_util;
/// Switching the owner of a lock without waiters reports that nothing happened.
#[test]
fn test_lock_no_waiter() {
let meta = Arc::new(test_util::procedure_meta_for_test());
let mut lock = Lock::from_owner(meta);
assert!(!lock.switch_owner());
}
/// Switching the owner pops the waiter, notifies it and makes it the owner.
#[tokio::test]
async fn test_lock_with_waiter() {
let owner = Arc::new(test_util::procedure_meta_for_test());
let mut lock = Lock::from_owner(owner);
let waiter = Arc::new(test_util::procedure_meta_for_test());
lock.waiters.push_back(waiter.clone());
assert!(lock.switch_owner());
assert!(lock.waiters.is_empty());
// `switch_owner()` has already notified the waiter at this point.
waiter.lock_notify.notified().await;
assert_eq!(lock.owner.id, waiter.id);
}
/// Exercises the hand-over of a lock from its owner to a waiting procedure.
#[tokio::test]
async fn test_lock_map() {
let key = "hello";
let owner = Arc::new(test_util::procedure_meta_for_test());
let lock_map = Arc::new(LockMap::new());
lock_map.acquire_lock(key, owner.clone()).await;
let waiter = Arc::new(test_util::procedure_meta_for_test());
let waiter_id = waiter.id;
// The waiter releases a lock it does not own, this should not take effect.
lock_map.release_lock(key, waiter_id);
let lock_map2 = lock_map.clone();
let owner_id = owner.id;
let handle = tokio::spawn(async move {
assert!(lock_map2.hold_lock(key, owner_id));
assert!(!lock_map2.hold_lock(key, waiter_id));
// The waiter waits for the lock.
lock_map2.acquire_lock(key, waiter.clone()).await;
assert!(lock_map2.hold_lock(key, waiter_id));
});
// Owner still holds the lock.
assert!(lock_map.hold_lock(key, owner_id));
// Wait until the waiter has been added to the waiter list of the lock.
while !lock_map.waiting_lock(key, waiter_id) {
tokio::time::sleep(std::time::Duration::from_millis(5)).await;
}
// Release lock
lock_map.release_lock(key, owner_id);
assert!(!lock_map.hold_lock(key, owner_id));
// Wait for task.
handle.await.unwrap();
// The waiter should hold the lock now.
assert!(lock_map.hold_lock(key, waiter_id));
lock_map.release_lock(key, waiter_id);
}
}

View File

@@ -19,8 +19,10 @@ use backon::{BackoffBuilder, ExponentialBuilder};
use common_telemetry::logging;
use tokio::time;
use super::rwlock::OwnedKeyRwLockGuard;
use crate::error::{self, ProcedurePanicSnafu, Result};
use crate::local::{ManagerContext, ProcedureMeta, ProcedureMetaRef};
use crate::procedure::StringKey;
use crate::store::ProcedureStore;
use crate::ProcedureState::Retrying;
use crate::{BoxedProcedure, Context, Error, ProcedureId, ProcedureState, ProcedureWithId, Status};
@@ -56,6 +58,7 @@ impl ExecResult {
struct ProcedureGuard {
meta: ProcedureMetaRef,
manager_ctx: Arc<ManagerContext>,
key_guards: Vec<OwnedKeyRwLockGuard>,
finish: bool,
}
@@ -65,6 +68,7 @@ impl ProcedureGuard {
ProcedureGuard {
meta,
manager_ctx,
key_guards: vec![],
finish: false,
}
}
@@ -95,10 +99,15 @@ impl Drop for ProcedureGuard {
self.manager_ctx.notify_by_subprocedure(parent_id);
}
// Release lock in reverse order.
for key in self.meta.lock_key.keys_to_unlock() {
self.manager_ctx.lock_map.release_lock(key, self.meta.id);
// Drops the key guards in the reverse order.
while !self.key_guards.is_empty() {
self.key_guards.pop();
}
// Clean the staled locks.
self.manager_ctx
.key_lock
.clean_keys(self.meta.lock_key.keys_to_lock().map(|k| k.as_string()));
}
}
@@ -121,7 +130,7 @@ impl Runner {
/// Run the procedure.
pub(crate) async fn run(mut self) {
// Ensure we can update the procedure state.
let guard = ProcedureGuard::new(self.meta.clone(), self.manager_ctx.clone());
let mut guard = ProcedureGuard::new(self.meta.clone(), self.manager_ctx.clone());
logging::info!(
"Runner {}-{} starts",
@@ -133,10 +142,14 @@ impl Runner {
// recursive locking by adding a root procedure id to the meta.
for key in self.meta.lock_key.keys_to_lock() {
// Acquire lock for each key.
self.manager_ctx
.lock_map
.acquire_lock(key, self.meta.clone())
.await;
let key_guard = match key {
StringKey::Share(key) => self.manager_ctx.key_lock.read(key.clone()).await.into(),
StringKey::Exclusive(key) => {
self.manager_ctx.key_lock.write(key.clone()).await.into()
}
};
guard.key_guards.push(key_guard);
}
// Execute the procedure. We need to release the lock whenever the execution
@@ -604,7 +617,7 @@ mod tests {
};
let normal = ProcedureAdapter {
data: "normal".to_string(),
lock_key: LockKey::single("catalog.schema.table"),
lock_key: LockKey::single_exclusive("catalog.schema.table"),
exec_fn,
};
@@ -665,7 +678,7 @@ mod tests {
};
let suspend = ProcedureAdapter {
data: "suspend".to_string(),
lock_key: LockKey::single("catalog.schema.table"),
lock_key: LockKey::single_exclusive("catalog.schema.table"),
exec_fn,
};
@@ -697,7 +710,7 @@ mod tests {
};
let child = ProcedureAdapter {
data: "child".to_string(),
lock_key: LockKey::new(keys.iter().map(|k| k.to_string())),
lock_key: LockKey::new_exclusive(keys.iter().map(|k| k.to_string())),
exec_fn,
};
@@ -765,7 +778,7 @@ mod tests {
};
let parent = ProcedureAdapter {
data: "parent".to_string(),
lock_key: LockKey::single("catalog.schema.table"),
lock_key: LockKey::single_exclusive("catalog.schema.table"),
exec_fn,
};
@@ -784,6 +797,7 @@ mod tests {
runner.manager_ctx = manager_ctx.clone();
runner.run().await;
assert!(manager_ctx.key_lock.is_empty());
// Check child procedures.
for child_id in children_ids {
@@ -810,7 +824,7 @@ mod tests {
let exec_fn = move |_| async move { Ok(Status::Executing { persist: true }) }.boxed();
let normal = ProcedureAdapter {
data: "normal".to_string(),
lock_key: LockKey::single("catalog.schema.table"),
lock_key: LockKey::single_exclusive("catalog.schema.table"),
exec_fn,
};
@@ -851,7 +865,7 @@ mod tests {
|_| async { Err(Error::external(MockError::new(StatusCode::Unexpected))) }.boxed();
let normal = ProcedureAdapter {
data: "fail".to_string(),
lock_key: LockKey::single("catalog.schema.table"),
lock_key: LockKey::single_exclusive("catalog.schema.table"),
exec_fn,
};
@@ -875,7 +889,7 @@ mod tests {
|_| async { Err(Error::external(MockError::new(StatusCode::Unexpected))) }.boxed();
let fail = ProcedureAdapter {
data: "fail".to_string(),
lock_key: LockKey::single("catalog.schema.table"),
lock_key: LockKey::single_exclusive("catalog.schema.table"),
exec_fn,
};
@@ -917,7 +931,7 @@ mod tests {
let retry_later = ProcedureAdapter {
data: "retry_later".to_string(),
lock_key: LockKey::single("catalog.schema.table"),
lock_key: LockKey::single_exclusive("catalog.schema.table"),
exec_fn,
};
@@ -952,7 +966,7 @@ mod tests {
let exceed_max_retry_later = ProcedureAdapter {
data: "exceed_max_retry_later".to_string(),
lock_key: LockKey::single("catalog.schema.table"),
lock_key: LockKey::single_exclusive("catalog.schema.table"),
exec_fn,
};
@@ -993,7 +1007,7 @@ mod tests {
};
let fail = ProcedureAdapter {
data: "fail".to_string(),
lock_key: LockKey::single("catalog.schema.table.region-0"),
lock_key: LockKey::single_exclusive("catalog.schema.table.region-0"),
exec_fn,
};
@@ -1027,7 +1041,7 @@ mod tests {
};
let parent = ProcedureAdapter {
data: "parent".to_string(),
lock_key: LockKey::single("catalog.schema.table"),
lock_key: LockKey::single_exclusive("catalog.schema.table"),
exec_fn,
};
@@ -1042,10 +1056,11 @@ mod tests {
// Manually add this procedure to the manager ctx.
assert!(manager_ctx.try_insert_procedure(meta.clone()));
// Replace the manager ctx.
runner.manager_ctx = manager_ctx;
runner.manager_ctx = manager_ctx.clone();
// Run the runner and execute the procedure.
runner.run().await;
assert!(manager_ctx.key_lock.is_empty());
let err = meta.state().error().unwrap().output_msg();
assert!(err.contains("subprocedure failed"), "{err}");
}

View File

@@ -0,0 +1,247 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::hash::Hash;
use std::sync::{Arc, Mutex};
use tokio::sync::{OwnedRwLockReadGuard, OwnedRwLockWriteGuard, RwLock};
/// An owned guard of a key lock, holding either shared or exclusive access.
pub enum OwnedKeyRwLockGuard {
/// A guard that holds shared read access.
Read(OwnedRwLockReadGuard<()>),
/// A guard that holds exclusive write access.
Write(OwnedRwLockWriteGuard<()>),
}
impl From<OwnedRwLockReadGuard<()>> for OwnedKeyRwLockGuard {
fn from(guard: OwnedRwLockReadGuard<()>) -> Self {
OwnedKeyRwLockGuard::Read(guard)
}
}
impl From<OwnedRwLockWriteGuard<()>> for OwnedKeyRwLockGuard {
fn from(guard: OwnedRwLockWriteGuard<()>) -> Self {
OwnedKeyRwLockGuard::Write(guard)
}
}
/// Locks based on a key, allowing other keys to lock independently.
///
/// The lock of a key is created on first use and stays in the map until it
/// is removed by `clean_keys`.
#[derive(Debug)]
pub struct KeyRwLock<K> {
/// The inner map of locks for specific keys.
inner: Mutex<HashMap<K, Arc<RwLock<()>>>>,
}
impl<K> KeyRwLock<K>
where
K: Eq + Hash + Clone,
{
pub fn new() -> Self {
KeyRwLock {
inner: Default::default(),
}
}
/// Locks the key with shared read access, returning a guard.
pub async fn read(&self, key: K) -> OwnedRwLockReadGuard<()> {
let lock = {
let mut locks = self.inner.lock().unwrap();
locks.entry(key).or_default().clone()
};
lock.read_owned().await
}
/// Locks the key with exclusive write access, returning a guard.
pub async fn write(&self, key: K) -> OwnedRwLockWriteGuard<()> {
let lock = {
let mut locks = self.inner.lock().unwrap();
locks.entry(key).or_default().clone()
};
lock.write_owned().await
}
/// Clean up stale locks.
///
/// Note: It only cleans a lock if
/// - Its strong ref count equals one.
/// - Able to acquire the write lock.
pub fn clean_keys<'a>(&'a self, iter: impl IntoIterator<Item = &'a K>) {
let mut locks = self.inner.lock().unwrap();
let mut keys = Vec::new();
for key in iter {
if let Some(lock) = locks.get(key) {
if lock.try_write().is_ok() {
debug_assert_eq!(Arc::weak_count(lock), 0);
// Ensures nobody keeps this ref.
if Arc::strong_count(lock) == 1 {
keys.push(key);
}
}
}
}
for key in keys {
locks.remove(key);
}
}
}
#[cfg(test)]
impl<K> KeyRwLock<K>
where
    K: Eq + Hash + Clone,
{
    /// Tries to lock the key with shared read access, returning immediately.
    pub fn try_read(&self, key: K) -> Result<OwnedRwLockReadGuard<()>, tokio::sync::TryLockError> {
        // The map mutex is released at the end of this statement.
        let lock = self.inner.lock().unwrap().entry(key).or_default().clone();
        lock.try_read_owned()
    }

    /// Tries to lock the key with exclusive write access, returning immediately.
    pub fn try_write(
        &self,
        key: K,
    ) -> Result<OwnedRwLockWriteGuard<()>, tokio::sync::TryLockError> {
        // The map mutex is released at the end of this statement.
        let lock = self.inner.lock().unwrap().entry(key).or_default().clone();
        lock.try_write_owned()
    }

    /// Returns the number of keys.
    pub fn len(&self) -> usize {
        let locks = self.inner.lock().unwrap();
        locks.len()
    }

    /// Returns true if the inner map is empty.
    pub fn is_empty(&self) -> bool {
        self.inner.lock().unwrap().is_empty()
    }
}
#[cfg(test)]
mod tests {
use super::*;
/// Checks basic read/write exclusion per key and that released keys can be cleaned.
#[tokio::test]
async fn test_naive() {
let lock_key = KeyRwLock::new();
// While a read guard is held, more readers are admitted but writers are not.
{
let _guard = lock_key.read("test1").await;
assert_eq!(lock_key.len(), 1);
assert!(lock_key.try_read("test1").is_ok());
assert!(lock_key.try_write("test1").is_err());
}
// While a write guard is held, neither readers nor writers are admitted.
{
let _guard0 = lock_key.write("test2").await;
let _guard = lock_key.write("test1").await;
assert_eq!(lock_key.len(), 2);
assert!(lock_key.try_read("test1").is_err());
assert!(lock_key.try_write("test1").is_err());
}
// Dropping the guards does not remove the keys; `clean_keys` does.
assert_eq!(lock_key.len(), 2);
lock_key.clean_keys(&vec!["test1", "test2"]);
assert!(lock_key.is_empty());
let mut guards = Vec::new();
for key in ["test1", "test2"] {
guards.push(lock_key.read(key).await);
}
// Drop the guards in the reverse order of acquisition.
while !guards.is_empty() {
guards.pop();
}
lock_key.clean_keys(vec![&"test1", &"test2"]);
assert_eq!(lock_key.len(), 0);
}
/// Checks the ref counts that `clean_keys` relies on and that a lock which is
/// still referenced outside of the map is not removed.
#[tokio::test]
async fn test_clean_keys() {
let lock_key = KeyRwLock::<&str>::new();
// Holding a read guard: one ref in the map, one moved into the guard.
{
let rwlock = {
lock_key
.inner
.lock()
.unwrap()
.entry("test")
.or_default()
.clone()
};
assert_eq!(Arc::strong_count(&rwlock), 2);
let _guard = rwlock.read_owned().await;
{
let inner = lock_key.inner.lock().unwrap();
let rwlock = inner.get("test").unwrap();
assert_eq!(Arc::strong_count(rwlock), 2);
}
}
// Holding a write guard: one ref in the map, one moved into the guard.
{
let rwlock = {
lock_key
.inner
.lock()
.unwrap()
.entry("test")
.or_default()
.clone()
};
assert_eq!(Arc::strong_count(&rwlock), 2);
let _guard = rwlock.write_owned().await;
{
let inner = lock_key.inner.lock().unwrap();
let rwlock = inner.get("test").unwrap();
assert_eq!(Arc::strong_count(rwlock), 2);
}
}
// All guards are dropped: only the map keeps a ref.
{
let inner = lock_key.inner.lock().unwrap();
let rwlock = inner.get("test").unwrap();
assert_eq!(Arc::strong_count(rwlock), 1);
}
// Someone has the ref of the rwlock, but it waits to be granted the lock.
let rwlock = {
lock_key
.inner
.lock()
.unwrap()
.entry("test")
.or_default()
.clone()
};
assert_eq!(Arc::strong_count(&rwlock), 2);
// However, a thread trying to remove the "test" key should have no effect.
lock_key.clean_keys(vec![&"test"]);
// The rwlock should still be in the map.
{
let inner = lock_key.inner.lock().unwrap();
inner.get("test").unwrap();
}
}
}

View File

@@ -116,22 +116,49 @@ impl<T: Procedure + ?Sized> Procedure for Box<T> {
}
}
/// A string lock key tagged with the kind of access to acquire for it.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum StringKey {
/// The key is locked with shared (read) access.
Share(String),
/// The key is locked with exclusive (write) access.
Exclusive(String),
}
/// Keys to identify required locks.
///
/// [LockKey] always sorts keys lexicographically so that they can be acquired
/// in the same order.
// Most procedures should only acquire 1 ~ 2 locks so we use smallvec to hold keys.
/// Most procedures should only acquire 1 ~ 2 locks so we use smallvec to hold keys.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct LockKey(SmallVec<[String; 2]>);
pub struct LockKey(SmallVec<[StringKey; 2]>);
impl StringKey {
    /// Consumes the key and returns the wrapped string, whichever variant it is.
    pub fn into_string(self) -> String {
        match self {
            StringKey::Share(inner) | StringKey::Exclusive(inner) => inner,
        }
    }

    /// Borrows the wrapped string, whichever variant it is.
    pub fn as_string(&self) -> &String {
        match self {
            StringKey::Share(inner) | StringKey::Exclusive(inner) => inner,
        }
    }
}
impl LockKey {
/// Returns a new [LockKey] with only one key.
pub fn single(key: impl Into<String>) -> LockKey {
pub fn single(key: impl Into<StringKey>) -> LockKey {
LockKey(smallvec![key.into()])
}
/// Returns a new [LockKey] with only one exclusive key.
pub fn single_exclusive(key: impl Into<String>) -> LockKey {
LockKey(smallvec![StringKey::Exclusive(key.into())])
}
/// Returns a new [LockKey] with keys from specific `iter`.
pub fn new(iter: impl IntoIterator<Item = String>) -> LockKey {
pub fn new(iter: impl IntoIterator<Item = StringKey>) -> LockKey {
let mut vec: SmallVec<_> = iter.into_iter().collect();
vec.sort();
// Dedup keys to avoid acquiring the same key multiple times.
@@ -139,14 +166,14 @@ impl LockKey {
LockKey(vec)
}
/// Returns the keys to lock.
pub fn keys_to_lock(&self) -> impl Iterator<Item = &String> {
self.0.iter()
/// Returns a new [LockKey] whose keys are all exclusive, built from the strings in `iter`.
pub fn new_exclusive(iter: impl IntoIterator<Item = String>) -> LockKey {
Self::new(iter.into_iter().map(StringKey::Exclusive))
}
/// Returns the keys to unlock.
pub fn keys_to_unlock(&self) -> impl Iterator<Item = &String> {
self.0.iter().rev()
/// Returns the keys to lock.
pub fn keys_to_lock(&self) -> impl Iterator<Item = &StringKey> {
self.0.iter()
}
}
@@ -340,20 +367,25 @@ mod tests {
#[test]
fn test_lock_key() {
let entity = "catalog.schema.my_table";
let key = LockKey::single(entity);
assert_eq!(vec![entity], key.keys_to_lock().collect::<Vec<_>>());
assert_eq!(vec![entity], key.keys_to_unlock().collect::<Vec<_>>());
let key = LockKey::single_exclusive(entity);
assert_eq!(
vec![&StringKey::Exclusive(entity.to_string())],
key.keys_to_lock().collect::<Vec<_>>()
);
let key = LockKey::new([
let key = LockKey::new_exclusive([
"b".to_string(),
"c".to_string(),
"a".to_string(),
"c".to_string(),
]);
assert_eq!(vec!["a", "b", "c"], key.keys_to_lock().collect::<Vec<_>>());
assert_eq!(
vec!["c", "b", "a"],
key.keys_to_unlock().collect::<Vec<_>>()
vec![
&StringKey::Exclusive("a".to_string()),
&StringKey::Exclusive("b".to_string()),
&StringKey::Exclusive("c".to_string())
],
key.keys_to_lock().collect::<Vec<_>>()
);
}

View File

@@ -87,7 +87,7 @@ impl StateStore for ObjectStateStore {
let mut lister = self
.store
.lister_with(path)
.delimiter("")
.recursive(true)
.await
.map_err(|e| {
BoxedError::new(PlainError::new(

View File

@@ -98,7 +98,7 @@ mod tests {
}
fn lock_key(&self) -> LockKey {
LockKey::single("test.submit")
LockKey::single_exclusive("test.submit")
}
}

View File

@@ -20,13 +20,13 @@ pub const THREAD_NAME_LABEL: &str = "thread_name";
lazy_static! {
pub static ref METRIC_RUNTIME_THREADS_ALIVE: IntGaugeVec = register_int_gauge_vec!(
"runtime_threads_alive",
"greptime_runtime_threads_alive",
"runtime threads alive",
&[THREAD_NAME_LABEL]
)
.unwrap();
pub static ref METRIC_RUNTIME_THREADS_IDLE: IntGaugeVec = register_int_gauge_vec!(
"runtime_threads_idle",
"greptime_runtime_threads_idle",
"runtime threads idle",
&[THREAD_NAME_LABEL]
)

View File

@@ -22,7 +22,7 @@ use prometheus::*;
lazy_static! {
pub static ref PANIC_COUNTER: IntCounter =
register_int_counter!("panic_counter", "panic_counter").unwrap();
register_int_counter!("greptime_panic_counter", "panic_counter").unwrap();
}
pub fn set_panic_hook() {

View File

@@ -4,6 +4,9 @@ version.workspace = true
edition.workspace = true
license.workspace = true
[features]
testing = []
[dependencies]
api.workspace = true
arrow-flight.workspace = true

View File

@@ -22,11 +22,12 @@ use std::sync::Arc;
use catalog::memory::MemoryCatalogManager;
use common_base::Plugins;
use common_config::wal::{KafkaConfig, RaftEngineConfig};
use common_config::{WalConfig, WAL_OPTIONS_KEY};
use common_config::WalConfig;
use common_error::ext::BoxedError;
use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
use common_meta::key::datanode_table::{DatanodeTableManager, DatanodeTableValue};
use common_meta::kv_backend::KvBackendRef;
use common_meta::wal::prepare_wal_option;
pub use common_procedure::options::ProcedureConfig;
use common_runtime::Runtime;
use common_telemetry::{error, info, warn};
@@ -98,7 +99,7 @@ impl Datanode {
self.start_telemetry();
if let Some(t) = self.export_metrics_task.as_ref() {
t.start()
t.start(None).context(StartServerSnafu)?
}
self.start_services().await
@@ -538,13 +539,11 @@ async fn open_all_regions(
for region_number in table_value.regions {
// Augments region options with wal options if a wal options is provided.
let mut region_options = table_value.region_info.region_options.clone();
table_value
.region_info
.region_wal_options
.get(&region_number.to_string())
.and_then(|wal_options| {
region_options.insert(WAL_OPTIONS_KEY.to_string(), wal_options.clone())
});
prepare_wal_option(
&mut region_options,
RegionId::new(table_value.table_id, region_number),
&table_value.region_info.region_wal_options,
);
regions.push((
RegionId::new(table_value.table_id, region_number),

View File

@@ -272,6 +272,16 @@ pub enum Error {
location: Location,
source: BoxedError,
},
#[snafu(display(
"Failed to find logical regions in physical region {}",
physical_region_id
))]
FindLogicalRegions {
physical_region_id: RegionId,
source: metric_engine::error::Error,
location: Location,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -340,6 +350,8 @@ impl ErrorExt for Error {
}
HandleRegionRequest { source, .. } => source.status_code(),
StopRegionEngine { source, .. } => source.status_code(),
FindLogicalRegions { source, .. } => source.status_code(),
}
}

View File

@@ -305,7 +305,7 @@ impl HeartbeatTask {
}
async fn load_region_stats(region_server: &RegionServer) -> Vec<RegionStat> {
let regions = region_server.opened_regions();
let regions = region_server.reportable_regions();
let mut region_stats = Vec::new();
for stat in regions {

View File

@@ -96,6 +96,7 @@ impl HeartbeatResponseHandler for RegionHeartbeatResponseHandler {
Some((_, Instruction::OpenRegion { .. }))
| Some((_, Instruction::CloseRegion { .. }))
| Some((_, Instruction::DowngradeRegion { .. }))
| Some((_, Instruction::UpgradeRegion { .. }))
)
}
@@ -134,7 +135,7 @@ mod tests {
use common_meta::heartbeat::mailbox::{
HeartbeatMailbox, IncomingMessage, MailboxRef, MessageMeta,
};
use common_meta::instruction::{DowngradeRegion, OpenRegion};
use common_meta::instruction::{DowngradeRegion, OpenRegion, UpgradeRegion};
use mito2::config::MitoConfig;
use mito2::engine::MITO_ENGINE_NAME;
use mito2::test_util::{CreateRequestBuilder, TestEnv};
@@ -175,6 +176,44 @@ mod tests {
}
}
/// Every region-related instruction (open, close, downgrade, upgrade) must be
/// accepted by the region heartbeat response handler.
#[test]
fn test_is_acceptable() {
    common_telemetry::init_default_ut_logging();
    let region_server = mock_region_server();
    let heartbeat_handler = RegionHeartbeatResponseHandler::new(region_server.clone());
    let heartbeat_env = HeartbeatResponseTestEnv::new();
    let meta = MessageMeta::new_test(1, "test", "dn-1", "me-0");

    // Wraps an instruction into a handler context and asks the handler about it.
    let accepts = |instruction: Instruction| {
        let ctx = heartbeat_env.create_handler_ctx((meta.clone(), instruction));
        heartbeat_handler.is_acceptable(&ctx)
    };

    let region_id = RegionId::new(1024, 1);
    let storage_path = "test";

    // Open region
    assert!(accepts(open_region_instruction(region_id, storage_path)));

    // Close region
    assert!(accepts(close_region_instruction(region_id)));

    // Downgrade region
    let downgrade = Instruction::DowngradeRegion(DowngradeRegion {
        region_id: RegionId::new(2048, 1),
    });
    assert!(accepts(downgrade));

    // Upgrade region
    let upgrade = Instruction::UpgradeRegion(UpgradeRegion {
        region_id,
        last_entry_id: None,
        wait_for_replay_timeout: None,
    });
    assert!(accepts(upgrade));
}
fn close_region_instruction(region_id: RegionId) -> Instruction {
Instruction::CloseRegion(RegionIdent {
table_id: region_id.table_id(),

View File

@@ -14,6 +14,7 @@
use common_error::ext::ErrorExt;
use common_meta::instruction::{InstructionReply, OpenRegion, SimpleReply};
use common_meta::wal::prepare_wal_option;
use futures_util::future::BoxFuture;
use store_api::path_utils::region_dir;
use store_api::region_request::{RegionOpenRequest, RegionRequest};
@@ -26,15 +27,14 @@ impl HandlerContext {
OpenRegion {
region_ident,
region_storage_path,
region_options,
mut region_options,
region_wal_options,
skip_wal_replay,
}: OpenRegion,
) -> BoxFuture<'static, InstructionReply> {
Box::pin(async move {
let region_id = Self::region_ident_to_region_id(&region_ident);
// TODO(niebayes): extends region options with region_wal_options.
let _ = region_wal_options;
prepare_wal_option(&mut region_options, region_id, &region_wal_options);
let request = RegionRequest::Open(RegionOpenRequest {
engine: region_ident.engine,
region_dir: region_dir(&region_storage_path, region_id),
@@ -42,10 +42,8 @@ impl HandlerContext {
skip_wal_replay,
});
let result = self.region_server.handle_request(region_id, request).await;
let success = result.is_ok();
let error = result.as_ref().map_err(|e| e.output_msg()).err();
InstructionReply::OpenRegion(SimpleReply {
result: success,
error,

View File

@@ -14,7 +14,7 @@
use common_error::ext::ErrorExt;
use common_meta::instruction::{InstructionReply, UpgradeRegion, UpgradeRegionReply};
use common_telemetry::warn;
use common_telemetry::{info, warn};
use futures_util::future::BoxFuture;
use store_api::region_request::{RegionCatchupRequest, RegionRequest};
@@ -56,6 +56,7 @@ impl HandlerContext {
.try_register(
region_id,
Box::pin(async move {
info!("Executing region: {region_id} catchup to: last entry id {last_entry_id:?}");
region_server_moved
.handle_request(
region_id,

View File

@@ -24,5 +24,5 @@ pub mod heartbeat;
pub mod metrics;
pub mod region_server;
mod store;
#[cfg(test)]
mod tests;
#[cfg(any(test, feature = "testing"))]
pub mod tests;

View File

@@ -24,26 +24,26 @@ pub const REGION_ID: &str = "region_id";
lazy_static! {
/// The elapsed time of handling a request in the region_server.
pub static ref HANDLE_REGION_REQUEST_ELAPSED: HistogramVec = register_histogram_vec!(
"datanode_handle_region_request_elapsed",
"greptime_datanode_handle_region_request_elapsed",
"datanode handle region request elapsed",
&[REGION_REQUEST_TYPE]
)
.unwrap();
/// The elapsed time since the last received heartbeat.
pub static ref LAST_RECEIVED_HEARTBEAT_ELAPSED: IntGauge = register_int_gauge!(
"last_received_heartbeat_lease_elapsed",
"greptime_last_received_heartbeat_lease_elapsed",
"last received heartbeat lease elapsed",
)
.unwrap();
pub static ref LEASE_EXPIRED_REGION: IntGaugeVec = register_int_gauge_vec!(
"lease_expired_region",
"greptime_lease_expired_region",
"lease expired region",
&[REGION_ID]
)
.unwrap();
/// The received region leases via heartbeat.
pub static ref HEARTBEAT_REGION_LEASES: IntGaugeVec = register_int_gauge_vec!(
"heartbeat_region_leases",
"greptime_heartbeat_region_leases",
"received region leases via heartbeat",
&[REGION_ROLE]
)

View File

@@ -43,6 +43,7 @@ use datafusion_common::DataFusionError;
use datafusion_expr::{Expr as DfExpr, TableProviderFilterPushDown, TableType};
use datatypes::arrow::datatypes::SchemaRef;
use futures_util::future::try_join_all;
use metric_engine::engine::MetricEngine;
use prost::Message;
use query::QueryEngineRef;
use servers::error::{self as servers_error, ExecuteGrpcRequestSnafu, Result as ServerResult};
@@ -51,6 +52,7 @@ use servers::grpc::region_server::RegionServerHandler;
use session::context::{QueryContextBuilder, QueryContextRef};
use snafu::{OptionExt, ResultExt};
use store_api::metadata::RegionMetadataRef;
use store_api::metric_engine_consts::{METRIC_ENGINE_NAME, PHYSICAL_TABLE_METADATA_KEY};
use store_api::region_engine::{RegionEngineRef, RegionRole, SetReadonlyResponse};
use store_api::region_request::{AffectedRows, RegionCloseRequest, RegionRequest};
use store_api::storage::{RegionId, ScanRequest};
@@ -60,8 +62,9 @@ use tonic::{Request, Response, Result as TonicResult};
use crate::error::{
self, BuildRegionRequestsSnafu, DecodeLogicalPlanSnafu, ExecuteLogicalPlanSnafu,
GetRegionMetadataSnafu, HandleRegionRequestSnafu, RegionEngineNotFoundSnafu,
RegionNotFoundSnafu, Result, StopRegionEngineSnafu, UnsupportedOutputSnafu,
FindLogicalRegionsSnafu, GetRegionMetadataSnafu, HandleRegionRequestSnafu,
RegionEngineNotFoundSnafu, RegionNotFoundSnafu, Result, StopRegionEngineSnafu, UnexpectedSnafu,
UnsupportedOutputSnafu,
};
use crate::event_listener::RegionServerEventListenerRef;
@@ -123,7 +126,10 @@ impl RegionServer {
self.inner.handle_read(request).await
}
pub fn opened_regions(&self) -> Vec<RegionStat> {
/// Returns all opened and reportable regions.
///
/// Note: metric regions are excluded.
pub fn reportable_regions(&self) -> Vec<RegionStat> {
self.inner
.region_map
.iter()
@@ -369,7 +375,7 @@ impl RegionServerInner {
let current_region_status = self.region_map.get(&region_id);
let engine = match region_change {
RegionChange::Register(ref engine_type) => match current_region_status {
RegionChange::Register(ref engine_type, _) => match current_region_status {
Some(status) => match status.clone() {
RegionEngineWithStatus::Registering(_) => {
return Ok(CurrentEngine::EarlyReturn(0))
@@ -427,8 +433,12 @@ impl RegionServerInner {
.start_timer();
let region_change = match &request {
RegionRequest::Create(create) => RegionChange::Register(create.engine.clone()),
RegionRequest::Open(open) => RegionChange::Register(open.engine.clone()),
RegionRequest::Create(create) => RegionChange::Register(create.engine.clone(), false),
RegionRequest::Open(open) => {
let is_opening_physical_region =
open.options.contains_key(PHYSICAL_TABLE_METADATA_KEY);
RegionChange::Register(open.engine.clone(), is_opening_physical_region)
}
RegionRequest::Close(_) | RegionRequest::Drop(_) => RegionChange::Deregisters,
RegionRequest::Put(_)
| RegionRequest::Delete(_)
@@ -460,7 +470,8 @@ impl RegionServerInner {
{
Ok(result) => {
// Sets corresponding region status to ready.
self.set_region_status_ready(region_id, engine, region_change);
self.set_region_status_ready(region_id, engine, region_change)
.await?;
Ok(result)
}
Err(err) => {
@@ -478,7 +489,7 @@ impl RegionServerInner {
region_change: &RegionChange,
) {
match region_change {
RegionChange::Register(_) => {
RegionChange::Register(_, _) => {
self.region_map.insert(
region_id,
RegionEngineWithStatus::Registering(engine.clone()),
@@ -497,7 +508,7 @@ impl RegionServerInner {
fn unset_region_status(&self, region_id: RegionId, region_change: RegionChange) {
match region_change {
RegionChange::None => {}
RegionChange::Register(_) | RegionChange::Deregisters => {
RegionChange::Register(_, _) | RegionChange::Deregisters => {
self.region_map
.remove(&region_id)
.map(|(id, engine)| engine.set_writable(id, false));
@@ -505,16 +516,20 @@ impl RegionServerInner {
}
}
fn set_region_status_ready(
async fn set_region_status_ready(
&self,
region_id: RegionId,
engine: RegionEngineRef,
region_change: RegionChange,
) {
) -> Result<()> {
let engine_type = engine.name();
match region_change {
RegionChange::None => {}
RegionChange::Register(_) => {
RegionChange::Register(_, is_opening_physical_region) => {
if is_opening_physical_region {
self.register_logical_regions(&engine, region_id).await?;
}
info!("Region {region_id} is registered to engine {engine_type}");
self.region_map
.insert(region_id, RegionEngineWithStatus::Ready(engine));
@@ -528,6 +543,37 @@ impl RegionServerInner {
self.event_listener.on_region_deregistered(region_id);
}
}
Ok(())
}
/// Marks every logical region that lives in `physical_region_id` as `Ready`
/// in the region map, all of them served by `engine`.
///
/// # Errors
///
/// - `Unexpected` if `engine` cannot be downcast to [`MetricEngine`].
/// - `FindLogicalRegions` if the metric engine fails to list the logical
///   regions of `physical_region_id`.
async fn register_logical_regions(
    &self,
    engine: &RegionEngineRef,
    physical_region_id: RegionId,
) -> Result<()> {
    // `with_context` takes a closure, so the message is only formatted (and
    // allocated) when the downcast actually fails.
    let metric_engine = engine
        .as_any()
        .downcast_ref::<MetricEngine>()
        .with_context(|| UnexpectedSnafu {
            violated: format!(
                "expecting engine type '{}', actual '{}'",
                METRIC_ENGINE_NAME,
                engine.name(),
            ),
        })?;

    let logical_regions = metric_engine
        .logical_regions(physical_region_id)
        .await
        .context(FindLogicalRegionsSnafu { physical_region_id })?;

    for region in logical_regions {
        self.region_map
            .insert(region, RegionEngineWithStatus::Ready(engine.clone()));
        info!("Logical region {} is registered!", region);
    }
    Ok(())
}
pub async fn handle_read(&self, request: QueryRequest) -> Result<SendableRecordBatchStream> {
@@ -622,7 +668,7 @@ impl RegionServerInner {
enum RegionChange {
None,
Register(String),
Register(String, bool),
Deregisters,
}
@@ -1051,7 +1097,7 @@ mod tests {
CurrentEngineTest {
region_id,
current_region_status: None,
region_change: RegionChange::Register(engine.name().to_string()),
region_change: RegionChange::Register(engine.name().to_string(), false),
assert: Box::new(|result| {
let current_engine = result.unwrap();
assert_matches!(current_engine, CurrentEngine::Engine(_));
@@ -1060,7 +1106,7 @@ mod tests {
CurrentEngineTest {
region_id,
current_region_status: Some(RegionEngineWithStatus::Registering(engine.clone())),
region_change: RegionChange::Register(engine.name().to_string()),
region_change: RegionChange::Register(engine.name().to_string(), false),
assert: Box::new(|result| {
let current_engine = result.unwrap();
assert_matches!(current_engine, CurrentEngine::EarlyReturn(_));
@@ -1069,7 +1115,7 @@ mod tests {
CurrentEngineTest {
region_id,
current_region_status: Some(RegionEngineWithStatus::Deregistering(engine.clone())),
region_change: RegionChange::Register(engine.name().to_string()),
region_change: RegionChange::Register(engine.name().to_string(), false),
assert: Box::new(|result| {
let err = result.unwrap_err();
assert_eq!(err.status_code(), StatusCode::RegionBusy);
@@ -1078,7 +1124,7 @@ mod tests {
CurrentEngineTest {
region_id,
current_region_status: Some(RegionEngineWithStatus::Ready(engine.clone())),
region_change: RegionChange::Register(engine.name().to_string()),
region_change: RegionChange::Register(engine.name().to_string(), false),
assert: Box::new(|result| {
let current_engine = result.unwrap();
assert_matches!(current_engine, CurrentEngine::Engine(_));

View File

@@ -207,4 +207,8 @@ impl RegionEngine for MockRegionEngine {
}
Some(RegionRole::Leader)
}
/// Exposes this engine as `&dyn Any` so that callers can downcast it to its concrete type.
fn as_any(&self) -> &dyn Any {
self
}
}

View File

@@ -109,6 +109,11 @@ impl ColumnSchema {
&mut self.metadata
}
/// Returns the column comment, i.e. the value stored under `COMMENT_KEY` in
/// the column metadata, or `None` if the metadata has no such entry.
pub fn column_comment(&self) -> Option<&String> {
self.metadata.get(COMMENT_KEY)
}
pub fn with_time_index(mut self, is_time_index: bool) -> Self {
self.is_time_index = is_time_index;
if is_time_index {
@@ -315,12 +320,16 @@ mod tests {
#[test]
fn test_column_schema_with_metadata() {
let metadata = Metadata::from([("k1".to_string(), "v1".to_string())]);
let metadata = Metadata::from([
("k1".to_string(), "v1".to_string()),
(COMMENT_KEY.to_string(), "test comment".to_string()),
]);
let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
.with_metadata(metadata)
.with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
.unwrap();
assert_eq!("v1", column_schema.metadata().get("k1").unwrap());
assert_eq!("test comment", column_schema.column_comment().unwrap());
assert!(column_schema
.metadata()
.get(DEFAULT_CONSTRAINT_KEY)

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
@@ -119,6 +120,10 @@ impl RegionEngine for FileRegionEngine {
fn role(&self, region_id: RegionId) -> Option<RegionRole> {
self.inner.state(region_id)
}
/// Exposes this engine as `&dyn Any` so that callers can downcast it to its concrete type.
fn as_any(&self) -> &dyn Any {
self
}
}
struct EngineInner {

View File

@@ -55,7 +55,7 @@ use query::QueryEngineRef;
use raft_engine::{Config, ReadableSize, RecoveryMode};
use servers::error as server_error;
use servers::error::{AuthSnafu, ExecuteQuerySnafu, ParsePromQLSnafu};
use servers::export_metrics::{ExportMetricsOption, ExportMetricsTask};
use servers::export_metrics::ExportMetricsTask;
use servers::interceptor::{
PromQueryInterceptor, PromQueryInterceptorRef, SqlQueryInterceptor, SqlQueryInterceptorRef,
};
@@ -76,6 +76,7 @@ use sql::statements::statement::Statement;
use sqlparser::ast::ObjectName;
pub use standalone::StandaloneDatanodeManager;
use self::prom_store::ExportMetricHandler;
use crate::error::{
self, Error, ExecLogicalPlanSnafu, ExecutePromqlSnafu, ExternalSnafu, ParseSqlSnafu,
PermissionSnafu, PlanStatementSnafu, Result, SqlExecInterceptedSnafu, StartServerSnafu,
@@ -190,18 +191,16 @@ impl Instance {
&mut self,
opts: impl Into<FrontendOptions> + TomlSerializable,
) -> Result<()> {
let opts: FrontendOptions = opts.into();
self.export_metrics_task =
ExportMetricsTask::try_new(&opts.export_metrics, Some(&self.plugins))
.context(StartServerSnafu)?;
let servers = Services::build(opts, Arc::new(self.clone()), self.plugins.clone()).await?;
self.servers = Arc::new(servers);
Ok(())
}
pub fn build_export_metrics_task(&mut self, opts: &ExportMetricsOption) -> Result<()> {
self.export_metrics_task =
ExportMetricsTask::try_new(opts, Some(&self.plugins)).context(StartServerSnafu)?;
Ok(())
}
pub fn catalog_manager(&self) -> &CatalogManagerRef {
&self.catalog_manager
}
@@ -232,7 +231,15 @@ impl FrontendInstance for Instance {
self.script_executor.start(self)?;
if let Some(t) = self.export_metrics_task.as_ref() {
t.start()
if t.send_by_handler {
let handler = ExportMetricHandler::new_handler(
self.inserter.clone(),
self.statement_executor.clone(),
);
t.start(Some(handler)).context(StartServerSnafu)?
} else {
t.start(None).context(StartServerSnafu)?;
}
}
futures::future::try_join_all(self.servers.iter().map(|(name, handler)| async move {

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use api::prom_store::remote::read_request::ResponseType;
use api::prom_store::remote::{Query, QueryResult, ReadRequest, ReadResponse, WriteRequest};
use async_trait::async_trait;
@@ -21,10 +23,14 @@ use common_error::ext::BoxedError;
use common_query::Output;
use common_recordbatch::RecordBatches;
use common_telemetry::logging;
use operator::insert::InserterRef;
use operator::statement::StatementExecutor;
use prost::Message;
use servers::error::{self, AuthSnafu, Result as ServerResult};
use servers::prom_store::{self, Metrics};
use servers::query_handler::{PromStoreProtocolHandler, PromStoreResponse};
use servers::query_handler::{
PromStoreProtocolHandler, PromStoreProtocolHandlerRef, PromStoreResponse,
};
use session::context::QueryContextRef;
use snafu::{OptionExt, ResultExt};
@@ -209,3 +215,49 @@ impl PromStoreProtocolHandler for Instance {
todo!();
}
}
/// Handler that lets `frontend` or `standalone` import the metrics it collects
/// about itself directly, instead of sending them over the network. This avoids
/// the compression and network transmission overhead.
///
/// For that reason only the `PromStoreProtocolHandler::write` method is
/// implemented; the remaining trait methods are not meant to be called.
pub struct ExportMetricHandler {
inserter: InserterRef,
statement_executor: Arc<StatementExecutor>,
}
impl ExportMetricHandler {
    /// Builds an [`ExportMetricHandler`] from the given inserter and statement
    /// executor and returns it as a shared [`PromStoreProtocolHandlerRef`].
    pub fn new_handler(
        inserter: InserterRef,
        statement_executor: Arc<StatementExecutor>,
    ) -> PromStoreProtocolHandlerRef {
        let handler = Self {
            inserter,
            statement_executor,
        };
        Arc::new(handler)
    }
}
#[async_trait]
impl PromStoreProtocolHandler for ExportMetricHandler {
/// Converts the remote-write `request` into row insert requests and passes
/// them to the inserter together with `ctx` and the statement executor.
///
/// The second element returned by the conversion is discarded. A failed
/// insert is boxed and reported with the `ExecuteGrpcQuery` error context.
async fn write(&self, request: WriteRequest, ctx: QueryContextRef) -> ServerResult<()> {
let (requests, _) = prom_store::to_grpc_row_insert_requests(request)?;
self.inserter
.handle_row_inserts(requests, ctx, self.statement_executor.as_ref())
.await
.map_err(BoxedError::new)
.context(error::ExecuteGrpcQuerySnafu)?;
Ok(())
}
/// Not supported by this handler: calling it panics via `unreachable!`.
async fn read(
&self,
_request: ReadRequest,
_ctx: QueryContextRef,
) -> ServerResult<PromStoreResponse> {
unreachable!();
}
/// Not supported by this handler: calling it panics via `unreachable!`.
async fn ingest_metrics(&self, _metrics: Metrics) -> ServerResult<()> {
unreachable!();
}
}

View File

@@ -22,10 +22,10 @@ use common_recordbatch::SendableRecordBatchStream;
use partition::manager::PartitionRuleManagerRef;
use query::error::{RegionQuerySnafu, Result as QueryResult};
use query::region_query::RegionQueryHandler;
use snafu::{OptionExt, ResultExt};
use snafu::ResultExt;
use store_api::storage::RegionId;
use crate::error::{FindDatanodeSnafu, FindTableRouteSnafu, RequestQuerySnafu, Result};
use crate::error::{FindTableRouteSnafu, RequestQuerySnafu, Result};
pub(crate) struct FrontendRegionQueryHandler {
partition_manager: PartitionRuleManagerRef,
@@ -58,18 +58,13 @@ impl FrontendRegionQueryHandler {
async fn do_get_inner(&self, request: QueryRequest) -> Result<SendableRecordBatchStream> {
let region_id = RegionId::from_u64(request.region_id);
let table_route = self
let peer = &self
.partition_manager
.find_table_route(region_id.table_id())
.find_region_leader(region_id)
.await
.context(FindTableRouteSnafu {
table_id: region_id.table_id(),
})?;
let peer = table_route
.find_region_leader(region_id.region_number())
.context(FindDatanodeSnafu {
region: region_id.region_number(),
})?;
let client = self.datanode_manager.datanode(peer).await;

View File

@@ -12,33 +12,21 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::Arc;
use api::v1::region::{QueryRequest, RegionRequest, RegionResponse};
use async_trait::async_trait;
use client::region::check_response_header;
use common_catalog::consts::METRIC_ENGINE;
use common_error::ext::BoxedError;
use common_meta::datanode_manager::{AffectedRows, Datanode, DatanodeManager, DatanodeRef};
use common_meta::ddl::{TableMetadata, TableMetadataAllocator, TableMetadataAllocatorContext};
use common_meta::error::{self as meta_error, Result as MetaResult, UnsupportedSnafu};
use common_meta::key::table_route::{
LogicalTableRouteValue, PhysicalTableRouteValue, TableRouteValue,
};
use common_meta::error::{self as meta_error, Result as MetaResult};
use common_meta::peer::Peer;
use common_meta::rpc::ddl::CreateTableTask;
use common_meta::rpc::router::{Region, RegionRoute};
use common_meta::sequence::SequenceRef;
use common_meta::wal::options_allocator::allocate_region_wal_options;
use common_meta::wal::WalOptionsAllocatorRef;
use common_recordbatch::SendableRecordBatchStream;
use common_telemetry::tracing;
use common_telemetry::tracing_context::{FutureExt, TracingContext};
use common_telemetry::{debug, info, tracing};
use datanode::region_server::RegionServer;
use servers::grpc::region_server::RegionServerHandler;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::{RegionId, RegionNumber, TableId};
use snafu::{OptionExt, ResultExt};
use crate::error::{InvalidRegionRequestSnafu, InvokeRegionServerSnafu, Result};
@@ -52,7 +40,7 @@ impl DatanodeManager for StandaloneDatanodeManager {
}
/// Relative to [client::region::RegionRequester]
struct RegionInvoker {
pub struct RegionInvoker {
region_server: RegionServer,
}
@@ -109,121 +97,3 @@ impl Datanode for RegionInvoker {
.context(meta_error::ExternalSnafu)
}
}
/// Allocates table metadata (table id, table route and region WAL options)
/// from a table id sequence and a WAL options allocator.
pub struct StandaloneTableMetadataAllocator {
/// Sequence that new table ids are drawn from.
table_id_sequence: SequenceRef,
/// Allocator used to produce the per-region WAL options.
wal_options_allocator: WalOptionsAllocatorRef,
}
impl StandaloneTableMetadataAllocator {
/// Creates an allocator from the given table id sequence and WAL options allocator.
pub fn new(
table_id_sequence: SequenceRef,
wal_options_allocator: WalOptionsAllocatorRef,
) -> Self {
Self {
table_id_sequence,
wal_options_allocator,
}
}
/// Picks the table id for `task`.
///
/// If the task already carries a table id it is used as is, provided it is
/// not inside the range reported by the sequence's `min_max()`; otherwise an
/// `Unsupported` error is returned. Without an explicit id, the next value of
/// the sequence is used.
async fn allocate_table_id(&self, task: &CreateTableTask) -> MetaResult<TableId> {
let table_id = if let Some(table_id) = &task.create_table.table_id {
let table_id = table_id.id;
// An explicit id must not collide with the ids this sequence hands out.
ensure!(
!self
.table_id_sequence
.min_max()
.await
.contains(&(table_id as u64)),
UnsupportedSnafu {
operation: format!(
"create table by id {} that is reserved in this node",
table_id
)
}
);
info!(
"Received explicitly allocated table id {}, will use it directly.",
table_id
);
table_id
} else {
// NOTE(review): `as` truncates silently if `TableId` is narrower than the
// sequence value — confirm the sequence is bounded accordingly.
self.table_id_sequence.next().await? as TableId
};
Ok(table_id)
}
/// Builds the WAL options for the regions of `table_route`.
///
/// A physical route gets options allocated for each of its region numbers;
/// a logical route gets an empty map.
fn create_wal_options(
&self,
table_route: &TableRouteValue,
) -> MetaResult<HashMap<RegionNumber, String>> {
match table_route {
TableRouteValue::Physical(x) => {
let region_numbers = x
.region_routes
.iter()
.map(|route| route.region.id.region_number())
.collect();
allocate_region_wal_options(region_numbers, &self.wal_options_allocator)
}
TableRouteValue::Logical(_) => Ok(HashMap::new()),
}
}
}
/// Builds the table route for a table that is about to be created.
///
/// Tables on the metric engine get a logical route. Any other table gets a
/// physical route with one region per partition of `task`, numbered by the
/// partition's position and led by a placeholder peer.
fn create_table_route(table_id: TableId, task: &CreateTableTask) -> TableRouteValue {
    if task.create_table.engine == METRIC_ENGINE {
        return TableRouteValue::Logical(LogicalTableRouteValue {});
    }

    let mut region_routes = Vec::new();
    for (index, partition) in task.partitions.iter().enumerate() {
        let region = Region {
            id: RegionId::new(table_id, index as u32),
            partition: Some(partition.clone().into()),
            ..Default::default()
        };
        region_routes.push(RegionRoute {
            region,
            // It's only a placeholder.
            leader_peer: Some(Peer::default()),
            follower_peers: vec![],
            leader_status: None,
        });
    }
    TableRouteValue::Physical(PhysicalTableRouteValue::new(region_routes))
}
#[async_trait]
impl TableMetadataAllocator for StandaloneTableMetadataAllocator {
    /// Allocates the metadata for the table described by `task`: a table id,
    /// the table route derived from it, and the WAL options of its regions.
    async fn create(
        &self,
        _ctx: &TableMetadataAllocatorContext,
        task: &CreateTableTask,
    ) -> MetaResult<TableMetadata> {
        let table_id = self.allocate_table_id(task).await?;
        let table_route = create_table_route(table_id, task);
        let region_wal_options = self.create_wal_options(&table_route)?;

        let metadata = TableMetadata {
            table_id,
            table_route,
            region_wal_options,
        };
        debug!(
            "Allocated region wal options {:?} for table {}",
            metadata.region_wal_options, metadata.table_id
        );
        Ok(metadata)
    }
}

View File

@@ -17,34 +17,34 @@ use prometheus::*;
lazy_static! {
pub static ref METRIC_HANDLE_SQL_ELAPSED: Histogram =
register_histogram!("frontend_handle_sql_elapsed", "frontend handle sql elapsed").unwrap();
register_histogram!("greptime_frontend_handle_sql_elapsed", "frontend handle sql elapsed").unwrap();
pub static ref METRIC_HANDLE_PROMQL_ELAPSED: Histogram = register_histogram!(
"frontend_handle_promql_elapsed",
"greptime_frontend_handle_promql_elapsed",
"frontend handle promql elapsed"
)
.unwrap();
pub static ref METRIC_EXEC_PLAN_ELAPSED: Histogram =
register_histogram!("frontend_exec_plan_elapsed", "frontend exec plan elapsed").unwrap();
register_histogram!("greptime_frontend_exec_plan_elapsed", "frontend exec plan elapsed").unwrap();
pub static ref METRIC_HANDLE_SCRIPTS_ELAPSED: Histogram = register_histogram!(
"frontend_handle_scripts_elapsed",
"greptime_frontend_handle_scripts_elapsed",
"frontend handle scripts elapsed"
)
.unwrap();
pub static ref METRIC_RUN_SCRIPT_ELAPSED: Histogram =
register_histogram!("frontend_run_script_elapsed", "frontend run script elapsed").unwrap();
register_histogram!("greptime_frontend_run_script_elapsed", "frontend run script elapsed").unwrap();
/// The samples count of Prometheus remote write.
pub static ref PROM_STORE_REMOTE_WRITE_SAMPLES: IntCounter = register_int_counter!(
"frontend_prometheus_remote_write_samples",
"greptime_frontend_prometheus_remote_write_samples",
"frontend prometheus remote write samples"
)
.unwrap();
pub static ref OTLP_METRICS_ROWS: IntCounter = register_int_counter!(
"frontend_otlp_metrics_rows",
"greptime_frontend_otlp_metrics_rows",
"frontend otlp metrics rows"
)
.unwrap();
pub static ref OTLP_TRACES_ROWS: IntCounter = register_int_counter!(
"frontend_otlp_traces_rows",
"greptime_frontend_otlp_traces_rows",
"frontend otlp traces rows"
)
.unwrap();

View File

@@ -113,7 +113,7 @@ pub enum Error {
#[snafu(display("Failed to parse regex DFA"))]
ParseDFA {
#[snafu(source)]
error: regex_automata::Error,
error: Box<regex_automata::dfa::Error>,
location: Location,
},

View File

@@ -48,7 +48,7 @@
//! More detailed information regarding the encoding of the inverted indices can be found in the [RFC].
//!
//! [`InvertedIndexMetas`]: https://github.com/GreptimeTeam/greptime-proto/blob/2aaee38de81047537dfa42af9df63bcfb866e06c/proto/greptime/v1/index/inverted_index.proto#L32-L64
//! [RFC]: https://github.com/GreptimeTeam/greptimedb/blob/develop/docs/rfcs/2023-11-03-inverted-index.md
//! [RFC]: https://github.com/GreptimeTeam/greptimedb/blob/main/docs/rfcs/2023-11-03-inverted-index.md
pub mod reader;
pub mod writer;

View File

@@ -30,4 +30,7 @@ pub trait FstApplier: Send + Sync {
///
/// Returns a `Vec<u64>`, with each u64 being a value from the FstMap.
fn apply(&self, fst: &FstMap) -> Vec<u64>;
/// Returns the memory usage of the applier.
fn memory_usage(&self) -> usize;
}

View File

@@ -12,9 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::mem::size_of;
use fst::map::OpBuilder;
use fst::{IntoStreamer, Streamer};
use regex_automata::DenseDFA;
use regex_automata::dfa::dense::DFA;
use snafu::{ensure, ResultExt};
use crate::inverted_index::error::{
@@ -24,15 +26,13 @@ use crate::inverted_index::search::fst_apply::FstApplier;
use crate::inverted_index::search::predicate::{Predicate, Range};
use crate::inverted_index::FstMap;
type Dfa = DenseDFA<Vec<usize>, usize>;
/// `IntersectionFstApplier` applies intersection operations on an FstMap using specified ranges and regex patterns.
pub struct IntersectionFstApplier {
/// A list of `Range` which define inclusive or exclusive ranges for keys to be queried in the FstMap.
ranges: Vec<Range>,
/// A list of `Dfa` compiled from regular expression patterns.
dfas: Vec<Dfa>,
dfas: Vec<DFA<Vec<u32>>>,
}
impl FstApplier for IntersectionFstApplier {
@@ -70,6 +70,26 @@ impl FstApplier for IntersectionFstApplier {
}
values
}
/// Estimates the memory held by this applier, in bytes.
///
/// The total is made of four parts: the backing storage of `ranges`
/// (capacity times the size of a `Range`), the capacity of the value buffer
/// of every present lower/upper bound, the backing storage of `dfas`, and
/// whatever each DFA reports through its own `memory_usage`.
fn memory_usage(&self) -> usize {
    let range_slots = self.ranges.capacity() * size_of::<Range>();

    // A missing bound owns no buffer and therefore contributes nothing.
    let bound_buffers: usize = self
        .ranges
        .iter()
        .map(|range| {
            let lower = range.lower.as_ref().map_or(0, |bound| bound.value.capacity());
            let upper = range.upper.as_ref().map_or(0, |bound| bound.value.capacity());
            lower + upper
        })
        .sum();

    let dfa_slots = self.dfas.capacity() * size_of::<DFA<Vec<u32>>>();
    let dfa_tables: usize = self.dfas.iter().map(|dfa| dfa.memory_usage()).sum();

    range_slots + bound_buffers + dfa_slots + dfa_tables
}
}
impl IntersectionFstApplier {
@@ -88,8 +108,8 @@ impl IntersectionFstApplier {
match predicate {
Predicate::Range(range) => ranges.push(range.range),
Predicate::RegexMatch(regex) => {
let dfa = DenseDFA::new(&regex.pattern);
let dfa = dfa.context(ParseDFASnafu)?;
let dfa = DFA::new(&regex.pattern);
let dfa = dfa.map_err(Box::new).context(ParseDFASnafu)?;
dfas.push(dfa);
}
// Rejection of `InList` predicates is enforced here.
@@ -210,47 +230,67 @@ mod tests {
#[test]
fn test_intersection_fst_applier_with_valid_pattern() {
let test_fst = FstMap::from_iter([("aa", 1), ("bb", 2), ("cc", 3)]).unwrap();
let test_fst = FstMap::from_iter([("123", 1), ("abc", 2)]).unwrap();
let applier = create_applier_from_pattern("a.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1]);
let cases = vec![
("1", vec![1]),
("2", vec![1]),
("3", vec![1]),
("^1", vec![1]),
("^2", vec![]),
("^3", vec![]),
("^1.*", vec![1]),
("^.*2", vec![1]),
("^.*3", vec![1]),
("1$", vec![]),
("2$", vec![]),
("3$", vec![1]),
("1.*$", vec![1]),
("2.*$", vec![1]),
("3.*$", vec![1]),
("^1..$", vec![1]),
("^.2.$", vec![1]),
("^..3$", vec![1]),
("^[0-9]", vec![1]),
("^[0-9]+$", vec![1]),
("^[0-9][0-9]$", vec![]),
("^[0-9][0-9][0-9]$", vec![1]),
("^123$", vec![1]),
("a", vec![2]),
("b", vec![2]),
("c", vec![2]),
("^a", vec![2]),
("^b", vec![]),
("^c", vec![]),
("^a.*", vec![2]),
("^.*b", vec![2]),
("^.*c", vec![2]),
("a$", vec![]),
("b$", vec![]),
("c$", vec![2]),
("a.*$", vec![2]),
("b.*$", vec![2]),
("c.*$", vec![2]),
("^.[a-z]", vec![2]),
("^abc$", vec![2]),
("^ab$", vec![]),
("abc$", vec![2]),
("^a.c$", vec![2]),
("^..c$", vec![2]),
("ab", vec![2]),
(".*", vec![1, 2]),
("", vec![1, 2]),
("^$", vec![]),
("1|a", vec![1, 2]),
("^123$|^abc$", vec![1, 2]),
("^123$|d", vec![1]),
];
let applier = create_applier_from_pattern("b.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![2]);
let applier = create_applier_from_pattern("c.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![3]);
let applier = create_applier_from_pattern("a.*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1]);
let applier = create_applier_from_pattern("b.*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![2]);
let applier = create_applier_from_pattern("c.*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![3]);
let applier = create_applier_from_pattern("d.?").unwrap();
let results = applier.apply(&test_fst);
assert!(results.is_empty());
let applier = create_applier_from_pattern("a.?|b.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1, 2]);
let applier = create_applier_from_pattern("d.?|a.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1]);
let applier = create_applier_from_pattern(".*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1, 2, 3]);
for (pattern, expected) in cases {
let applier = create_applier_from_pattern(pattern).unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, expected);
}
}
#[test]
@@ -322,4 +362,36 @@ mod tests {
Err(Error::IntersectionApplierWithInList { .. })
));
}
#[test]
fn test_intersection_fst_applier_memory_usage() {
    // An applier without ranges and DFAs reports no memory at all.
    let empty = IntersectionFstApplier {
        ranges: vec![],
        dfas: vec![],
    };
    assert_eq!(empty.memory_usage(), 0);

    // Pin the size reported by the DFA itself before it is moved into the applier.
    let dfa = DFA::new("^abc$").unwrap();
    assert_eq!(dfa.memory_usage(), 320);

    let lower = Bound {
        value: b"aa".to_vec(),
        inclusive: true,
    };
    let upper = Bound {
        value: b"cc".to_vec(),
        inclusive: true,
    };
    let applier = IntersectionFstApplier {
        ranges: vec![Range {
            lower: Some(lower),
            upper: Some(upper),
        }],
        dfas: vec![dfa],
    };

    // One `Range` slot, 2 + 2 bytes of bound values, one DFA slot and the DFA's own usage.
    let expected = size_of::<Range>() + 4 + size_of::<DFA<Vec<u32>>>() + 320;
    assert_eq!(applier.memory_usage(), expected);
}
}

View File

@@ -13,6 +13,7 @@
// limitations under the License.
use std::collections::HashSet;
use std::mem::size_of;
use snafu::{ensure, ResultExt};
@@ -35,6 +36,11 @@ impl FstApplier for KeysFstApplier {
fn apply(&self, fst: &FstMap) -> Vec<u64> {
self.keys.iter().filter_map(|k| fst.get(k)).collect()
}
/// Estimates the memory held by this applier, in bytes: the backing storage
/// of the `keys` vector plus the capacity of every individual key.
fn memory_usage(&self) -> usize {
    let key_slots = self.keys.capacity() * size_of::<Bytes>();
    let key_buffers: usize = self.keys.iter().map(|key| key.capacity()).sum();
    key_slots + key_buffers
}
}
impl KeysFstApplier {
@@ -302,4 +308,15 @@ mod tests {
let result = KeysFstApplier::try_from(predicates);
assert!(matches!(result, Err(Error::ParseRegex { .. })));
}
#[test]
fn test_keys_fst_applier_memory_usage() {
    // No keys, no memory.
    let empty = KeysFstApplier { keys: vec![] };
    assert_eq!(empty.memory_usage(), 0);

    // Two key slots plus the key contents; the `+ 6` presumably corresponds to
    // the two 3-byte keys produced by the `b` helper.
    let two_keys = KeysFstApplier {
        keys: vec![b("foo"), b("bar")],
    };
    let expected = 2 * size_of::<Bytes>() + 6;
    assert_eq!(two_keys.memory_usage(), expected);
}
}

View File

@@ -14,6 +14,8 @@
mod predicates_apply;
use std::collections::BTreeSet;
use async_trait::async_trait;
pub use predicates_apply::PredicatesIndexApplier;
@@ -24,15 +26,19 @@ use crate::inverted_index::format::reader::InvertedIndexReader;
///
/// Applier instances are reusable and work with various `InvertedIndexReader` instances,
/// avoiding repeated compilation of fixed predicates such as regex patterns.
#[mockall::automock]
#[async_trait]
pub trait IndexApplier {
/// Applies the predefined predicates to the data read by the given index reader, returning
/// a list of relevant indices (e.g., post IDs, group IDs, row IDs).
async fn apply(
async fn apply<'a>(
&self,
context: SearchContext,
reader: &mut dyn InvertedIndexReader,
) -> Result<Vec<usize>>;
reader: &mut (dyn InvertedIndexReader + 'a),
) -> Result<BTreeSet<usize>>;
/// Returns the memory usage of the applier.
fn memory_usage(&self) -> usize;
}
/// A context for searching the inverted index.

View File

@@ -12,6 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeSet;
use std::mem::size_of;
use async_trait::async_trait;
use common_base::BitVec;
use greptime_proto::v1::index::InvertedIndexMetas;
@@ -41,11 +44,11 @@ pub struct PredicatesIndexApplier {
impl IndexApplier for PredicatesIndexApplier {
/// Applies all `FstApplier`s to the data in the inverted index reader, intersecting the individual
/// bitmaps obtained for each index to result in a final set of indices.
async fn apply(
async fn apply<'a>(
&self,
context: SearchContext,
reader: &mut dyn InvertedIndexReader,
) -> Result<Vec<usize>> {
reader: &mut (dyn InvertedIndexReader + 'a),
) -> Result<BTreeSet<usize>> {
let metadata = reader.metadata().await?;
let mut bitmap = Self::bitmap_full_range(&metadata);
@@ -58,7 +61,7 @@ impl IndexApplier for PredicatesIndexApplier {
let Some(meta) = metadata.metas.get(name) else {
match context.index_not_found_strategy {
IndexNotFoundStrategy::ReturnEmpty => {
return Ok(vec![]);
return Ok(BTreeSet::default());
}
IndexNotFoundStrategy::Ignore => {
continue;
@@ -80,6 +83,16 @@ impl IndexApplier for PredicatesIndexApplier {
Ok(bitmap.iter_ones().collect())
}
/// Returns the memory usage of the applier.
///
/// Sums the backing storage of `fst_appliers` (capacity times the size of
/// one `(IndexName, Box<dyn FstApplier>)` pair), the capacity of every index
/// name, and the usage reported by every wrapped `FstApplier`.
fn memory_usage(&self) -> usize {
    let pair_slots =
        self.fst_appliers.capacity() * size_of::<(IndexName, Box<dyn FstApplier>)>();
    let pair_contents: usize = self
        .fst_appliers
        .iter()
        .map(|(name, fst_applier)| name.capacity() + fst_applier.memory_usage())
        .sum();
    pair_slots + pair_contents
}
}
impl PredicatesIndexApplier {
@@ -197,7 +210,7 @@ mod tests {
.apply(SearchContext::default(), &mut mock_reader)
.await
.unwrap();
assert_eq!(indices, vec![0, 2, 4, 6]);
assert_eq!(indices, BTreeSet::from_iter([0, 2, 4, 6]));
// An index reader with a single tag "tag-0" but without value "tag-0_value-0"
let mut mock_reader = MockInvertedIndexReader::new();
@@ -251,7 +264,7 @@ mod tests {
.apply(SearchContext::default(), &mut mock_reader)
.await
.unwrap();
assert_eq!(indices, vec![0, 4, 6]);
assert_eq!(indices, BTreeSet::from_iter([0, 4, 6]));
}
#[tokio::test]
@@ -269,7 +282,7 @@ mod tests {
.apply(SearchContext::default(), &mut mock_reader)
.await
.unwrap();
assert_eq!(indices, vec![0, 1, 2, 3, 4, 5, 6, 7]); // full range to scan
assert_eq!(indices, BTreeSet::from_iter([0, 1, 2, 3, 4, 5, 6, 7])); // full range to scan
}
#[tokio::test]
@@ -341,6 +354,21 @@ mod tests {
)
.await
.unwrap();
assert_eq!(indices, vec![0, 1, 2, 3, 4, 5, 6, 7]);
assert_eq!(indices, BTreeSet::from_iter([0, 1, 2, 3, 4, 5, 6, 7]));
}
#[test]
fn test_index_applier_memory_usage() {
    // The mocked FST applier always reports 100 bytes.
    let mut mock_fst_applier = MockFstApplier::new();
    mock_fst_applier.expect_memory_usage().returning(|| 100);
    let fst_applier: Box<dyn FstApplier> = Box::new(mock_fst_applier);

    let applier = PredicatesIndexApplier {
        fst_appliers: vec![(s("tag-0"), fst_applier)],
    };

    // One pair slot, the 5 bytes of the name "tag-0", and the mocked usage.
    let expected = size_of::<(IndexName, Box<dyn FstApplier>)>() + 5 + 100;
    assert_eq!(applier.memory_usage(), expected);
}
}

View File

@@ -14,6 +14,7 @@ async-stream.workspace = true
async-trait.workspace = true
byteorder = "1.4"
bytes.workspace = true
chrono.workspace = true
common-base.workspace = true
common-config.workspace = true
common-error.workspace = true
@@ -21,7 +22,6 @@ common-macro.workspace = true
common-meta.workspace = true
common-runtime.workspace = true
common-telemetry.workspace = true
dashmap.workspace = true
futures-util.workspace = true
futures.workspace = true
protobuf = { version = "2", features = ["bytes"] }
@@ -37,4 +37,7 @@ tokio.workspace = true
[dev-dependencies]
common-meta = { workspace = true, features = ["testing"] }
common-test-util.workspace = true
itertools.workspace = true
rand.workspace = true
rand_distr = "0.4"
uuid.workspace = true

View File

@@ -18,6 +18,7 @@ use common_config::wal::KafkaWalTopic;
use common_error::ext::ErrorExt;
use common_macro::stack_trace_debug;
use common_runtime::error::Error as RuntimeError;
use serde_json::error::Error as JsonError;
use snafu::{Location, Snafu};
use crate::kafka::NamespaceImpl as KafkaNamespace;
@@ -123,20 +124,6 @@ pub enum Error {
error: String,
},
#[snafu(display("Failed to encode a record meta"))]
EncodeMeta {
location: Location,
#[snafu(source)]
error: serde_json::Error,
},
#[snafu(display("Failed to decode a record meta"))]
DecodeMeta {
location: Location,
#[snafu(source)]
error: serde_json::Error,
},
#[snafu(display("Missing required key in a record"))]
MissingKey { location: Location },
@@ -146,9 +133,16 @@ pub enum Error {
#[snafu(display("Cannot build a record from empty entries"))]
EmptyEntries { location: Location },
#[snafu(display("Failed to produce records to Kafka, topic: {}", topic))]
#[snafu(display(
"Failed to produce records to Kafka, topic: {}, size: {}, limit: {}",
topic,
size,
limit,
))]
ProduceRecord {
topic: KafkaWalTopic,
size: usize,
limit: usize,
location: Location,
#[snafu(source)]
error: rskafka::client::producer::Error,
@@ -172,6 +166,23 @@ pub enum Error {
#[snafu(display("Failed to do a cast"))]
Cast { location: Location },
#[snafu(display("Failed to encode object into json"))]
EncodeJson {
location: Location,
#[snafu(source)]
error: JsonError,
},
#[snafu(display("Failed to decode object from json"))]
DecodeJson {
location: Location,
#[snafu(source)]
error: JsonError,
},
#[snafu(display("The record sequence is not legal, error: {}", error))]
IllegalSequence { location: Location, error: String },
}
impl ErrorExt for Error {

View File

@@ -12,10 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod client_manager;
pub(crate) mod client_manager;
pub mod log_store;
mod offset;
mod record_utils;
pub(crate) mod util;
use std::fmt::Display;
@@ -29,8 +28,8 @@ use crate::error::Error;
/// Kafka Namespace implementation.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Serialize, Deserialize)]
pub struct NamespaceImpl {
region_id: u64,
topic: Topic,
pub region_id: u64,
pub topic: Topic,
}
impl Namespace for NamespaceImpl {
@@ -41,7 +40,7 @@ impl Namespace for NamespaceImpl {
impl Display for NamespaceImpl {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}/{}", self.topic, self.region_id)
write!(f, "[topic: {}, region: {}]", self.topic, self.region_id)
}
}
@@ -49,11 +48,11 @@ impl Display for NamespaceImpl {
#[derive(Debug, PartialEq, Clone)]
pub struct EntryImpl {
/// Entry payload.
data: Vec<u8>,
pub data: Vec<u8>,
/// The logical entry id.
id: EntryId,
pub id: EntryId,
/// The namespace used to identify and isolate log entries from different regions.
ns: NamespaceImpl,
pub ns: NamespaceImpl,
}
impl Entry for EntryImpl {
@@ -77,7 +76,7 @@ impl Display for EntryImpl {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"Entry (ns: {}, id: {}, data_len: {})",
"Entry [ns: {}, id: {}, data_len: {}]",
self.ns,
self.id,
self.data.len()

View File

@@ -12,17 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::Arc;
use common_config::wal::{KafkaConfig, KafkaWalTopic as Topic};
use dashmap::mapref::entry::Entry as DashMapEntry;
use dashmap::DashMap;
use rskafka::client::partition::{PartitionClient, UnknownTopicHandling};
use rskafka::client::producer::aggregator::RecordAggregator;
use rskafka::client::producer::{BatchProducer, BatchProducerBuilder};
use rskafka::client::{Client as RsKafkaClient, ClientBuilder};
use rskafka::BackoffConfig;
use snafu::ResultExt;
use tokio::sync::RwLock;
use crate::error::{BuildClientSnafu, BuildPartitionClientSnafu, Result};
@@ -62,12 +62,12 @@ impl Client {
/// Manages client construction and accesses.
#[derive(Debug)]
pub(crate) struct ClientManager {
config: KafkaConfig,
pub(crate) config: KafkaConfig,
/// Top-level client in kafka. All clients are constructed by this client.
client_factory: RsKafkaClient,
/// A pool maintaining a collection of clients.
/// Key: a topic. Value: the associated client of the topic.
client_pool: DashMap<Topic, Client>,
client_pool: RwLock<HashMap<Topic, Client>>,
}
impl ClientManager {
@@ -91,18 +91,27 @@ impl ClientManager {
Ok(Self {
config: config.clone(),
client_factory: client,
client_pool: DashMap::new(),
client_pool: RwLock::new(HashMap::new()),
})
}
/// Gets the client associated with the topic. If the client does not exist, a new one will
/// be created and returned.
pub(crate) async fn get_or_insert(&self, topic: &Topic) -> Result<Client> {
match self.client_pool.entry(topic.to_string()) {
DashMapEntry::Occupied(entry) => Ok(entry.get().clone()),
DashMapEntry::Vacant(entry) => {
let topic_client = self.try_create_client(topic).await?;
Ok(entry.insert(topic_client).clone())
{
let client_pool = self.client_pool.read().await;
if let Some(client) = client_pool.get(topic) {
return Ok(client.clone());
}
}
let mut client_pool = self.client_pool.write().await;
match client_pool.get(topic) {
Some(client) => Ok(client.clone()),
None => {
let client = self.try_create_client(topic).await?;
client_pool.insert(topic.clone(), client.clone());
Ok(client)
}
}
}
@@ -124,3 +133,95 @@ impl ClientManager {
Ok(Client::new(raw_client, &self.config))
}
}
#[cfg(test)]
mod tests {
use common_meta::wal::kafka::test_util::run_test_with_kafka_wal;
use tokio::sync::Barrier;
use super::*;
use crate::test_util::kafka::create_topics;
/// Sets up a test by creating `num_topics` topics and a client manager.
///
/// Topic names are built from `test_name`, the topic index and a fresh UUID.
/// The manager is constructed from a default `KafkaConfig` whose
/// `broker_endpoints` are replaced with the given ones.
/// Returns the manager together with the created topics.
async fn prepare(
test_name: &str,
num_topics: usize,
broker_endpoints: Vec<String>,
) -> (ClientManager, Vec<Topic>) {
let topics = create_topics(
num_topics,
|i| format!("{test_name}_{}_{}", i, uuid::Uuid::new_v4()),
&broker_endpoints,
)
.await;
let config = KafkaConfig {
broker_endpoints,
..Default::default()
};
let manager = ClientManager::try_new(&config).await.unwrap();
(manager, topics)
}
/// Sends `get_or_insert` requests sequentially to the client manager, and checks that
/// a client exists in the pool for every topic afterwards.
#[tokio::test]
async fn test_sequential() {
// NOTE(review): `run_test_with_kafka_wal` presumably supplies the broker endpoints
// to the closure; its behavior is defined outside this file.
run_test_with_kafka_wal(|broker_endpoints| {
Box::pin(async {
let (manager, topics) = prepare("test_sequential", 128, broker_endpoints).await;
// Assigns multiple regions to a topic: 512 regions are spread over 128 topics in a round-robin manner.
let region_topic = (0..512)
.map(|region_id| (region_id, &topics[region_id % topics.len()]))
.collect::<HashMap<_, _>>();
// Gets all clients sequentially.
for (_, topic) in region_topic {
manager.get_or_insert(topic).await.unwrap();
}
// Ensures all clients exist.
let client_pool = manager.client_pool.read().await;
let all_exist = topics.iter().all(|topic| client_pool.contains_key(topic));
assert!(all_exist);
})
})
.await;
}
/// Sends `get_or_insert` requests in parallel to the client manager, and checks that
/// a client exists in the pool for every topic afterwards.
#[tokio::test(flavor = "multi_thread")]
async fn test_parallel() {
run_test_with_kafka_wal(|broker_endpoints| {
Box::pin(async {
let (manager, topics) = prepare("test_parallel", 128, broker_endpoints).await;
// Assigns multiple regions to a topic. Topics are cloned since they are moved into spawned tasks.
let region_topic = (0..512)
.map(|region_id| (region_id, topics[region_id % topics.len()].clone()))
.collect::<HashMap<_, _>>();
// Gets all clients in parallel.
let manager = Arc::new(manager);
// The barrier is sized to the number of tasks, and every task waits on it
// before issuing its `get_or_insert` request.
let barrier = Arc::new(Barrier::new(region_topic.len()));
let tasks = region_topic
.into_values()
.map(|topic| {
let manager = manager.clone();
let barrier = barrier.clone();
tokio::spawn(async move {
barrier.wait().await;
assert!(manager.get_or_insert(&topic).await.is_ok());
})
})
.collect::<Vec<_>>();
// Fails the test if any spawned task could not be joined, e.g. because its assertion panicked.
futures::future::try_join_all(tasks).await.unwrap();
// Ensures all clients exist.
let client_pool = manager.client_pool.read().await;
let all_exist = topics.iter().all(|topic| client_pool.contains_key(topic));
assert!(all_exist);
})
})
.await;
}
}

View File

@@ -26,10 +26,10 @@ use store_api::logstore::entry_stream::SendableEntryStream;
use store_api::logstore::namespace::Id as NamespaceId;
use store_api::logstore::{AppendBatchResponse, AppendResponse, LogStore};
use crate::error::{ConsumeRecordSnafu, Error, GetOffsetSnafu, Result};
use crate::error::{ConsumeRecordSnafu, Error, GetOffsetSnafu, IllegalSequenceSnafu, Result};
use crate::kafka::client_manager::{ClientManager, ClientManagerRef};
use crate::kafka::offset::Offset;
use crate::kafka::record_utils::{decode_from_record, RecordProducer};
use crate::kafka::util::offset::Offset;
use crate::kafka::util::record::{maybe_emit_entry, Record, RecordProducer};
use crate::kafka::{EntryImpl, NamespaceImpl};
/// A log store backed by Kafka.
@@ -85,8 +85,6 @@ impl LogStore for KafkaLogStore {
/// Appends a batch of entries and returns a response containing a map where the key is a region id
/// while the value is the id of the last successfully written entry of the region.
async fn append_batch(&self, entries: Vec<Self::Entry>) -> Result<AppendBatchResponse> {
debug!("LogStore handles append_batch with entries {:?}", entries);
if entries.is_empty() {
return Ok(AppendBatchResponse::default());
}
@@ -96,29 +94,26 @@ impl LogStore for KafkaLogStore {
for entry in entries {
producers
.entry(entry.ns.region_id)
.or_insert(RecordProducer::new(entry.ns.clone()))
.or_insert_with(|| RecordProducer::new(entry.ns.clone()))
.push(entry);
}
// Builds a record from entries belong to a region and produces them to kafka server.
let region_ids = producers.keys().cloned().collect::<Vec<_>>();
// Produces entries for each region and gets the offset those entries written to.
// The returned offset is then converted into an entry id.
let last_entry_ids = futures::future::try_join_all(producers.into_iter().map(
|(region_id, producer)| async move {
let entry_id = producer
.produce(&self.client_manager)
.await
.map(TryInto::try_into)??;
Ok((region_id, entry_id))
},
))
.await?
.into_iter()
.collect::<HashMap<_, _>>();
let tasks = producers
.into_values()
.map(|producer| producer.produce(&self.client_manager))
.collect::<Vec<_>>();
// Each produce operation returns a kafka offset of the produced record.
// The offsets are then converted to entry ids.
let entry_ids = futures::future::try_join_all(tasks)
.await?
.into_iter()
.map(TryInto::try_into)
.collect::<Result<Vec<_>>>()?;
debug!("The entries are appended at offsets {:?}", entry_ids);
Ok(AppendBatchResponse {
last_entry_ids: region_ids.into_iter().zip(entry_ids).collect(),
})
Ok(AppendBatchResponse { last_entry_ids })
}
/// Creates a new `EntryStream` to asynchronously generates `Entry` with entry ids
@@ -128,13 +123,10 @@ impl LogStore for KafkaLogStore {
ns: &Self::Namespace,
entry_id: EntryId,
) -> Result<SendableEntryStream<Self::Entry, Self::Error>> {
let topic = ns.topic.clone();
let region_id = ns.region_id;
// Gets the client associated with the topic.
let client = self
.client_manager
.get_or_insert(&topic)
.get_or_insert(&ns.topic)
.await?
.raw_client
.clone();
@@ -148,14 +140,19 @@ impl LogStore for KafkaLogStore {
.await
.context(GetOffsetSnafu { ns: ns.clone() })?
- 1;
// Reads entries with offsets in the range [start_offset, end_offset).
// Reads entries with offsets in the range [start_offset, end_offset].
let start_offset = Offset::try_from(entry_id)?.0;
debug!(
"Start reading entries in range [{}, {}] for ns {}",
start_offset, end_offset, ns
);
// Abort if there're no new entries.
// FIXME(niebayes): how come this case happens?
if start_offset > end_offset {
warn!(
"No new entries for ns {} in range [{}, {})",
"No new entries for ns {} in range [{}, {}]",
ns, start_offset, end_offset
);
return Ok(futures_util::stream::empty().boxed());
@@ -163,46 +160,56 @@ impl LogStore for KafkaLogStore {
let mut stream_consumer = StreamConsumerBuilder::new(client, StartOffset::At(start_offset))
.with_max_batch_size(self.config.max_batch_size.as_bytes() as i32)
.with_max_wait_ms(self.config.produce_record_timeout.as_millis() as i32)
.with_max_wait_ms(self.config.consumer_wait_timeout.as_millis() as i32)
.build();
debug!(
"Built a stream consumer for ns {} to consume entries in range [{}, {})",
"Built a stream consumer for ns {} to consume entries in range [{}, {}]",
ns, start_offset, end_offset
);
// Key: entry id, Value: the records associated with the entry.
let mut entry_records: HashMap<_, Vec<_>> = HashMap::new();
let ns_clone = ns.clone();
let stream = async_stream::stream!({
while let Some(consume_result) = stream_consumer.next().await {
// Each next will prdoce a `RecordAndOffset` and a high watermark offset.
// Each next on the stream consumer produces a `RecordAndOffset` and a high watermark offset.
// The `RecordAndOffset` contains the record data and its start offset.
// The high watermark offset is the end offset of the latest record in the partition.
let (record, high_watermark) = consume_result.context(ConsumeRecordSnafu {
ns: ns_clone.clone(),
})?;
let record_offset = record.offset;
// The high watermark offset is the offset of the last record plus one.
let (record_and_offset, high_watermark) =
consume_result.with_context(|_| ConsumeRecordSnafu {
ns: ns_clone.clone(),
})?;
let (kafka_record, offset) = (record_and_offset.record, record_and_offset.offset);
debug!(
"Read a record at offset {} for ns {}, high watermark: {}",
record_offset, ns_clone, high_watermark
offset, ns_clone, high_watermark
);
let entries = decode_from_record(record.record)?;
// Filters entries by region id.
if let Some(entry) = entries.first()
&& entry.ns.region_id == region_id
{
yield Ok(entries);
} else {
yield Ok(vec![]);
// Ignores no-op records.
if kafka_record.value.is_none() {
if check_termination(offset, end_offset, &entry_records)? {
break;
}
continue;
}
// Terminates the stream if the entry with the end offset was read.
if record_offset >= end_offset {
debug!(
"Stream consumer for ns {} terminates at offset {}",
ns_clone, record_offset
);
// Filters records by namespace.
let record = Record::try_from(kafka_record)?;
if record.meta.ns != ns_clone {
if check_termination(offset, end_offset, &entry_records)? {
break;
}
continue;
}
// Tries to construct an entry from records consumed so far.
if let Some(entry) = maybe_emit_entry(record, &mut entry_records)? {
yield Ok(vec![entry]);
}
if check_termination(offset, end_offset, &entry_records)? {
break;
}
}
@@ -251,3 +258,226 @@ impl LogStore for KafkaLogStore {
Ok(())
}
}
/// Decides whether the stream consumer should stop after handling the record at `offset`.
///
/// Returns `Ok(false)` while `offset` is still below `end_offset`. Once `offset`
/// reaches or passes `end_offset`, returns `Ok(true)` if `entry_records` is empty,
/// and an `IllegalSequence` error if some records are still buffered in it.
fn check_termination(
    offset: i64,
    end_offset: i64,
    entry_records: &HashMap<EntryId, Vec<Record>>,
) -> Result<bool> {
    // The record at the end offset has not been reached yet, keep consuming.
    if offset < end_offset {
        return Ok(false);
    }

    debug!("Stream consumer terminates at offset {}", offset);

    // No records may be left buffered when the stream terminates.
    if entry_records.is_empty() {
        Ok(true)
    } else {
        IllegalSequenceSnafu {
            error: "Found records leftover",
        }
        .fail()
    }
}
#[cfg(test)]
mod tests {
use common_base::readable_size::ReadableSize;
use common_config::wal::KafkaWalTopic as Topic;
use rand::seq::IteratorRandom;
use super::*;
use crate::test_util::kafka::{
create_topics, entries_with_random_data, new_namespace, EntryBuilder,
};
/// Test context of a single region.
struct RegionContext {
// The namespace of the region, used when reading entries back.
ns: NamespaceImpl,
// Builds entries for the region.
entry_builder: EntryBuilder,
// The entries built by the latest `build_entries` call that involved the region.
expected: Vec<EntryImpl>,
// The last entry id reported for the region by the latest append it took part in.
flushed_entry_id: EntryId,
}
/// Sets up a test by creating `num_topics` topics and constructing a log store.
///
/// Topic names are built from `test_name`, the topic index and a fresh UUID.
/// The log store is configured with a `max_batch_size` of 32KB.
/// An entry with empty data is appended to every topic before returning, and the
/// returned last entry id is asserted to be 0.
async fn prepare(
test_name: &str,
num_topics: usize,
broker_endpoints: Vec<String>,
) -> (KafkaLogStore, Vec<Topic>) {
let topics = create_topics(
num_topics,
|i| format!("{test_name}_{}_{}", i, uuid::Uuid::new_v4()),
&broker_endpoints,
)
.await;
let config = KafkaConfig {
broker_endpoints,
max_batch_size: ReadableSize::kb(32),
..Default::default()
};
let logstore = KafkaLogStore::try_new(&config).await.unwrap();
// Appends a no-op record to each topic.
for topic in topics.iter() {
let last_entry_id = logstore
.append(EntryImpl {
data: vec![],
id: 0,
ns: new_namespace(topic, 0),
})
.await
.unwrap()
.last_entry_id;
assert_eq!(last_entry_id, 0);
}
(logstore, topics)
}
/// Creates a vector containing indexes of all regions if `all` is true.
/// Otherwise, creates a randomly chosen subset of the indexes. The cardinality of the subset
/// is a quarter of that of the universe set (rounded down), but at least one.
///
/// Panics if `num_regions` is zero.
fn all_or_subset(all: bool, num_regions: usize) -> Vec<u64> {
assert!(num_regions > 0);
let amount = if all {
num_regions
} else {
(num_regions / 4).max(1)
};
(0..num_regions as u64).choose_multiple(&mut rand::thread_rng(), amount)
}
/// Builds entries for regions specified by `which`. Builds large entries if `large` is true.
/// The `expected` entries of each involved region are overwritten with the newly built ones.
/// Returns the aggregated entries of all involved regions.
fn build_entries(
region_contexts: &mut HashMap<u64, RegionContext>,
which: &[u64],
large: bool,
) -> Vec<EntryImpl> {
let mut aggregated = Vec::with_capacity(which.len());
for region_id in which {
let ctx = region_contexts.get_mut(region_id).unwrap();
// Builds entries for the region.
ctx.expected = if !large {
entries_with_random_data(3, &ctx.entry_builder)
} else {
// Builds a large entry of size 256KB which is way greater than the configured `max_batch_size` which is 32KB.
let large_entry = ctx.entry_builder.with_data([b'1'; 256 * 1024]);
vec![large_entry]
};
// Aggregates entries of all regions.
aggregated.push(ctx.expected.clone());
}
aggregated.into_iter().flatten().collect()
}
/// Starts a test with:
/// * `test_name` - The name of the test.
/// * `num_topics` - Number of topics to be created in the preparation phase.
/// * `num_regions` - Number of regions involved in the test.
/// * `num_appends` - Number of append operations to be performed.
/// * `all` - All regions will be involved in an append operation if `all` is true. Otherwise,
/// an append operation will only randomly choose a subset of regions.
/// * `large` - Builds large entries for each region if `large` is true.
///
/// The test is skipped if the environment variable `GT_KAFKA_ENDPOINTS` cannot be read.
async fn test_with(
test_name: &str,
num_topics: usize,
num_regions: usize,
num_appends: usize,
all: bool,
large: bool,
) {
let Ok(broker_endpoints) = std::env::var("GT_KAFKA_ENDPOINTS") else {
warn!("The endpoints is empty, skipping the test {test_name}");
return;
};
// The endpoints are given as a comma-separated list.
let broker_endpoints = broker_endpoints
.split(',')
.map(|s| s.trim().to_string())
.collect::<Vec<_>>();
let (logstore, topics) = prepare(test_name, num_topics, broker_endpoints).await;
// Assigns regions to topics in a round-robin manner and creates a context for each region.
let mut region_contexts = (0..num_regions)
.map(|i| {
let topic = &topics[i % topics.len()];
let ns = new_namespace(topic, i as u64);
let entry_builder = EntryBuilder::new(ns.clone());
(
i as u64,
RegionContext {
ns,
entry_builder,
expected: Vec::new(),
flushed_entry_id: 0,
},
)
})
.collect();
for _ in 0..num_appends {
// Appends entries for all regions or for a subset of regions, depending on `all`.
let which = all_or_subset(all, num_regions);
let entries = build_entries(&mut region_contexts, &which, large);
let last_entry_ids = logstore.append_batch(entries).await.unwrap().last_entry_ids;
// Reads entries for regions and checks for each region that the gotten entries are identical with the expected ones.
for region_id in which {
let ctx = &region_contexts[&region_id];
// Reading starts right after the last flushed entry.
let stream = logstore
.read(&ctx.ns, ctx.flushed_entry_id + 1)
.await
.unwrap();
let got = stream
.collect::<Vec<_>>()
.await
.into_iter()
.flat_map(|x| x.unwrap())
.collect::<Vec<_>>();
assert_eq!(ctx.expected, got);
}
// Simulates a flush for regions.
for (region_id, last_entry_id) in last_entry_ids {
let ctx = region_contexts.get_mut(&region_id).unwrap();
ctx.flushed_entry_id = last_entry_id;
}
}
}
/// Appends entries for one region and checks all entries can be read successfully.
#[tokio::test]
async fn test_one_region() {
test_with("test_one_region", 1, 1, 1, true, false).await;
}
/// Appends entries for multiple regions and checks entries for each region can be read successfully.
/// A topic is assigned only a single region.
#[tokio::test]
async fn test_multi_regions_disjoint() {
test_with("test_multi_regions_disjoint", 5, 5, 1, true, false).await;
}
/// Appends entries for multiple regions and checks entries for each region can be read successfully.
/// A topic is assigned multiple regions.
#[tokio::test]
async fn test_multi_regions_overlapped() {
test_with("test_multi_regions_overlapped", 5, 20, 1, true, false).await;
}
/// Appends entries for multiple regions and checks entries for each region can be read successfully.
/// A topic may be assigned multiple regions. The append operation repeats for several iterations.
/// Each append operation will only append entries for a subset of randomly chosen regions.
#[tokio::test]
async fn test_multi_appends() {
test_with("test_multi_appends", 5, 20, 3, false, false).await;
}
/// Appends large entries for multiple regions and checks entries for each region can be read successfully.
/// A topic may be assigned multiple regions.
#[tokio::test]
async fn test_append_large_entries() {
test_with("test_append_large_entries", 5, 20, 3, true, true).await;
}
}

View File

@@ -1,188 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use rskafka::record::Record;
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};
use crate::error::{
DecodeMetaSnafu, EmptyEntriesSnafu, EncodeMetaSnafu, GetClientSnafu, MissingKeySnafu,
MissingValueSnafu, ProduceRecordSnafu, Result,
};
use crate::kafka::client_manager::ClientManagerRef;
use crate::kafka::offset::Offset;
use crate::kafka::{EntryId, EntryImpl, NamespaceImpl};
/// Record metadata which will be serialized/deserialized to/from the `key` of a Record.
///
/// The metadata is encoded as JSON (see `encode_to_record` / `decode_from_record`) and carries
/// everything needed to split the concatenated record value back into individual entries.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct RecordMeta {
/// Meta version. Used for backward compatibility.
/// `RecordMeta::new` always sets it to 0.
version: u32,
/// The namespace of the entries wrapped in the record.
ns: NamespaceImpl,
/// Ids of the entries built into the record.
/// `entry_ids[i]` is the id of the i-th entry.
entry_ids: Vec<EntryId>,
/// entry_offsets[i] is the end offset (exclusive) of the data of the i-th entry in the record value.
/// The offsets are the running sums of the entries' data lengths, so the data of the i-th entry
/// starts at `entry_offsets[i - 1]` (or at 0 for the first entry).
entry_offsets: Vec<usize>,
}
impl RecordMeta {
    /// Builds the metadata describing `entries`, attributing all of them to the namespace `ns`.
    ///
    /// The ids are recorded in the order the entries are given, and each entry contributes one
    /// cumulative end offset: the total number of data bytes up to and including that entry.
    fn new(ns: NamespaceImpl, entries: &[EntryImpl]) -> Self {
        let mut entry_ids = Vec::with_capacity(entries.len());
        let mut entry_offsets = Vec::with_capacity(entries.len());

        // Running total of data bytes seen so far; doubles as the exclusive end offset
        // of the entry currently being visited.
        let mut end_offset = 0;
        for entry in entries {
            entry_ids.push(entry.id);
            end_offset += entry.data.len();
            entry_offsets.push(end_offset);
        }

        Self {
            version: 0,
            ns,
            entry_ids,
            entry_offsets,
        }
    }
}
/// Produces a record to a kafka topic.
///
/// Entries are collected in a buffer and, on `produce`, encoded together into a single record
/// that is sent to the topic named by the namespace.
pub(crate) struct RecordProducer {
/// The namespace of the entries. Its `topic` selects the kafka topic to produce to.
ns: NamespaceImpl,
/// Entries are buffered before being built into a record.
entries: Vec<EntryImpl>,
}
impl RecordProducer {
/// Creates a new producer for producing entries with the given namespace.
pub(crate) fn new(ns: NamespaceImpl) -> Self {
Self {
ns,
entries: Vec::new(),
}
}
/// Populates the entry buffer with the given entries.
pub(crate) fn with_entries(self, entries: Vec<EntryImpl>) -> Self {
Self { entries, ..self }
}
/// Pushes an entry into the entry buffer.
pub(crate) fn push(&mut self, entry: EntryImpl) {
self.entries.push(entry);
}
/// Produces the buffered entries to kafka sever as a kafka record.
/// Returns the kafka offset of the produced record.
// TODO(niebayes): since the total size of a region's entries may be way-too large,
// the producer may need to support splitting entries into multiple records.
pub(crate) async fn produce(self, client_manager: &ClientManagerRef) -> Result<Offset> {
ensure!(!self.entries.is_empty(), EmptyEntriesSnafu);
// Produces the record through a client. The client determines when to send the record to kafka server.
let client = client_manager
.get_or_insert(&self.ns.topic)
.await
.map_err(|e| {
GetClientSnafu {
topic: &self.ns.topic,
error: e.to_string(),
}
.build()
})?;
client
.producer
.produce(encode_to_record(self.ns.clone(), self.entries)?)
.await
.map(Offset)
.context(ProduceRecordSnafu {
topic: &self.ns.topic,
})
}
}
/// Encodes `entries` of namespace `ns` into a single kafka record.
///
/// The record key holds the JSON-serialized `RecordMeta`; the record value holds the data of all
/// entries concatenated in order. The record is stamped with the current UTC time and carries
/// default (empty) headers.
///
/// # Errors
///
/// Fails if the metadata cannot be serialized.
fn encode_to_record(ns: NamespaceImpl, entries: Vec<EntryImpl>) -> Result<Record> {
    let meta = RecordMeta::new(ns, &entries);

    // Concatenate the payloads back to back; the offsets in `meta` mark the boundaries.
    let mut payload: Vec<u8> = Vec::new();
    for entry in entries {
        payload.extend(entry.data);
    }

    let key = serde_json::to_vec(&meta).context(EncodeMetaSnafu)?;
    Ok(Record {
        key: Some(key),
        value: Some(payload),
        timestamp: rskafka::chrono::Utc::now(),
        headers: Default::default(),
    })
}
pub(crate) fn decode_from_record(record: Record) -> Result<Vec<EntryImpl>> {
let key = record.key.context(MissingKeySnafu)?;
let value = record.value.context(MissingValueSnafu)?;
let meta: RecordMeta = serde_json::from_slice(&key).context(DecodeMetaSnafu)?;
let mut entries = Vec::with_capacity(meta.entry_ids.len());
let mut start_offset = 0;
for (i, end_offset) in meta.entry_offsets.iter().enumerate() {
entries.push(EntryImpl {
// TODO(niebayes): try to avoid the clone.
data: value[start_offset..*end_offset].to_vec(),
id: meta.entry_ids[i],
ns: meta.ns.clone(),
});
start_offset = *end_offset;
}
Ok(entries)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an entry with the given payload, id and namespace.
    fn new_test_entry<D: AsRef<[u8]>>(data: D, entry_id: EntryId, ns: NamespaceImpl) -> EntryImpl {
        let data = data.as_ref().to_vec();
        EntryImpl {
            data,
            id: entry_id,
            ns,
        }
    }

    /// The namespace shared by all test fixtures.
    fn test_namespace() -> NamespaceImpl {
        NamespaceImpl {
            region_id: 1,
            topic: "test_topic".to_string(),
        }
    }

    /// Three entries with payloads of different lengths, all in namespace `ns`.
    fn test_entries(ns: &NamespaceImpl) -> Vec<EntryImpl> {
        vec![
            new_test_entry(b"111", 1, ns.clone()),
            new_test_entry(b"2222", 2, ns.clone()),
            new_test_entry(b"33333", 3, ns.clone()),
        ]
    }

    /// The metadata must survive a JSON serialization round trip unchanged.
    #[test]
    fn test_serde_record_meta() {
        let ns = test_namespace();
        let entries = test_entries(&ns);
        let meta = RecordMeta::new(ns, &entries);

        let encoded = serde_json::to_vec(&meta).unwrap();
        let decoded: RecordMeta = serde_json::from_slice(&encoded).unwrap();

        assert_eq!(meta, decoded);
    }

    /// Encoding entries into a record and decoding it again must yield the same entries.
    #[test]
    fn test_encdec_record() {
        let ns = test_namespace();
        let entries = test_entries(&ns);

        let record = encode_to_record(ns, entries.clone()).unwrap();
        let decoded_entries = decode_from_record(record).unwrap();

        assert_eq!(entries, decoded_entries);
    }
}

View File

@@ -0,0 +1,18 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod offset;
pub mod record;
#[cfg(test)]
mod test_util;

Some files were not shown because too many files have changed in this diff Show More